From 1dffc3c5d6a4cb5406966b4cda8998802dee22a3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 20 Nov 2017 14:16:49 +0000 Subject: [PATCH] [rt] Update to 4.14-rt1 and reenable (Closes: #882192) --- debian/changelog | 6 + debian/config/defines | 2 +- ...-futex-Avoid-freeing-an-active-timer.patch | 51 - ...-variable-names-for-futex_top_waiter.patch | 118 - ...ogy-Remove-cpus_allowed-manipulation.patch | 53 - ...-init-task-to-the-boot-CPU-initially.patch | 74 - ...oost-before-waking-up-the-top-waiter.patch | 178 -- ...early-boot-preempt-assumption-in-__s.patch | 62 - ...c-keys-for-migrate_enable-and-nohz_a.patch | 308 +++ ...unofficial-trace_recursive_lock-patc.patch | 128 + .../0002-arm-Adjust-system_state-check.patch | 37 - ...and-harmless-looking-inconsistencies.patch | 55 - ...smp_store_release-in-mark_wake_futex.patch | 39 - ...imer-Correct-blantanly-wrong-comment.patch | 37 + ...adline-Fix-a-PI-crash-for-deadline-t.patch | 167 -- ...the-order-of-trace_types_lock-and-ev.patch | 189 ++ ...2-workqueue-Provide-work_on_cpu_safe.patch | 84 - ...0003-arm64-Adjust-system_state-check.patch | 38 - ...mark_wake_futex-memory-barrier-usage.patch | 36 - ...ex-Remove-rt_mutex_deadlock_account_.patch | 185 -- ...erneldoc-for-struct-hrtimer_cpu_base.patch | 43 + ...nfo-Replace-racy-task-affinity-logic.patch | 129 - ...tmutex-Dont-miss-the-dl_runtime-dl_p.patch | 52 - ...clude-generic-fields-from-histograms.patch | 38 + ...0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch | 48 - ...-Provide-futex-specific-rt_mutex-API.patch | 221 -- ...clock-argument-in-schedule_hrtimeout.patch | 81 + ...erf-Replace-racy-task-affinity-logic.patch | 76 - .../all/rt/0004-rtmutex-Clean-up.patch | 145 - ...ve-lookups-from-tracing_map-hitcount.patch | 27 + ...04-x86-smp-Adjust-system_state-check.patch | 34 - .../rt/0005-futex-Change-locking-rules.patch | 371 --- ...mer-Fix-hrtimer-function-description.patch | 61 + ...0005-metag-Adjust-system_state-check.patch | 36 - ...place-open-coded-task-affinity-logic.patch | 89 - ...ed-rtmutex-Refactor-rt_mutex_setprio.patch | 392 --- ...-Increase-tracing-map-KEYS_MAX-size.patch} | 8 +- .../rt/0006-futex-Cleanup-refcounting.patch | 76 - ...OSIX-compliance-relative-CLOCK_REALT.patch | 42 + ...06-powerpc-Adjust-system_state-check.patch | 39 - ...racing-Update-trace_sched_pi_setprio.patch | 107 - ...sfs-Replace-racy-task-affinity-logic.patch | 118 - ...ke-traceprobe-parsing-code-reusable.patch} | 18 +- .../0007-ACPI-Adjust-system_state-check.patch | 38 - ...ix-error-handling-in-__acpi_processo.patch | 45 - ...-inconsistent-rt_mutex-futex_q-state.patch | 140 - ...07-hrtimer-Cleanup-hrtimer_mode-enum.patch | 46 + ...rtmutex-Fix-PI-chain-order-integrity.patch | 120 - ...acing-Clean-up-hist_field_flags-enum.patch | 44 + ...sor-Replace-racy-task-affinity-logic.patch | 193 -- ...tex_futex_unlock-out-from-under-hb-l.patch | 358 --- .../0008-mm-Adjust-system_state-check.patch | 42 - ...08-rtmutex-Fix-more-prio-comparisons.patch | 100 - ...racing-Add-hist_field_name-accessor.patch} | 27 +- ...Take-all-clock-bases-and-modes-into-.patch | 56 + ...a64-Replace-racy-task-affinity-logic.patch | 209 -- ...req-pasemi-Adjust-system_state-check.patch | 38 - ...mutex-Introduce-rt_mutex_init_waiter.patch | 80 - ...empt-count-leak-in-rt_mutex_futex_un.patch | 41 - ...ch => 0009-tracing-Reimplement-log2.patch} | 8 +- ...Print-hrtimer-mode-in-hrtimer_start-.patch | 114 + ...-sh-Replace-racy-task-affinity-logic.patch | 120 - ...estructure-rt_mutex_finish_proxy_loc.patch | 159 -- 
...r-Switch-for-loop-to-_ffs-evaluation.patch | 80 + ...ommu-vt-d-Adjust-system_state-checks.patch | 47 - ...acing-Add-NO_DISCARD-event-file-flag.patch | 106 - ...pport-to-detect-and-avoid-duplicates.patch | 114 + ...us3-Replace-racy-task-affinity-logic.patch | 124 - ...ex_lock_pi-to-use-rt_mutex_-_proxy_l.patch | 267 -- ...-running-timer-in-hrtimer_clock_base.patch | 192 ++ ...-trigger-flag-to-hist-trigger-comman.patch | 29 - ...-Remove-code-which-merges-duplicates.patch | 188 ++ ...012-async-Adjust-system_state-checks.patch | 61 - ...s2e-Replace-racy-task-affinity-logic.patch | 129 - ...12-futex-Futex_unlock_pi-determinism.patch | 81 - ...Make-room-in-struct-hrtimer_cpu_base.patch | 34 + ...nterface-for-setting-absolute-time-.patch} | 60 +- ...-N2-Replace-racy-task-affinity-logic.patch | 95 - ...3-extable-Adjust-system_state-checks.patch | 36 - ...ock-before-enqueueing-on-the-rtmutex.patch | 204 -- ...-Reduce-conditional-code-hres_active.patch | 150 + ...ine-the-unimplemented-RINGBUF_TIME_.patch} | 71 +- ...sor-functions-instead-of-direct-acce.patch | 36 + ...14-printk-Adjust-system_state-checks.patch | 35 - ...t-triggers-access-to-ring_buffer_ev.patch} | 26 +- ...e-remote-enqueue-check-unconditional.patch | 129 + ...mm-vmscan-Adjust-system_state-checks.patch | 39 - ...buffer-event-param-to-hist-field-fu.patch} | 16 +- ...imer_cpu_base.next_timer-handling-un.patch | 99 + ...it-Introduce-SYSTEM_SCHEDULING-state.patch | 60 - ...out-hist-trigger-assignment-parsing.patch} | 43 +- ...ake-hrtimer_reprogramm-unconditional.patch | 187 ++ ...e-might_sleep-and-smp_processor_id-c.patch | 74 - ...-Add-hist-trigger-timestamp-support.patch} | 60 +- ...onditional-code-and-make-hrtimer_for.patch | 105 + ...lement-variable-support-to-tracing_.patch} | 44 +- ...mer-Unify-handling-of-hrtimer-remove.patch | 89 + ...-Add-hist_data-member-to-hist_field.patch} | 18 +- ...mer-Unify-handling-of-remote-enqueue.patch | 158 ++ ...-Add-support-for-dynamic-tracepoints.patch | 196 -- ...-modifier-for-hist-trigger-timestam.patch} | 24 +- ...ote-enqueue-decision-less-restrictiv.patch | 32 + ...d-variable-support-to-hist-triggers.patch} | 309 ++- ...base-argument-from-hrtimer_reprogram.patch | 50 + ...or-variables-in-named-trigger-compa.patch} | 21 +- ...hrtimer-Split-hrtimer_start_range_ns.patch | 76 + ...-onmatch-hist-trigger-action-support.patch | 1269 --------- ...23-tracing-Move-get_hist_field_flags.patch | 74 + ...timer-Split-__hrtimer_get_next_event.patch | 53 + ...e-expression-support-to-hist-trigge.patch} | 266 +- ...save-irqrestore-around-__run_hrtimer.patch | 145 + ...ralize-per-element-hist-trigger-data.patch | 150 + ...k-bases-and-hrtimer-mode-for-soft-ir.patch | 109 + ...licate-count-from-tracing_map-availa.patch | 125 - ...cing_map_elt-to-hist_field-accessor-.patch | 222 ++ ...handling-of-hard-and-softirq-based-h.patch | 117 + ...27-tracing-Add-hist_field-type-field.patch | 114 + ...t-support-for-softirq-based-hrtimers.patch | 509 ++++ ...-trigger-support-for-variable-refere.patch | 106 - ...ble-reference-handling-to-hist-trig.patch} | 616 ++--- ...ement-SOFT-HARD-clock-base-selection.patch | 56 + ...racing-Add-hist-trigger-action-hook.patch} | 66 +- ...hrtimer_tasklet-with-softirq-based-h.patch | 315 +++ ...ng-Add-support-for-synthetic-events.patch} | 626 +++-- ...eplace-hrtimer-tasklet-with-softirq-.patch | 136 + ...cing-Add-support-for-field-variables.patch | 589 ++++ ...-onmatch-hist-trigger-action-support.patch | 555 ++++ ...hrtimer-tasklet-with-softirq-hrtimer.patch | 133 + 
.../0033-softirq-Remove-tasklet_hrtimer.patch | 110 + ...d-onmax-hist-trigger-action-support.patch} | 181 +- ...Replace-tasklet-with-softirq-hrtimer.patch | 99 + ...tespace-to-surround-hist-trigger-fi.patch} | 32 +- ...ing-Add-cpu-field-for-hist-triggers.patch} | 66 +- ...Replace-tasklet-with-softirq-hrtimer.patch | 97 + ...Replace-tasklet-with-softirq-hrtimer.patch | 133 + ...-trigger-support-for-variable-refere.patch | 142 + ...error-error-facility-for-hist-trigg.patch} | 226 +- ...er-event-hist-trigger-Documentation.patch} | 48 +- ...g-Make-tracing_set_clock-non-static.patch} | 10 +- ...a-clock-attribute-for-hist-triggers.patch} | 45 +- ...-trace_recursive_lock-limit-for-synt.patch | 44 + ...r-event-blurb-to-HIST_TRIGGERS-confi.patch | 28 + ...n-translation-section-permission-fau.patch | 6 +- ...recursive-locking-in-hci_send_to_cha.patch | 71 + ...-Loongson2-drop-set_cpus_allowed_ptr.patch | 43 - ...intk-drop-the-logbuf_lock-more-often.patch | 12 +- ...wngrade-preempt_disable-d-region-to-.patch | 18 +- ...-mark-LAPIC-timer-handler-as-irqsafe.patch | 26 - ...-replace-seqcount_t-with-a-seqlock_t.patch | 14 +- ...kip-that-warning-but-only-on-sleepin.patch | 36 + ...acpi_gbl_hardware-lock-back-to-a-raw.patch | 6 +- .../features/all/rt/add_migrate_disable.patch | 28 +- ...-a-locallock-instead-preempt_disable.patch | 84 + .../arch-arm64-Add-lazy-preempt-support.patch | 38 +- ...ove-irq-handler-when-clock-is-unused.patch | 2 +- ...-tclib-default-to-tclib-timer-for-rt.patch | 2 +- .../all/rt/arm-convert-boot-lock-to-raw.patch | 2 +- .../all/rt/arm-enable-highmem-for-rt.patch | 8 +- .../rt/arm-highmem-flush-tlb-on-unmap.patch | 2 +- ...arm-include-definition-for-cpumask_t.patch | 4 +- ...probe-replace-patch_lock-to-raw-lock.patch | 8 +- .../all/rt/arm-preempt-lazy-support.patch | 22 +- .../all/rt/arm-unwind-use_raw_lock.patch | 2 +- ...ture-don-t-use-mutex-in-bringup-path.patch | 170 -- ...arm64-xen--Make-XEN-depend-on-non-rt.patch | 4 +- .../rt/at91_dont_enable_disable_clock.patch | 22 +- .../rt/ata-disable-interrupts-if-non-rt.patch | 42 +- .../all/rt/block-blk-mq-use-swait.patch | 20 +- ...k-mq-don-t-complete-requests-via-IPI.patch | 30 +- .../rt/block-mq-drop-preempt-disable.patch | 12 +- .../all/rt/block-mq-use-cpu_light.patch | 4 +- ...k-shorten-interrupt-disabled-regions.patch | 12 +- .../features/all/rt/block-use-cpu-chill.patch | 8 +- .../all/rt/bug-rt-dependend-variants.patch | 4 +- ...heduling-while-atomic-in-cgroup-code.patch | 24 +- ...roups-use-simple-wait-in-css_release.patch | 16 +- ...om-don-t-print-that-the-init-is-done.patch | 2 +- ...ers-timer-atmel-pit-fix-double-free_.patch | 2 +- ...source-tclib-allow-higher-clockrates.patch | 34 +- .../completion-use-simple-wait-queues.patch | 131 +- .../all/rt/cond-resched-lock-rt-tweak.patch | 4 +- .../all/rt/cond-resched-softirq-rt.patch | 8 +- ...c-Protect-send_msg-with-a-local-lock.patch | 2 +- .../cpu-hotplug--Implement-CPU-pinning.patch | 111 + ...ument-why-PREEMPT_RT-uses-a-spinlock.patch | 56 - ...tplug-lock-a-sleeping-spinlock-on-rt.patch | 115 - .../all/rt/cpu-rt-rework-cpu-down.patch | 526 ---- ...-a-UNINTERRUPTIBLE-hrtimer_nanosleep.patch | 60 +- .../cpu_down_move_migrate_enable_back.patch | 53 - ...lace-raw_notifier-to-atomic_notifier.patch | 173 -- ...rop-K8-s-driver-from-beeing-selected.patch | 2 +- .../rt/cpumask-disable-offstack-on-rt.patch | 6 +- ...vert-callback_lock-to-raw_spinlock_t.patch | 46 +- ...-preempt-disabled-regions-more-algos.patch | 24 +- .../features/all/rt/debugobjects-rt.patch | 4 +- 
.../all/rt/delayacct-use-raw_spinlocks.patch | 2 +- .../features/all/rt/dm-make-rt-aware.patch | 4 +- ...am-Replace-bit-spinlocks-with-rtmute.patch | 190 +- .../drivers-net-8139-disable-irq-nosync.patch | 4 +- ...rivers-net-vortex-fix-locking-issues.patch | 2 +- ...andom-reduce-preempt-disabled-region.patch | 2 +- .../rt/drivers-tty-fix-omap-lock-crap.patch | 6 +- ...rivers-tty-pl011-irq-disable-madness.patch | 6 +- ...-t-disable-preemption-in-zcomp_strea.patch | 41 +- ...-zcomp_stream_get-smp_processor_id-u.patch | 38 + ...op-trace_i915_gem_ring_dispatch-onrt.patch | 59 - ...rm-i915-init-spinlock-properly-on-RT.patch | 16 +- ...rq()_in_intel_pipe_update_startend().patch | 26 +- ...disableenable_rt()_where_recommended.patch | 10 +- .../all/rt/epoll-use-get-cpu-light.patch | 4 +- .../all/rt/fs-aio-simple-simple-work.patch | 10 +- .../features/all/rt/fs-block-rt-support.patch | 12 +- ...-back-explicit-INIT_HLIST_BL_HEAD-in.patch | 52 + ...e-preemption-on-i_dir_seq-s-write-si.patch | 119 + .../fs-dcache-init-in_lookup_hashtable.patch | 28 - ...cache-use-cpu-chill-in-trylock-loops.patch | 16 +- ...use-swait_queue-instead-of-waitqueue.patch | 36 +- .../all/rt/fs-jbd-replace-bh_state-lock.patch | 6 +- ...ull-your-plug-when-waiting-for-space.patch | 2 +- .../all/rt/fs-namespace-preemption-fix.patch | 4 +- ...-nfs-turn-rmdir_sem-into-a-semaphore.patch | 28 +- .../rt/fs-ntfs-disable-interrupt-non-rt.patch | 2 +- .../fs-replace-bh_uptodate_lock-for-rt.patch | 64 +- .../ftrace-Fix-trace-header-alignment.patch | 25 +- .../rt/ftrace-migrate-disable-tracing.patch | 10 +- ...k-unlock-symetry-versus-pi_lock-and-.patch | 10 +- .../all/rt/futex-requeue-pi-fix.patch | 6 +- ...utex-Fix-rt_mutex_cleanup_proxy_lock.patch | 126 - ...tmutex-Cure-RT-double-blocking-issue.patch | 61 - ...-migrate_disable-enable-in-different.patch | 12 +- .../all/rt/genirq-disable-irqpoll-on-rt.patch | 6 +- ...voke-the-affinity-callback-via-a-wor.patch | 16 +- .../all/rt/genirq-force-threading.patch | 4 +- ...-irq_set_irqchip_state-documentation.patch | 4 +- ...udio-don-t-inclide-rwlock.h-directly.patch | 31 + ...cpus_allowed_ptr-in-sync_unplug_thre.patch | 47 - ...duct-tape-RT-rwlock-usage-for-non-RT.patch | 96 + .../rt/hotplug-light-get-online-cpus.patch | 176 +- ...ync_unplug-no-27-5cn-27-in-task-name.patch | 25 - .../all/rt/hotplug-use-migrate-disable.patch | 40 - ...-schedule_work-call-to-helper-thread.patch | 6 +- ...-by-default-into-the-softirq-context.patch | 212 ++ ...ate-hrtimer_init-hrtimer_init_sleepe.patch | 247 ++ .../rt/hrtimer-enfore-64byte-alignment.patch | 28 - ...timer-callback-changes-for-preempt-r.patch | 338 --- .../rt/hrtimers-prepare-full-preemption.patch | 72 +- ...ng-from-i915-when-running-on-PREEMPT.patch | 30 - .../rt/ide-use-nort-local-irq-variants.patch | 2 +- .../infiniband-mellanox-ib-use-nort-irq.patch | 10 +- .../rt/inpt-gameport-use-local-irq-nort.patch | 2 +- .../all/rt/iommu-amd--Use-WARN_ON_NORT.patch | 6 +- ...aw_cpu_ptr-instead-of-get_cpu_ptr-fo.patch | 43 + ...-disable-preempt-around-this_cpu_ptr.patch | 82 - ...-disable-preemption-while-accessing-.patch | 59 - ...irq-processing-in-irq-thread-context.patch | 26 +- ...rk-Move-irq-safe-work-to-irq-context.patch | 14 +- ...-push_most_work_into_softirq_context.patch | 21 +- .../features/all/rt/jump-label-rt.patch | 6 +- .../rt/kconfig-disable-a-few-options-rt.patch | 6 +- .../all/rt/kconfig-preempt-rt-full.patch | 8 +- ...el-SRCU-provide-a-static-initializer.patch | 83 +- ...pu-down-problem-if-kthread-s-cpu-is-.patch | 86 - 
...restore-original-cpu-mask-oncpu-down.patch | 59 - ...-use-an-exclusive-wait_q-for-sleeper.patch | 142 - ...-perf_cpu_context-s-timer-as-irqsafe.patch | 25 - ...n-t-try-to-print-from-IRQ-NMI-region.patch | 6 +- ...vide-a-pointer-to-the-valid-CPU-mask.patch | 140 +- ...e-stack-kprobe-clean-up-to-__put_tas.patch | 20 +- .../kernel-softirq-unlock-with-irqs-on.patch | 2 +- .../all/rt/kgb-serial-hackaround.patch | 6 +- ...ds-trigger-disable-CPU-trigger-on-RT.patch | 2 +- .../list_bl-fixup-bogus-lockdep-warning.patch | 4 +- ..._bl.h-make-list-head-locking-RT-safe.patch | 8 +- .../rt/local-irq-rt-depending-variants.patch | 6 +- .../all/rt/locallock-add-local_lock_on.patch | 32 - .../features/all/rt/localversion.patch | 4 +- ...ilation-error-for-CONFIG_MODULES-and.patch | 56 - .../lockdep-Fix-per-cpu-static-objects.patch | 124 - ...tatically-initialized-PER_CPU-locks-.patch | 269 -- .../all/rt/lockdep-disable-self-test.patch | 29 + .../lockdep-no-softirq-accounting-on-rt.patch | 44 +- ...-fix-warnings-due-to-missing-PREEMPT.patch | 24 +- ...ardirq-context-test-for-raw-spinlock.patch | 6 +- ...eck-for-__LINUX_SPINLOCK_TYPES_H-on-.patch | 228 ++ ...ure-Do-NOT-include-rwlock.h-directly.patch | 2 +- ...-fix-deadlock-in-device-mapper-block.patch | 74 + ...mutex-don-t-drop-the-wait_lock-twice.patch | 30 + ...re-init-the-wait_lock-in-rt_mutex_in.patch | 33 + .../features/all/rt/md-disable-bcache.patch | 2 +- .../md-raid5-do-not-disable-interrupts.patch | 55 + .../md-raid5-percpu-handling-rt-aware.patch | 14 +- ...fd-syscon-atmel-smc-include-string.h.patch | 23 + .../all/rt/mips-disable-highmem-on-rt.patch | 4 +- ...--rt--Fix-generic-kmap_atomic-for-RT.patch | 6 +- ...on-t-disable-IRQs-in-wb_congested_pu.patch | 4 +- .../rt/mm-bounce-local-irq-save-nort.patch | 4 +- .../rt/mm-convert-swap-to-percpu-locked.patch | 38 +- .../features/all/rt/mm-disable-sloub-rt.patch | 6 +- .../features/all/rt/mm-enable-slub.patch | 103 +- .../all/rt/mm-make-vmstat-rt-aware.patch | 26 +- ...n-t-call-schedule_work_on-in-preempt.patch | 16 +- .../rt/mm-memcontrol-do_not_disable_irq.patch | 55 +- ...m_cgroup_migrate-replace-another-loc.patch | 30 - ...e-alloc-use-local-lock-on-target-cpu.patch | 4 +- ...e_alloc-reduce-lock-sections-further.patch | 39 +- ...page_alloc-rt-friendly-per-cpu-pages.patch | 36 +- ...m-perform-lru_add_drain_all-remotely.patch | 22 +- .../rt/mm-protect-activate-switch-mm.patch | 4 +- .../all/rt/mm-rt-kmap-atomic-scheduling.patch | 24 +- ...-scatterlist-dont-disable-irqs-on-RT.patch | 2 +- ...sable-preemption-while-taking-the-pe.patch | 46 - .../all/rt/mm-vmalloc-use-get-cpu-light.patch | 12 +- ...-not-protect-workingset_shadow_nodes.patch | 34 +- ...oc_copy_with_get_cpu_var_and_locking.patch | 14 +- .../all/rt/mmci-remove-bogus-irq-save.patch | 6 +- .../move_sched_delayed_work_to_helper.patch | 6 +- .../features/all/rt/mutex-no-spin-on-rt.patch | 2 +- ...schedule_irqoff-disable-interrupts-o.patch | 6 +- ...Qdisc-use-a-seqlock-instead-seqcount.patch | 38 +- .../rt/net-add-a-lock-around-icmp_sk.patch | 6 +- ...-missing-serialization-in-ip_send_un.patch | 12 +- ...al-irq-disable-alloc-atomic-headache.patch | 8 +- ...tplug-drain-input_pkt_queue-lockless.patch | 4 +- ...-users-of-napi_alloc_cache-against-r.patch | 16 +- ...explicit-do_softirq-from-busy_poll_s.patch | 28 - ...ake-qdisc-s-busylock-in-__dev_xmit_s.patch | 4 +- ...ble-xt-write-recseq-begin-rt-fallout.patch | 17 +- .../net-make-devnet_rename_seq-a-mutex.patch | 14 +- ...recursion-to-per-task-variable-on-RT.patch | 16 +- 
.../all/rt/net-prevent-abba-deadlock.patch | 4 +- ...y-to-delegate-processing-a-softirq-t.patch | 6 +- ...activate_many-use-msleep-1-instead-o.patch | 4 +- ...he-tcp_sk_lock-lock-with-BH-disabled.patch | 68 + .../features/all/rt/net-use-cpu-chill.patch | 6 +- .../all/rt/net-use-trylock-in-icmp_sk.patch | 74 + .../all/rt/net-wireless-warn-nort.patch | 4 +- .../all/rt/net_disable_NET_RX_BUSY_POLL.patch | 4 +- .../features/all/rt/oleg-signal-rt-fix.patch | 12 +- .../all/rt/panic-disable-random-on-rt.patch | 4 +- ...ce-rcu-bh-qs-where-safe-from-softirq.patch | 24 +- ...ec-Don-t-use-completion-s-wait-queue.patch | 109 + .../all/rt/percpu_ida-use-locklocks.patch | 2 +- .../perf-make-swevent-hrtimer-irqsafe.patch | 69 - .../all/rt/peter_zijlstra-frob-rcu.patch | 4 +- .../all/rt/peterz-percpu-rwsem-rt.patch | 38 +- .../all/rt/peterz-srcu-crypto-chain.patch | 2 +- .../all/rt/pid.h-include-atomic.h.patch | 4 +- .../patches/features/all/rt/ping-sysrq.patch | 8 +- .../all/rt/posix-timers-no-broadcast.patch | 6 +- ...timers-thread-posix-cpu-timers-on-rt.patch | 32 +- .../all/rt/power-disable-highmem-on-rt.patch | 4 +- .../rt/power-use-generic-rwsem-on-rt.patch | 4 +- ...ble-in-kernel-MPIC-emulation-for-PRE.patch | 4 +- .../all/rt/powerpc-preempt-lazy-support.patch | 34 +- ...ce-init.c-adapt-to-completions-using.patch | 2 +- .../all/rt/preempt-lazy-support.patch | 123 +- .../all/rt/preempt-nort-rt-variants.patch | 6 +- ...27-boot-param-to-help-with-debugging.patch | 4 +- .../patches/features/all/rt/printk-kill.patch | 29 +- .../features/all/rt/printk-rt-aware.patch | 10 +- ...ace-fix-ptrace-vs-tasklist_lock-race.patch | 14 +- .../all/rt/radix-tree-use-local-locks.patch | 25 +- ...dom-avoid-preempt_disable-ed-section.patch | 22 +- .../all/rt/random-make-it-work-on-rt.patch | 20 +- ...tree-include-rcu.h-because-we-use-it.patch | 4 +- ...nate-softirq-processing-from-rcutree.patch | 115 +- ...kdep-false-positive-boost_mtx-compla.patch | 50 + .../rt/rcu-disable-rcu-fast-no-hz-on-rt.patch | 10 +- ..._normal_after_boot-by-default-for-RT.patch | 6 +- .../rt/rcu-make-RCU_BOOST-default-on-RT.patch | 12 +- ...merge-rcu-bh-into-rcu-preempt-for-rt.patch | 112 +- .../rt/rcu-segcblist-include-rcupdate.h.patch | 22 + ...s-disable-irq-while-calling-rcu_pree.patch | 4 +- ...ate_disable-race-with-cpu-hotplug-3f.patch | 35 - ...l-arm-coredump-fails-for-cpu-3e-3d-4.patch | 2 +- ...function-called-from-invalid-context.patch | 2 +- ...ease-the-nr-of-migratory-tasks-when-.patch | 155 ++ .../features/all/rt/rt-add-rt-locks.patch | 2401 ----------------- .../all/rt/rt-introduce-cpu-chill.patch | 12 +- .../features/all/rt/rt-local-irq-lock.patch | 43 +- ...-Reenable-migration-accross-schedule.patch | 112 - .../all/rt/rt-preempt-base-config.patch | 2 +- .../features/all/rt/rt-serial-warn-fix.patch | 2 +- ...ndle-non-enqueued-waiters-gracefully.patch | 4 +- .../rt/rtmutex-Fix-lock-stealing-logic.patch | 162 -- .../rt/rtmutex-Make-lock_killable-work.patch | 4 +- .../rtmutex-Provide-rt_mutex_lock_state.patch | 112 - ...ex-Provide-rt_mutex_slowlock_locked.patch} | 85 +- ...utex-implementation-based-on-rtmutex.patch | 373 +++ ...lock-implementation-based-on-rtmutex.patch | 569 ++++ ...wsem-implementation-based-on-rtmutex.patch | 392 +++ ...tex-add-sleeping-lock-implementation.patch | 1197 ++++++++ ...tex-add-ww_mutex-addon-for-mutex-rt.patch} | 233 +- .../all/rt/rtmutex-avoid-include-hell.patch | 4 +- ...ockdep-less-version-of-rt_mutex-s-lo.patch | 151 ++ .../all/rt/rtmutex-futex-prepare-rt.patch | 39 +- 
.../all/rt/rtmutex-lock-killable.patch | 6 +- .../rt/rtmutex-trylock-is-okay-on-RT.patch | 10 +- .../all/rt/rtmutex-wire-up-RT-s-locking.patch | 249 ++ .../all/rt/rtmutex_dont_include_rcu.patch | 22 +- ...em-rt-Lift-single-reader-restriction.patch | 743 ----- ...rxrpc-remove-unused-static-variables.patch | 2 +- ...t-t-disable-interrupts-in-qc_issue-h.patch | 2 +- ...sk-state-corruption-by-spurious-lock.patch | 4 +- .../all/rt/sched-Remove-TASK_ALL.patch | 4 +- ...line-dl_task_timer-has-to-be-irqsafe.patch | 23 - .../all/rt/sched-delay-put-task.patch | 21 +- .../sched-disable-rt-group-sched-on-rt.patch | 4 +- .../all/rt/sched-disable-ttwu-queue.patch | 4 +- .../all/rt/sched-limit-nr-migrate.patch | 4 +- ...might-sleep-do-not-account-rcu-depth.patch | 8 +- .../all/rt/sched-mmdrop-delayed.patch | 20 +- .../all/rt/sched-rt-mutex-wakeup.patch | 20 +- ...twu-ensure-success-return-is-correct.patch | 4 +- ...Only-wake-up-idle-workers-if-not-blo.patch | 4 +- .../features/all/rt/scsi-fcoe-rt-aware.patch | 6 +- ...function-called-from-invalid-context.patch | 2 +- .../rt/seqlock-prevent-rt-starvation.patch | 20 +- .../all/rt/signal-fix-up-rcu-wreckage.patch | 6 +- .../signal-revert-ptrace-preempt-magic.patch | 4 +- ...t-tasks-to-cache-one-sigqueue-struct.patch | 24 +- .../features/all/rt/skbufhead-raw-lock.patch | 28 +- .../rt/slub-disable-SLUB_CPU_PARTIAL.patch | 4 +- .../all/rt/slub-enable-irqs-for-no-wait.patch | 6 +- ...-unparking-of-percpu-threads-to-the-.patch | 158 -- ...pcm_stream_lock-irqs_disabled-splats.patch | 10 +- ...oftirq-disable-softirq-stacks-for-rt.patch | 30 +- .../all/rt/softirq-preempt-fix-3-re.patch | 41 +- .../features/all/rt/softirq-split-locks.patch | 40 +- ...plit-timer-softirqs-out-of-ksoftirqd.patch | 4 +- ...irq-wake-the-timer-softirq-if-needed.patch | 2 +- ...arc64-use-generic-rwsem-spinlocks-rt.patch | 4 +- .../all/rt/spinlock-types-separate-raw.patch | 2 +- ...it-call_srcu-use-under-raw-spinlocks.patch | 403 +++ ...place-local_irqsave-with-a-locallock.patch | 71 + ...-use-cpu_online-instead-custom-check.patch | 100 + .../all/rt/stop-machine-raw-lock.patch | 2 +- ...nvert-stop_machine_run-to-PREEMPT_RT.patch | 2 +- ...vc_xprt_do_enqueue-use-get_cpu_light.patch | 2 +- .../suspend-prevernt-might-sleep-splats.patch | 8 +- .../all/rt/sysfs-realtime-entry.patch | 12 +- ...-from-going-into-infinite-spin-in-rt.patch | 10 +- ...rmal-Defer-thermal-wakups-to-threads.patch | 2 +- ...tick-broadcast--Make-hrtimer-irqsafe.patch | 58 - ...id-schedule_work-with-interrupts-dis.patch | 53 + .../rt/timekeeping-split-jiffies-lock.patch | 8 +- ...-waking-softirqs-from-the-jiffy-tick.patch | 4 +- .../all/rt/timer-fd-avoid-live-lock.patch | 2 +- ...r-check-properly-for-a-running-timer.patch | 34 - .../all/rt/timer-make-the-base-lock-raw.patch | 181 -- .../timers-prepare-for-full-preemption.patch | 24 +- .../tpm_tis-fix-stall-after-iowrite-s.patch | 78 + ...-for-preempt-off-in-preempt_schedule.patch | 4 +- ...0-don-t-take-the-trylock-during-oops.patch | 4 +- ...ove-preemption-disabling-in-netif_rx.patch | 6 +- .../all/rt/usb-use-_nort-in-giveback.patch | 4 +- .../all/rt/user-use-local-irq-nort.patch | 2 +- .../all/rt/wait.h-include-atomic.h.patch | 8 +- ...rk-around-irqsafe-timer-optimization.patch | 4 +- ...mple-Simple-work-queue-implemenation.patch | 12 +- .../rt/workqueue-distangle-from-rq-lock.patch | 40 +- .../rt/workqueue-prevent-deadlock-stall.patch | 22 +- .../all/rt/workqueue-use-locallock.patch | 26 +- .../features/all/rt/workqueue-use-rcu.patch | 62 +- 
.../rt/x86-UV-raw_spinlock-conversion.patch | 22 +- ...ypto-reduce-preempt-disabled-regions.patch | 12 +- ...highmem-add-a-already-used-pte-check.patch | 2 +- .../all/rt/x86-io-apic-migra-no-unmask.patch | 4 +- .../rt/x86-kvm-require-const-tsc-for-rt.patch | 4 +- .../all/rt/x86-mce-timer-hrtimer.patch | 24 +- ...-mce-use-swait-queue-for-mce-wakeups.patch | 104 +- .../features/all/rt/x86-preempt-lazy.patch | 44 +- ...ignal-delay-calling-signals-on-32bit.patch | 4 +- .../rt/x86-stackprot-no-random-on-rt.patch | 13 +- .../rt/x86-use-gen-rwsem-spinlocks-rt.patch | 4 +- ...9pfs-don-t-inclide-rwlock.h-directly.patch | 29 + debian/patches/series-rt | 310 +-- 477 files changed, 16457 insertions(+), 19018 deletions(-) delete mode 100644 debian/patches/features/all/rt/0001-futex-Avoid-freeing-an-active-timer.patch delete mode 100644 debian/patches/features/all/rt/0001-futex-Cleanup-variable-names-for-futex_top_waiter.patch delete mode 100644 debian/patches/features/all/rt/0001-ia64-topology-Remove-cpus_allowed-manipulation.patch delete mode 100644 debian/patches/features/all/rt/0001-init-Pin-init-task-to-the-boot-CPU-initially.patch delete mode 100644 debian/patches/features/all/rt/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch delete mode 100644 debian/patches/features/all/rt/0001-sched-clock-Fix-early-boot-preempt-assumption-in-__s.patch create mode 100644 debian/patches/features/all/rt/0001-timers-Use-static-keys-for-migrate_enable-and-nohz_a.patch create mode 100644 debian/patches/features/all/rt/0001-tracing-Steve-s-unofficial-trace_recursive_lock-patc.patch delete mode 100644 debian/patches/features/all/rt/0002-arm-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch delete mode 100644 debian/patches/features/all/rt/0002-futex-Use-smp_store_release-in-mark_wake_futex.patch create mode 100644 debian/patches/features/all/rt/0002-hrtimer-Correct-blantanly-wrong-comment.patch delete mode 100644 debian/patches/features/all/rt/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch create mode 100644 debian/patches/features/all/rt/0002-tracing-Reverse-the-order-of-trace_types_lock-and-ev.patch delete mode 100644 debian/patches/features/all/rt/0002-workqueue-Provide-work_on_cpu_safe.patch delete mode 100644 debian/patches/features/all/rt/0003-arm64-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch delete mode 100644 debian/patches/features/all/rt/0003-futex-Remove-rt_mutex_deadlock_account_.patch create mode 100644 debian/patches/features/all/rt/0003-hrtimer-Fix-kerneldoc-for-struct-hrtimer_cpu_base.patch delete mode 100644 debian/patches/features/all/rt/0003-ia64-salinfo-Replace-racy-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch create mode 100644 debian/patches/features/all/rt/0003-tracing-Exclude-generic-fields-from-histograms.patch delete mode 100644 debian/patches/features/all/rt/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch delete mode 100644 debian/patches/features/all/rt/0004-futex-rt_mutex-Provide-futex-specific-rt_mutex-API.patch create mode 100644 debian/patches/features/all/rt/0004-hrtimer-Cleanup-clock-argument-in-schedule_hrtimeout.patch delete mode 100644 debian/patches/features/all/rt/0004-ia64-sn-hwperf-Replace-racy-task-affinity-logic.patch delete mode 100644 
debian/patches/features/all/rt/0004-rtmutex-Clean-up.patch create mode 100644 debian/patches/features/all/rt/0004-tracing-Remove-lookups-from-tracing_map-hitcount.patch delete mode 100644 debian/patches/features/all/rt/0004-x86-smp-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0005-futex-Change-locking-rules.patch create mode 100644 debian/patches/features/all/rt/0005-hrtimer-Fix-hrtimer-function-description.patch delete mode 100644 debian/patches/features/all/rt/0005-metag-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0005-powerpc-smp-Replace-open-coded-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch rename debian/patches/features/all/rt/{0007-tracing-Increase-tracing-map-KEYS_MAX-size.patch => 0005-tracing-Increase-tracing-map-KEYS_MAX-size.patch} (77%) delete mode 100644 debian/patches/features/all/rt/0006-futex-Cleanup-refcounting.patch create mode 100644 debian/patches/features/all/rt/0006-hrtimer-Ensure-POSIX-compliance-relative-CLOCK_REALT.patch delete mode 100644 debian/patches/features/all/rt/0006-powerpc-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0006-sched-tracing-Update-trace_sched_pi_setprio.patch delete mode 100644 debian/patches/features/all/rt/0006-sparc-sysfs-Replace-racy-task-affinity-logic.patch rename debian/patches/features/all/rt/{0009-tracing-Make-traceprobe-parsing-code-reusable.patch => 0006-tracing-Make-traceprobe-parsing-code-reusable.patch} (93%) delete mode 100644 debian/patches/features/all/rt/0007-ACPI-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0007-ACPI-processor-Fix-error-handling-in-__acpi_processo.patch delete mode 100644 debian/patches/features/all/rt/0007-futex-Rework-inconsistent-rt_mutex-futex_q-state.patch create mode 100644 debian/patches/features/all/rt/0007-hrtimer-Cleanup-hrtimer_mode-enum.patch delete mode 100644 debian/patches/features/all/rt/0007-rtmutex-Fix-PI-chain-order-integrity.patch create mode 100644 debian/patches/features/all/rt/0007-tracing-Clean-up-hist_field_flags-enum.patch delete mode 100644 debian/patches/features/all/rt/0008-ACPI-processor-Replace-racy-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0008-futex-Pull-rt_mutex_futex_unlock-out-from-under-hb-l.patch delete mode 100644 debian/patches/features/all/rt/0008-mm-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0008-rtmutex-Fix-more-prio-comparisons.patch rename debian/patches/features/all/rt/{0001-tracing-Add-hist_field_name-accessor.patch => 0008-tracing-Add-hist_field_name-accessor.patch} (86%) create mode 100644 debian/patches/features/all/rt/0008-tracing-hrtimer-Take-all-clock-bases-and-modes-into-.patch delete mode 100644 debian/patches/features/all/rt/0009-cpufreq-ia64-Replace-racy-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0009-cpufreq-pasemi-Adjust-system_state-check.patch delete mode 100644 debian/patches/features/all/rt/0009-futex-rt_mutex-Introduce-rt_mutex_init_waiter.patch delete mode 100644 debian/patches/features/all/rt/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch rename debian/patches/features/all/rt/{0002-tracing-Reimplement-log2.patch => 0009-tracing-Reimplement-log2.patch} (93%) create mode 100644 debian/patches/features/all/rt/0009-tracing-hrtimer-Print-hrtimer-mode-in-hrtimer_start-.patch delete mode 100644 
debian/patches/features/all/rt/0010-cpufreq-sh-Replace-racy-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0010-futex-rt_mutex-Restructure-rt_mutex_finish_proxy_loc.patch create mode 100644 debian/patches/features/all/rt/0010-hrtimer-Switch-for-loop-to-_ffs-evaluation.patch delete mode 100644 debian/patches/features/all/rt/0010-iommu-vt-d-Adjust-system_state-checks.patch delete mode 100644 debian/patches/features/all/rt/0010-tracing-Add-NO_DISCARD-event-file-flag.patch create mode 100644 debian/patches/features/all/rt/0010-tracing-Add-support-to-detect-and-avoid-duplicates.patch delete mode 100644 debian/patches/features/all/rt/0011-cpufreq-sparc-us3-Replace-racy-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch create mode 100644 debian/patches/features/all/rt/0011-hrtimer-Store-running-timer-in-hrtimer_clock_base.patch delete mode 100644 debian/patches/features/all/rt/0011-tracing-Add-post-trigger-flag-to-hist-trigger-comman.patch create mode 100644 debian/patches/features/all/rt/0011-tracing-Remove-code-which-merges-duplicates.patch delete mode 100644 debian/patches/features/all/rt/0012-async-Adjust-system_state-checks.patch delete mode 100644 debian/patches/features/all/rt/0012-cpufreq-sparc-us2e-Replace-racy-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0012-futex-Futex_unlock_pi-determinism.patch create mode 100644 debian/patches/features/all/rt/0012-hrtimer-Make-room-in-struct-hrtimer_cpu_base.patch rename debian/patches/features/all/rt/{0003-ring-buffer-Add-interface-for-setting-absolute-time-.patch => 0012-ring-buffer-Add-interface-for-setting-absolute-time-.patch} (71%) delete mode 100644 debian/patches/features/all/rt/0013-crypto-N2-Replace-racy-task-affinity-logic.patch delete mode 100644 debian/patches/features/all/rt/0013-extable-Adjust-system_state-checks.patch delete mode 100644 debian/patches/features/all/rt/0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch create mode 100644 debian/patches/features/all/rt/0013-hrtimer-Reduce-conditional-code-hres_active.patch rename debian/patches/features/all/rt/{0004-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch => 0013-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch} (82%) create mode 100644 debian/patches/features/all/rt/0014-hrtimer-Use-accesor-functions-instead-of-direct-acce.patch delete mode 100644 debian/patches/features/all/rt/0014-printk-Adjust-system_state-checks.patch rename debian/patches/features/all/rt/{0005-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch => 0014-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch} (91%) create mode 100644 debian/patches/features/all/rt/0015-hrtimer-Make-the-remote-enqueue-check-unconditional.patch delete mode 100644 debian/patches/features/all/rt/0015-mm-vmscan-Adjust-system_state-checks.patch rename debian/patches/features/all/rt/{0006-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch => 0015-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch} (89%) create mode 100644 debian/patches/features/all/rt/0016-hrtimer-Make-hrtimer_cpu_base.next_timer-handling-un.patch delete mode 100644 debian/patches/features/all/rt/0016-init-Introduce-SYSTEM_SCHEDULING-state.patch rename debian/patches/features/all/rt/{0008-tracing-Break-out-hist-trigger-assignment-parsing.patch => 0016-tracing-Break-out-hist-trigger-assignment-parsing.patch} (67%) create mode 100644 
debian/patches/features/all/rt/0017-hrtimer-Make-hrtimer_reprogramm-unconditional.patch delete mode 100644 debian/patches/features/all/rt/0017-sched-core-Enable-might_sleep-and-smp_processor_id-c.patch rename debian/patches/features/all/rt/{0012-tracing-Add-hist-trigger-timestamp-support.patch => 0017-tracing-Add-hist-trigger-timestamp-support.patch} (80%) create mode 100644 debian/patches/features/all/rt/0018-hrtimer-Reduce-conditional-code-and-make-hrtimer_for.patch rename debian/patches/features/all/rt/{0013-tracing-Add-per-element-variable-support-to-tracing_.patch => 0018-tracing-Add-per-element-variable-support-to-tracing_.patch} (86%) create mode 100644 debian/patches/features/all/rt/0019-hrtimer-Unify-handling-of-hrtimer-remove.patch rename debian/patches/features/all/rt/{0014-tracing-Add-hist_data-member-to-hist_field.patch => 0019-tracing-Add-hist_data-member-to-hist_field.patch} (80%) create mode 100644 debian/patches/features/all/rt/0020-hrtimer-Unify-handling-of-remote-enqueue.patch delete mode 100644 debian/patches/features/all/rt/0020-tracing-Add-support-for-dynamic-tracepoints.patch rename debian/patches/features/all/rt/{0015-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch => 0020-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch} (84%) create mode 100644 debian/patches/features/all/rt/0021-hrtimer-Make-remote-enqueue-decision-less-restrictiv.patch rename debian/patches/features/all/rt/{0016-tracing-Add-variable-support-to-hist-triggers.patch => 0021-tracing-Add-variable-support-to-hist-triggers.patch} (69%) create mode 100644 debian/patches/features/all/rt/0022-hrtimer-Remove-base-argument-from-hrtimer_reprogram.patch rename debian/patches/features/all/rt/{0017-tracing-Account-for-variables-in-named-trigger-compa.patch => 0022-tracing-Account-for-variables-in-named-trigger-compa.patch} (58%) create mode 100644 debian/patches/features/all/rt/0023-hrtimer-Split-hrtimer_start_range_ns.patch delete mode 100644 debian/patches/features/all/rt/0023-tracing-Add-onmatch-hist-trigger-action-support.patch create mode 100644 debian/patches/features/all/rt/0023-tracing-Move-get_hist_field_flags.patch create mode 100644 debian/patches/features/all/rt/0024-hrtimer-Split-__hrtimer_get_next_event.patch rename debian/patches/features/all/rt/{0018-tracing-Add-simple-expression-support-to-hist-trigge.patch => 0024-tracing-Add-simple-expression-support-to-hist-trigge.patch} (77%) create mode 100644 debian/patches/features/all/rt/0025-hrtimer-Use-irqsave-irqrestore-around-__run_hrtimer.patch create mode 100644 debian/patches/features/all/rt/0025-tracing-Generalize-per-element-hist-trigger-data.patch create mode 100644 debian/patches/features/all/rt/0026-hrtimer-Add-clock-bases-and-hrtimer-mode-for-soft-ir.patch delete mode 100644 debian/patches/features/all/rt/0026-tracing-Make-duplicate-count-from-tracing_map-availa.patch create mode 100644 debian/patches/features/all/rt/0026-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch create mode 100644 debian/patches/features/all/rt/0027-hrtimer-Prepare-handling-of-hard-and-softirq-based-h.patch create mode 100644 debian/patches/features/all/rt/0027-tracing-Add-hist_field-type-field.patch create mode 100644 debian/patches/features/all/rt/0028-hrtimer-Implement-support-for-softirq-based-hrtimers.patch delete mode 100644 debian/patches/features/all/rt/0028-tracing-Add-hist-trigger-support-for-variable-refere.patch rename debian/patches/features/all/rt/{0019-tracing-Add-variable-reference-handling-to-hist-trig.patch => 
0028-tracing-Add-variable-reference-handling-to-hist-trig.patch} (55%) create mode 100644 debian/patches/features/all/rt/0029-hrtimer-Implement-SOFT-HARD-clock-base-selection.patch rename debian/patches/features/all/rt/{0021-tracing-Add-hist-trigger-action-hook.patch => 0029-tracing-Add-hist-trigger-action-hook.patch} (72%) create mode 100644 debian/patches/features/all/rt/0030-can-bcm-Replace-hrtimer_tasklet-with-softirq-based-h.patch rename debian/patches/features/all/rt/{0022-tracing-Add-support-for-synthetic-events.patch => 0030-tracing-Add-support-for-synthetic-events.patch} (69%) create mode 100644 debian/patches/features/all/rt/0031-mac80211_hwsim-Replace-hrtimer-tasklet-with-softirq-.patch create mode 100644 debian/patches/features/all/rt/0031-tracing-Add-support-for-field-variables.patch create mode 100644 debian/patches/features/all/rt/0032-tracing-Add-onmatch-hist-trigger-action-support.patch create mode 100644 debian/patches/features/all/rt/0032-xfrm-Replace-hrtimer-tasklet-with-softirq-hrtimer.patch create mode 100644 debian/patches/features/all/rt/0033-softirq-Remove-tasklet_hrtimer.patch rename debian/patches/features/all/rt/{0024-tracing-Add-onmax-hist-trigger-action-support.patch => 0033-tracing-Add-onmax-hist-trigger-action-support.patch} (73%) create mode 100644 debian/patches/features/all/rt/0034-ALSA-dummy-Replace-tasklet-with-softirq-hrtimer.patch rename debian/patches/features/all/rt/{0025-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch => 0034-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch} (53%) rename debian/patches/features/all/rt/{0027-tracing-Add-cpu-field-for-hist-triggers.patch => 0035-tracing-Add-cpu-field-for-hist-triggers.patch} (62%) create mode 100644 debian/patches/features/all/rt/0035-usb-gadget-NCM-Replace-tasklet-with-softirq-hrtimer.patch create mode 100644 debian/patches/features/all/rt/0036-net-mvpp2-Replace-tasklet-with-softirq-hrtimer.patch create mode 100644 debian/patches/features/all/rt/0036-tracing-Add-hist-trigger-support-for-variable-refere.patch rename debian/patches/features/all/rt/{0029-tracing-Add-last-error-error-facility-for-hist-trigg.patch => 0037-tracing-Add-last-error-error-facility-for-hist-trigg.patch} (65%) rename debian/patches/features/all/rt/{0030-tracing-Add-inter-event-hist-trigger-Documentation.patch => 0038-tracing-Add-inter-event-hist-trigger-Documentation.patch} (91%) rename debian/patches/features/all/rt/{0031-tracing-Make-tracing_set_clock-non-static.patch => 0039-tracing-Make-tracing_set_clock-non-static.patch} (77%) rename debian/patches/features/all/rt/{0032-tracing-Add-a-clock-attribute-for-hist-triggers.patch => 0040-tracing-Add-a-clock-attribute-for-hist-triggers.patch} (69%) create mode 100644 debian/patches/features/all/rt/0041-tracing-Increase-trace_recursive_lock-limit-for-synt.patch create mode 100644 debian/patches/features/all/rt/0042-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch create mode 100644 debian/patches/features/all/rt/Bluetooth-avoid-recursive-locking-in-hci_send_to_cha.patch delete mode 100644 debian/patches/features/all/rt/CPUFREQ-Loongson2-drop-set_cpus_allowed_ptr.patch delete mode 100644 debian/patches/features/all/rt/KVM-lapic-mark-LAPIC-timer-handler-as-irqsafe.patch create mode 100644 debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch create mode 100644 debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch delete mode 100644 
debian/patches/features/all/rt/arm64-cpufeature-don-t-use-mutex-in-bringup-path.patch create mode 100644 debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch delete mode 100644 debian/patches/features/all/rt/cpu-hotplug-Document-why-PREEMPT_RT-uses-a-spinlock.patch delete mode 100644 debian/patches/features/all/rt/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch delete mode 100644 debian/patches/features/all/rt/cpu-rt-rework-cpu-down.patch delete mode 100644 debian/patches/features/all/rt/cpu_down_move_migrate_enable_back.patch delete mode 100644 debian/patches/features/all/rt/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch create mode 100644 debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch delete mode 100644 debian/patches/features/all/rt/drm-i915-drop-trace_i915_gem_ring_dispatch-onrt.patch create mode 100644 debian/patches/features/all/rt/fs-dcache-bringt-back-explicit-INIT_HLIST_BL_HEAD-in.patch create mode 100644 debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch delete mode 100644 debian/patches/features/all/rt/fs-dcache-init-in_lookup_hashtable.patch delete mode 100644 debian/patches/features/all/rt/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch delete mode 100644 debian/patches/features/all/rt/futex-rtmutex-Cure-RT-double-blocking-issue.patch create mode 100644 debian/patches/features/all/rt/greybus-audio-don-t-inclide-rwlock.h-directly.patch delete mode 100644 debian/patches/features/all/rt/hotplug-Use-set_cpus_allowed_ptr-in-sync_unplug_thre.patch create mode 100644 debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch delete mode 100644 debian/patches/features/all/rt/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch delete mode 100644 debian/patches/features/all/rt/hotplug-use-migrate-disable.patch create mode 100644 debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch create mode 100644 debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch delete mode 100644 debian/patches/features/all/rt/hrtimer-enfore-64byte-alignment.patch delete mode 100644 debian/patches/features/all/rt/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch delete mode 100644 debian/patches/features/all/rt/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch create mode 100644 debian/patches/features/all/rt/iommu-iova-Use-raw_cpu_ptr-instead-of-get_cpu_ptr-fo.patch delete mode 100644 debian/patches/features/all/rt/iommu-iova-don-t-disable-preempt-around-this_cpu_ptr.patch delete mode 100644 debian/patches/features/all/rt/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch delete mode 100644 debian/patches/features/all/rt/kernel-cpu-fix-cpu-down-problem-if-kthread-s-cpu-is-.patch delete mode 100644 debian/patches/features/all/rt/kernel-hotplug-restore-original-cpu-mask-oncpu-down.patch delete mode 100644 debian/patches/features/all/rt/kernel-locking-use-an-exclusive-wait_q-for-sleeper.patch delete mode 100644 debian/patches/features/all/rt/kernel-perf-mark-perf_cpu_context-s-timer-as-irqsafe.patch delete mode 100644 debian/patches/features/all/rt/locallock-add-local_lock_on.patch delete mode 100644 debian/patches/features/all/rt/lockdep-Fix-compilation-error-for-CONFIG_MODULES-and.patch delete mode 100644 debian/patches/features/all/rt/lockdep-Fix-per-cpu-static-objects.patch delete mode 100644 
debian/patches/features/all/rt/lockdep-Handle-statically-initialized-PER_CPU-locks-.patch create mode 100644 debian/patches/features/all/rt/lockdep-disable-self-test.patch create mode 100644 debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch create mode 100644 debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch create mode 100644 debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch create mode 100644 debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch create mode 100644 debian/patches/features/all/rt/md-raid5-do-not-disable-interrupts.patch create mode 100644 debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch delete mode 100644 debian/patches/features/all/rt/mm-memcontrol-mem_cgroup_migrate-replace-another-loc.patch delete mode 100644 debian/patches/features/all/rt/mm-swap-don-t-disable-preemption-while-taking-the-pe.patch delete mode 100644 debian/patches/features/all/rt/net-core-remove-explicit-do_softirq-from-busy_poll_s.patch create mode 100644 debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch create mode 100644 debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch create mode 100644 debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch delete mode 100644 debian/patches/features/all/rt/perf-make-swevent-hrtimer-irqsafe.patch create mode 100644 debian/patches/features/all/rt/rcu-Suppress-lockdep-false-positive-boost_mtx-compla.patch create mode 100644 debian/patches/features/all/rt/rcu-segcblist-include-rcupdate.h.patch delete mode 100644 debian/patches/features/all/rt/re-migrate_disable-race-with-cpu-hotplug-3f.patch create mode 100644 debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch delete mode 100644 debian/patches/features/all/rt/rt-add-rt-locks.patch delete mode 100644 debian/patches/features/all/rt/rt-locking-Reenable-migration-accross-schedule.patch delete mode 100644 debian/patches/features/all/rt/rtmutex-Fix-lock-stealing-logic.patch delete mode 100644 debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_lock_state.patch rename debian/patches/features/all/rt/{rtmutex-Provide-locked-slowpath.patch => rtmutex-Provide-rt_mutex_slowlock_locked.patch} (56%) create mode 100644 debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch create mode 100644 debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch create mode 100644 debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch create mode 100644 debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch rename debian/patches/features/all/rt/{rtmutex-add-a-first-shot-of-ww_mutex.patch => rtmutex-add-ww_mutex-addon-for-mutex-rt.patch} (66%) create mode 100644 debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch create mode 100644 debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch delete mode 100644 debian/patches/features/all/rt/rwsem-rt-Lift-single-reader-restriction.patch delete mode 100644 debian/patches/features/all/rt/sched-deadline-dl_task_timer-has-to-be-irqsafe.patch delete mode 100644 debian/patches/features/all/rt/smp-hotplug-Move-unparking-of-percpu-threads-to-the-.patch create mode 100644 debian/patches/features/all/rt/srcu-Prohibit-call_srcu-use-under-raw-spinlocks.patch 
create mode 100644 debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch create mode 100644 debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch delete mode 100644 debian/patches/features/all/rt/tick-broadcast--Make-hrtimer-irqsafe.patch create mode 100644 debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch delete mode 100644 debian/patches/features/all/rt/timer-hrtimer-check-properly-for-a-running-timer.patch delete mode 100644 debian/patches/features/all/rt/timer-make-the-base-lock-raw.patch create mode 100644 debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch create mode 100644 debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch diff --git a/debian/changelog b/debian/changelog index d243d9f81..3bbaa5470 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +linux (4.14-1~exp2) UNRELEASED; urgency=medium + + * [rt] Update to 4.14-rt1 and reenable (Closes: #882192) + + -- Ben Hutchings Mon, 20 Nov 2017 14:16:28 +0000 + linux (4.14-1~exp1) experimental; urgency=medium * New upstream release: https://kernelnewbies.org/Linux_4.14 diff --git a/debian/config/defines b/debian/config/defines index 143ecdea3..056a2b51e 100644 --- a/debian/config/defines +++ b/debian/config/defines @@ -110,7 +110,7 @@ debug-info: true signed-modules: false [featureset-rt_base] -enabled: false +enabled: true [description] part-long-up: This kernel is not suitable for SMP (multi-processor, diff --git a/debian/patches/features/all/rt/0001-futex-Avoid-freeing-an-active-timer.patch b/debian/patches/features/all/rt/0001-futex-Avoid-freeing-an-active-timer.patch deleted file mode 100644 index dad7b8914..000000000 --- a/debian/patches/features/all/rt/0001-futex-Avoid-freeing-an-active-timer.patch +++ /dev/null @@ -1,51 +0,0 @@ -From: Thomas Gleixner -Date: Mon, 10 Apr 2017 18:03:36 +0200 -Subject: [PATCH 1/4] futex: Avoid freeing an active timer -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Alexander reported a hrtimer debug_object splat: - - ODEBUG: free active (active state 0) object type: hrtimer hint: hrtimer_wakeup (kernel/time/hrtimer.c:1423) - - debug_object_free (lib/debugobjects.c:603) - destroy_hrtimer_on_stack (kernel/time/hrtimer.c:427) - futex_lock_pi (kernel/futex.c:2740) - do_futex (kernel/futex.c:3399) - SyS_futex (kernel/futex.c:3447 kernel/futex.c:3415) - do_syscall_64 (arch/x86/entry/common.c:284) - entry_SYSCALL64_slow_path (arch/x86/entry/entry_64.S:249) - -Which was caused by commit: - - cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") - -... losing the hrtimer_cancel() in the shuffle. Where previously the -hrtimer_cancel() was done by rt_mutex_slowlock() we now need to do it -manually. - -Reported-by: Alexander Levin -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Cc: Linus Torvalds -Cc: Peter Zijlstra -Fixes: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") -Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704101802370.2906@nanos -Signed-off-by: Ingo Molnar ---- - kernel/futex.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2736,8 +2736,10 @@ static int futex_lock_pi(u32 __user *uad - out_put_key: - put_futex_key(&q.key); - out: -- if (to) -+ if (to) { -+ hrtimer_cancel(&to->timer); - destroy_hrtimer_on_stack(&to->timer); -+ } - return ret != -EINTR ? 
ret : -ERESTARTNOINTR; - - uaddr_faulted: diff --git a/debian/patches/features/all/rt/0001-futex-Cleanup-variable-names-for-futex_top_waiter.patch b/debian/patches/features/all/rt/0001-futex-Cleanup-variable-names-for-futex_top_waiter.patch deleted file mode 100644 index 73d5c932c..000000000 --- a/debian/patches/features/all/rt/0001-futex-Cleanup-variable-names-for-futex_top_waiter.patch +++ /dev/null @@ -1,118 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:48 +0100 -Subject: [PATCH] futex: Cleanup variable names for futex_top_waiter() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 499f5aca2cdd5e958b27e2655e7e7f82524f46b1 - -futex_top_waiter() returns the top-waiter on the pi_mutex. Assinging -this to a variable 'match' totally obscures the code. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.554710645@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 30 +++++++++++++++--------------- - 1 file changed, 15 insertions(+), 15 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1122,14 +1122,14 @@ static int attach_to_pi_owner(u32 uval, - static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, - union futex_key *key, struct futex_pi_state **ps) - { -- struct futex_q *match = futex_top_waiter(hb, key); -+ struct futex_q *top_waiter = futex_top_waiter(hb, key); - - /* - * If there is a waiter on that futex, validate it and - * attach to the pi_state when the validation succeeds. - */ -- if (match) -- return attach_to_pi_state(uval, match->pi_state, ps); -+ if (top_waiter) -+ return attach_to_pi_state(uval, top_waiter->pi_state, ps); - - /* - * We are the first waiter - try to look up the owner based on -@@ -1176,7 +1176,7 @@ static int futex_lock_pi_atomic(u32 __us - struct task_struct *task, int set_waiters) - { - u32 uval, newval, vpid = task_pid_vnr(task); -- struct futex_q *match; -+ struct futex_q *top_waiter; - int ret; - - /* -@@ -1202,9 +1202,9 @@ static int futex_lock_pi_atomic(u32 __us - * Lookup existing state first. If it exists, try to attach to - * its pi_state. - */ -- match = futex_top_waiter(hb, key); -- if (match) -- return attach_to_pi_state(uval, match->pi_state, ps); -+ top_waiter = futex_top_waiter(hb, key); -+ if (top_waiter) -+ return attach_to_pi_state(uval, top_waiter->pi_state, ps); - - /* - * No waiter and user TID is 0. We are here because the -@@ -1294,11 +1294,11 @@ static void mark_wake_futex(struct wake_ - q->lock_ptr = NULL; - } - --static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, -+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter, - struct futex_hash_bucket *hb) - { - struct task_struct *new_owner; -- struct futex_pi_state *pi_state = this->pi_state; -+ struct futex_pi_state *pi_state = top_waiter->pi_state; - u32 uninitialized_var(curval), newval; - DEFINE_WAKE_Q(wake_q); - bool deboost; -@@ -1319,11 +1319,11 @@ static int wake_futex_pi(u32 __user *uad - - /* - * It is possible that the next waiter (the one that brought -- * this owner to the kernel) timed out and is no longer -+ * top_waiter owner to the kernel) timed out and is no longer - * waiting on the lock. 
- */ - if (!new_owner) -- new_owner = this->task; -+ new_owner = top_waiter->task; - - /* - * We pass it to the next owner. The WAITERS bit is always -@@ -2633,7 +2633,7 @@ static int futex_unlock_pi(u32 __user *u - u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current); - union futex_key key = FUTEX_KEY_INIT; - struct futex_hash_bucket *hb; -- struct futex_q *match; -+ struct futex_q *top_waiter; - int ret; - - retry: -@@ -2657,9 +2657,9 @@ static int futex_unlock_pi(u32 __user *u - * all and we at least want to know if user space fiddled - * with the futex value instead of blindly unlocking. - */ -- match = futex_top_waiter(hb, &key); -- if (match) { -- ret = wake_futex_pi(uaddr, uval, match, hb); -+ top_waiter = futex_top_waiter(hb, &key); -+ if (top_waiter) { -+ ret = wake_futex_pi(uaddr, uval, top_waiter, hb); - /* - * In case of success wake_futex_pi dropped the hash - * bucket lock. diff --git a/debian/patches/features/all/rt/0001-ia64-topology-Remove-cpus_allowed-manipulation.patch b/debian/patches/features/all/rt/0001-ia64-topology-Remove-cpus_allowed-manipulation.patch deleted file mode 100644 index e9c491a79..000000000 --- a/debian/patches/features/all/rt/0001-ia64-topology-Remove-cpus_allowed-manipulation.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:27 +0200 -Subject: [PATCH 01/13] ia64/topology: Remove cpus_allowed manipulation -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The CPU hotplug callback fiddles with the cpus_allowed pointer to pin the -calling thread on the plugged CPU. That's already guaranteed by the hotplug -core code. - -Remove it. - -Signed-off-by: Thomas Gleixner -Cc: Fenghua Yu -Cc: Tony Luck -Cc: linux-ia64@vger.kernel.org -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: Viresh Kumar -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201042.174518069@linutronix.de -Signed-off-by: Thomas Gleixner ---- - arch/ia64/kernel/topology.c | 6 ------ - 1 file changed, 6 deletions(-) - ---- a/arch/ia64/kernel/topology.c -+++ b/arch/ia64/kernel/topology.c -@@ -355,18 +355,12 @@ static int cache_add_dev(unsigned int cp - unsigned long i, j; - struct cache_info *this_object; - int retval = 0; -- cpumask_t oldmask; - - if (all_cpu_cache_info[cpu].kobj.parent) - return 0; - -- oldmask = current->cpus_allowed; -- retval = set_cpus_allowed_ptr(current, cpumask_of(cpu)); -- if (unlikely(retval)) -- return retval; - - retval = cpu_cache_sysfs_init(cpu); -- set_cpus_allowed_ptr(current, &oldmask); - if (unlikely(retval < 0)) - return retval; - diff --git a/debian/patches/features/all/rt/0001-init-Pin-init-task-to-the-boot-CPU-initially.patch b/debian/patches/features/all/rt/0001-init-Pin-init-task-to-the-boot-CPU-initially.patch deleted file mode 100644 index 05b7858a2..000000000 --- a/debian/patches/features/all/rt/0001-init-Pin-init-task-to-the-boot-CPU-initially.patch +++ /dev/null @@ -1,74 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:32 +0200 -Subject: [PATCH 01/17] init: Pin init task to the boot CPU, initially -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Some of the boot code in init_kernel_freeable() which runs before SMP -bringup assumes (rightfully) that it runs on the boot CPU and therefore can -use smp_processor_id() in preemptible context. 
- -That works so far because the smp_processor_id() check starts to be -effective after smp bringup. That's just wrong. Starting with SMP bringup -and the ability to move threads around, smp_processor_id() in preemptible -context is broken. - -Aside of that it does not make sense to allow init to run on all CPUs -before sched_smp_init() has been run. - -Pin the init to the boot CPU so the existing code can continue to use -smp_processor_id() without triggering the checks when the enabling of those -checks starts earlier. - -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Steven Rostedt -Link: http://lkml.kernel.org/r/20170516184734.943149935@linutronix.de -Signed-off-by: Ingo Molnar ---- - init/main.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - ---- a/init/main.c -+++ b/init/main.c -@@ -389,6 +389,7 @@ static __initdata DECLARE_COMPLETION(kth - - static noinline void __ref rest_init(void) - { -+ struct task_struct *tsk; - int pid; - - rcu_scheduler_starting(); -@@ -397,7 +398,17 @@ static noinline void __ref rest_init(voi - * the init task will end up wanting to create kthreads, which, if - * we schedule it before we create kthreadd, will OOPS. - */ -- kernel_thread(kernel_init, NULL, CLONE_FS); -+ pid = kernel_thread(kernel_init, NULL, CLONE_FS); -+ /* -+ * Pin init on the boot CPU. Task migration is not properly working -+ * until sched_init_smp() has been run. It will set the allowed -+ * CPUs for init to the non isolated CPUs. -+ */ -+ rcu_read_lock(); -+ tsk = find_task_by_pid_ns(pid, &init_pid_ns); -+ set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id())); -+ rcu_read_unlock(); -+ - numa_default_policy(); - pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); - rcu_read_lock(); -@@ -1011,10 +1022,6 @@ static noinline void __init kernel_init_ - * init can allocate pages on any node - */ - set_mems_allowed(node_states[N_MEMORY]); -- /* -- * init can run on any cpu. -- */ -- set_cpus_allowed_ptr(current, cpu_all_mask); - - cad_pid = task_pid(current); - diff --git a/debian/patches/features/all/rt/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch b/debian/patches/features/all/rt/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch deleted file mode 100644 index 5e2939fd6..000000000 --- a/debian/patches/features/all/rt/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch +++ /dev/null @@ -1,178 +0,0 @@ -From: Xunlei Pang -Date: Thu, 23 Mar 2017 15:56:07 +0100 -Subject: [PATCH 1/9] rtmutex: Deboost before waking up the top waiter -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -We should deboost before waking the high-priority task, such that we -don't run two tasks with the same "state" (priority, deadline, -sched_class, etc). - -In order to make sure the boosting task doesn't start running between -unlock and deboost (due to 'spurious' wakeup), we move the deboost -under the wait_lock, that way its serialized against the wait loop in -__rt_mutex_slowlock(). - -Doing the deboost early can however lead to priority-inversion if -current would get preempted after the deboost but before waking our -high-prio task, hence we disable preemption before doing deboost, and -enabling it after the wake up is over. 
- -This gets us the right semantic order, but most importantly however; -this change ensures pointer stability for the next patch, where we -have rt_mutex_setprio() cache a pointer to the top-most waiter task. -If we, as before this change, do the wakeup first and then deboost, -this pointer might point into thin air. - -[peterz: Changelog + patch munging] -Suggested-by: Peter Zijlstra -Signed-off-by: Xunlei Pang -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Steven Rostedt -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.110065320@infradead.org -Signed-off-by: Thomas Gleixner ---- - kernel/futex.c | 5 --- - kernel/locking/rtmutex.c | 59 +++++++++++++++++++++------------------- - kernel/locking/rtmutex_common.h | 2 - - 3 files changed, 34 insertions(+), 32 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1460,10 +1460,7 @@ static int wake_futex_pi(u32 __user *uad - out_unlock: - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - -- if (deboost) { -- wake_up_q(&wake_q); -- rt_mutex_adjust_prio(current); -- } -+ rt_mutex_postunlock(&wake_q, deboost); - - return ret; - } ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -373,24 +373,6 @@ static void __rt_mutex_adjust_prio(struc - } - - /* -- * Adjust task priority (undo boosting). Called from the exit path of -- * rt_mutex_slowunlock() and rt_mutex_slowlock(). -- * -- * (Note: We do this outside of the protection of lock->wait_lock to -- * allow the lock to be taken while or before we readjust the priority -- * of task. We do not use the spin_xx_mutex() variants here as we are -- * outside of the debug path.) -- */ --void rt_mutex_adjust_prio(struct task_struct *task) --{ -- unsigned long flags; -- -- raw_spin_lock_irqsave(&task->pi_lock, flags); -- __rt_mutex_adjust_prio(task); -- raw_spin_unlock_irqrestore(&task->pi_lock, flags); --} -- --/* - * Deadlock detection is conditional: - * - * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted -@@ -1051,6 +1033,7 @@ static void mark_wakeup_next_waiter(stru - * lock->wait_lock. - */ - rt_mutex_dequeue_pi(current, waiter); -+ __rt_mutex_adjust_prio(current); - - /* - * As we are waking up the top waiter, and the waiter stays -@@ -1393,6 +1376,16 @@ static bool __sched rt_mutex_slowunlock( - */ - mark_wakeup_next_waiter(wake_q, lock); - -+ /* -+ * We should deboost before waking the top waiter task such that -+ * we don't run two tasks with the 'same' priority. This however -+ * can lead to prio-inversion if we would get preempted after -+ * the deboost but before waking our high-prio task, hence the -+ * preempt_disable before unlock. Pairs with preempt_enable() in -+ * rt_mutex_postunlock(); -+ */ -+ preempt_disable(); -+ - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - /* check PI boosting */ -@@ -1442,6 +1435,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lo - return slowfn(lock); - } - -+/* -+ * Undo pi boosting (if necessary) and wake top waiter. 
-+ */ -+void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost) -+{ -+ wake_up_q(wake_q); -+ -+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ -+ if (deboost) -+ preempt_enable(); -+} -+ - static inline void - rt_mutex_fastunlock(struct rt_mutex *lock, - bool (*slowfn)(struct rt_mutex *lock, -@@ -1455,11 +1460,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc - - deboost = slowfn(lock, &wake_q); - -- wake_up_q(&wake_q); -- -- /* Undo pi boosting if necessary: */ -- if (deboost) -- rt_mutex_adjust_prio(current); -+ rt_mutex_postunlock(&wake_q, deboost); - } - - /** -@@ -1572,6 +1573,13 @@ bool __sched __rt_mutex_futex_unlock(str - } - - mark_wakeup_next_waiter(wake_q, lock); -+ /* -+ * We've already deboosted, retain preempt_disabled when dropping -+ * the wait_lock to avoid inversion until the wakeup. Matched -+ * by rt_mutex_postunlock(); -+ */ -+ preempt_disable(); -+ - return true; /* deboost and wakeups */ - } - -@@ -1584,10 +1592,7 @@ void __sched rt_mutex_futex_unlock(struc - deboost = __rt_mutex_futex_unlock(lock, &wake_q); - raw_spin_unlock_irq(&lock->wait_lock); - -- if (deboost) { -- wake_up_q(&wake_q); -- rt_mutex_adjust_prio(current); -- } -+ rt_mutex_postunlock(&wake_q, deboost); - } - - /** ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct - extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wqh); - --extern void rt_mutex_adjust_prio(struct task_struct *task); -+extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost); - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # include "rtmutex-debug.h" diff --git a/debian/patches/features/all/rt/0001-sched-clock-Fix-early-boot-preempt-assumption-in-__s.patch b/debian/patches/features/all/rt/0001-sched-clock-Fix-early-boot-preempt-assumption-in-__s.patch deleted file mode 100644 index 6acbece70..000000000 --- a/debian/patches/features/all/rt/0001-sched-clock-Fix-early-boot-preempt-assumption-in-__s.patch +++ /dev/null @@ -1,62 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 24 May 2017 08:52:02 +0200 -Subject: [PATCH] sched/clock: Fix early boot preempt assumption in - __set_sched_clock_stable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The more strict early boot preemption warnings found that -__set_sched_clock_stable() was incorrectly assuming we'd still be -running on a single CPU: - - BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 - caller is debug_smp_processor_id+0x1c/0x1e - CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.12.0-rc2-00108-g1c3c5ea #1 - Call Trace: - dump_stack+0x110/0x192 - check_preemption_disabled+0x10c/0x128 - ? set_debug_rodata+0x25/0x25 - debug_smp_processor_id+0x1c/0x1e - sched_clock_init_late+0x27/0x87 - [...] - -Fix it by disabling IRQs. 
- -Reported-by: kernel test robot -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Thomas Gleixner -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Peter Zijlstra -Cc: Steven Rostedt -Cc: lkp@01.org -Cc: tipbuild@zytor.com -Link: http://lkml.kernel.org/r/20170524065202.v25vyu7pvba5mhpd@hirez.programming.kicks-ass.net -Signed-off-by: Ingo Molnar ---- - kernel/sched/clock.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - ---- a/kernel/sched/clock.c -+++ b/kernel/sched/clock.c -@@ -126,12 +126,19 @@ int sched_clock_stable(void) - - static void __set_sched_clock_stable(void) - { -- struct sched_clock_data *scd = this_scd(); -+ struct sched_clock_data *scd; - - /* -+ * Since we're still unstable and the tick is already running, we have -+ * to disable IRQs in order to get a consistent scd->tick* reading. -+ */ -+ local_irq_disable(); -+ scd = this_scd(); -+ /* - * Attempt to make the (initial) unstable->stable transition continuous. - */ - __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw); -+ local_irq_enable(); - - printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", - scd->tick_gtod, __gtod_offset, diff --git a/debian/patches/features/all/rt/0001-timers-Use-static-keys-for-migrate_enable-and-nohz_a.patch b/debian/patches/features/all/rt/0001-timers-Use-static-keys-for-migrate_enable-and-nohz_a.patch new file mode 100644 index 000000000..ee7e2bc30 --- /dev/null +++ b/debian/patches/features/all/rt/0001-timers-Use-static-keys-for-migrate_enable-and-nohz_a.patch @@ -0,0 +1,308 @@ +From: Thomas Gleixner +Date: Mon, 13 Nov 2017 20:23:44 +0100 +Subject: [PATCH 01/36] timers: Use static keys for migrate_enable and + nohz_active +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The migration_enabled and nohz_active will be later moved into the bitfield. In +the bitfield a change to one bit causes RMW operation and without holding a +lock it might happen that a concurrent change on a second CPU might cause the +loss of the an update. +To avoid that and since both fields are changed to static_branch. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 4 - + kernel/time/hrtimer.c | 17 ++------ + kernel/time/tick-internal.h | 21 +++++++--- + kernel/time/tick-sched.c | 2 + kernel/time/timer.c | 91 ++++++++++++++++++++++---------------------- + 5 files changed, 69 insertions(+), 66 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -153,8 +153,6 @@ enum hrtimer_base_type { + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events +- * @migration_enabled: The migration of hrtimers to other cpus is enabled +- * @nohz_active: The nohz functionality is enabled + * @expires_next: absolute time of the next event which was scheduled + * via clock_set_next_event() + * @next_timer: Pointer to the first expiring timer +@@ -178,8 +176,6 @@ struct hrtimer_cpu_base { + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; +- bool migration_enabled; +- bool nohz_active; + #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int in_hrtirq : 1, + hres_active : 1, +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -178,23 +178,16 @@ hrtimer_check_target(struct hrtimer *tim + #endif + } + +-#ifdef CONFIG_NO_HZ_COMMON +-static inline +-struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, +- int pinned) +-{ +- if (pinned || !base->migration_enabled) +- return base; +- return &per_cpu(hrtimer_bases, get_nohz_timer_target()); +-} +-#else + static inline + struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, + int pinned) + { ++#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) ++ if (static_branch_unlikely(&timers_migration_enabled) && !pinned) ++ return &per_cpu(hrtimer_bases, get_nohz_timer_target()); ++#endif + return base; + } +-#endif + + /* + * We switch the timer base to a power-optimized selected CPU target, +@@ -971,7 +964,7 @@ void hrtimer_start_range_ns(struct hrtim + * Kick to reschedule the next tick to handle the new timer + * on dynticks target. 
+ */ +- if (new_base->cpu_base->nohz_active) ++ if (is_timers_nohz_active()) + wake_up_nohz_cpu(new_base->cpu_base->cpu); + } else { + hrtimer_reprogram(timer, new_base); +--- a/kernel/time/tick-internal.h ++++ b/kernel/time/tick-internal.h +@@ -150,14 +150,25 @@ static inline void tick_nohz_init(void) + + #ifdef CONFIG_NO_HZ_COMMON + extern unsigned long tick_nohz_active; ++extern void timers_update_nohz(void); ++extern struct static_key_false timers_nohz_active; ++ ++static inline bool is_timers_nohz_active(void) ++{ ++ return static_branch_unlikely(&timers_nohz_active); ++} ++ ++#ifdef CONFIG_SMP ++extern struct static_key_false timers_migration_enabled; ++#endif + #else ++static inline void timers_update_nohz(void) { } + #define tick_nohz_active (0) +-#endif + +-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +-extern void timers_update_migration(bool update_nohz); +-#else +-static inline void timers_update_migration(bool update_nohz) { } ++static inline bool is_timers_nohz_active(void) ++{ ++ return false; ++} + #endif + + DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -1103,7 +1103,7 @@ static inline void tick_nohz_activate(st + ts->nohz_mode = mode; + /* One update is enough */ + if (!test_and_set_bit(0, &tick_nohz_active)) +- timers_update_migration(true); ++ timers_update_nohz(); + } + + /** +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -200,8 +200,6 @@ struct timer_base { + unsigned long clk; + unsigned long next_expiry; + unsigned int cpu; +- bool migration_enabled; +- bool nohz_active; + bool is_idle; + bool must_forward_clk; + DECLARE_BITMAP(pending_map, WHEEL_SIZE); +@@ -210,45 +208,59 @@ struct timer_base { + + static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); + +-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) ++#ifdef CONFIG_NO_HZ_COMMON ++ ++DEFINE_STATIC_KEY_FALSE(timers_nohz_active); ++static DEFINE_MUTEX(timer_keys_mutex); ++ ++static void timer_update_keys(struct work_struct *work); ++static DECLARE_WORK(timer_update_work, timer_update_keys); ++ ++#ifdef CONFIG_SMP + unsigned int sysctl_timer_migration = 1; + +-void timers_update_migration(bool update_nohz) ++DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); ++ ++static void timers_update_migration(void) + { + bool on = sysctl_timer_migration && tick_nohz_active; +- unsigned int cpu; + +- /* Avoid the loop, if nothing to update */ +- if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on) +- return; ++ if (on) ++ static_branch_enable(&timers_migration_enabled); ++ else ++ static_branch_disable(&timers_migration_enabled); ++} ++#else ++static inline void timers_update_migration(void) { } ++#endif /* !CONFIG_SMP */ + +- for_each_possible_cpu(cpu) { +- per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on; +- per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on; +- per_cpu(hrtimer_bases.migration_enabled, cpu) = on; +- if (!update_nohz) +- continue; +- per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true; +- per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true; +- per_cpu(hrtimer_bases.nohz_active, cpu) = true; +- } ++static void timer_update_keys(struct work_struct *work) ++{ ++ mutex_lock(&timer_keys_mutex); ++ timers_update_migration(); ++ static_branch_enable(&timers_nohz_active); ++ mutex_unlock(&timer_keys_mutex); ++} ++ ++void timers_update_nohz(void) ++{ ++ schedule_work(&timer_update_work); + } + + int timer_migration_handler(struct ctl_table *table, int write, + void 
__user *buffer, size_t *lenp, + loff_t *ppos) + { +- static DEFINE_MUTEX(mutex); + int ret; + +- mutex_lock(&mutex); ++ mutex_lock(&timer_keys_mutex); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!ret && write) +- timers_update_migration(false); +- mutex_unlock(&mutex); ++ timers_update_migration(); ++ mutex_unlock(&timer_keys_mutex); + return ret; + } +-#endif ++#endif /* NO_HZ_COMMON */ + + static unsigned long round_jiffies_common(unsigned long j, int cpu, + bool force_up) +@@ -534,7 +546,7 @@ static void + static void + trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) + { +- if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) ++ if (!is_timers_nohz_active()) + return; + + /* +@@ -817,7 +829,7 @@ static inline struct timer_base *get_tim + * If the timer is deferrable and nohz is active then we need to use + * the deferrable base. + */ +- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && ++ if (is_timers_nohz_active() && + (tflags & TIMER_DEFERRABLE)) + base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); + return base; +@@ -831,7 +843,7 @@ static inline struct timer_base *get_tim + * If the timer is deferrable and nohz is active then we need to use + * the deferrable base. + */ +- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && ++ if (is_timers_nohz_active() && + (tflags & TIMER_DEFERRABLE)) + base = this_cpu_ptr(&timer_bases[BASE_DEF]); + return base; +@@ -842,21 +854,20 @@ static inline struct timer_base *get_tim + return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); + } + +-#ifdef CONFIG_NO_HZ_COMMON + static inline struct timer_base * + get_target_base(struct timer_base *base, unsigned tflags) + { +-#ifdef CONFIG_SMP +- if ((tflags & TIMER_PINNED) || !base->migration_enabled) +- return get_timer_this_cpu_base(tflags); +- return get_timer_cpu_base(tflags, get_nohz_timer_target()); +-#else +- return get_timer_this_cpu_base(tflags); ++#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) ++ if (static_branch_unlikely(&timers_migration_enabled) && ++ !(tflags & TIMER_PINNED)) ++ return get_timer_cpu_base(tflags, get_nohz_timer_target()); + #endif ++ return get_timer_this_cpu_base(tflags); + } + + static inline void forward_timer_base(struct timer_base *base) + { ++#ifdef CONFIG_NO_HZ_COMMON + unsigned long jnow; + + /* +@@ -880,16 +891,8 @@ static inline void forward_timer_base(st + base->clk = jnow; + else + base->clk = base->next_expiry; +-} +-#else +-static inline struct timer_base * +-get_target_base(struct timer_base *base, unsigned tflags) +-{ +- return get_timer_this_cpu_base(tflags); +-} +- +-static inline void forward_timer_base(struct timer_base *base) { } + #endif ++} + + + /* +@@ -1644,7 +1647,7 @@ static __latent_entropy void run_timer_s + base->must_forward_clk = false; + + __run_timers(base); +- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) ++ if (is_timers_nohz_active()) + __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); + } + +@@ -1658,7 +1661,7 @@ void run_local_timers(void) + hrtimer_run_queues(); + /* Raise the softirq only if required. */ + if (time_before(jiffies, base->clk)) { +- if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) ++ if (!is_timers_nohz_active()) + return; + /* CPU is awake, so check the deferrable base. 
*/ + base++; diff --git a/debian/patches/features/all/rt/0001-tracing-Steve-s-unofficial-trace_recursive_lock-patc.patch b/debian/patches/features/all/rt/0001-tracing-Steve-s-unofficial-trace_recursive_lock-patc.patch new file mode 100644 index 000000000..775fd2613 --- /dev/null +++ b/debian/patches/features/all/rt/0001-tracing-Steve-s-unofficial-trace_recursive_lock-patc.patch @@ -0,0 +1,128 @@ +From: Steven Rostedt +Date: Fri, 22 Sep 2017 14:58:15 -0500 +Subject: [PATCH 01/42] tracing: Steve's unofficial trace_recursive_lock() + patch +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +On Tue, 5 Sep 2017 16:57:52 -0500 +Tom Zanussi wrote: + +> Synthetic event generation requires the reservation of a second event +> while the reservation of a previous event is still in progress. The +> trace_recursive_lock() check in ring_buffer_lock_reserve() prevents +> this however. +> +> This sets up a special reserve pathway for this particular case, +> leaving existing pathways untouched, other than an additional check in +> ring_buffer_lock_reserve() and trace_event_buffer_reserve(). These +> checks could be gotten rid of as well, with copies of those functions, +> but for now try to avoid that unless necessary. +> +> Signed-off-by: Tom Zanussi + +I've been planing on changing that lock, which may help you here +without having to mess around with parameters. That is to simply add a +counter. Would this patch help you. You can add a patch to increment +the count to 5 with an explanation of handling synthetic events, but +even getting to 4 is extremely unlikely. + +I'll make this into an official patch if this works for you, and then +you can include it in your series. + +-- Steve + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/ring_buffer.c | 66 ++++++++++++--------------------------------- + 1 file changed, 18 insertions(+), 48 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -2538,61 +2538,29 @@ rb_wakeups(struct ring_buffer *buffer, s + * The lock and unlock are done within a preempt disable section. + * The current_context per_cpu variable can only be modified + * by the current task between lock and unlock. But it can +- * be modified more than once via an interrupt. To pass this +- * information from the lock to the unlock without having to +- * access the 'in_interrupt()' functions again (which do show +- * a bit of overhead in something as critical as function tracing, +- * we use a bitmask trick. ++ * be modified more than once via an interrupt. There are four ++ * different contexts that we need to consider. + * +- * bit 0 = NMI context +- * bit 1 = IRQ context +- * bit 2 = SoftIRQ context +- * bit 3 = normal context. +- * +- * This works because this is the order of contexts that can +- * preempt other contexts. A SoftIRQ never preempts an IRQ +- * context. +- * +- * When the context is determined, the corresponding bit is +- * checked and set (if it was set, then a recursion of that context +- * happened). +- * +- * On unlock, we need to clear this bit. To do so, just subtract +- * 1 from the current_context and AND it to itself. +- * +- * (binary) +- * 101 - 1 = 100 +- * 101 & 100 = 100 (clearing bit zero) +- * +- * 1010 - 1 = 1001 +- * 1010 & 1001 = 1000 (clearing bit 1) +- * +- * The least significant bit can be cleared this way, and it +- * just so happens that it is the same bit corresponding to +- * the current context. ++ * Normal context. 
++ * SoftIRQ context ++ * IRQ context ++ * NMI context ++ * ++ * If for some reason the ring buffer starts to recurse, we ++ * only allow that to happen at most 4 times (one for each ++ * context). If it happens 5 times, then we consider this a ++ * recusive loop and do not let it go further. + */ + + static __always_inline int + trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) + { +- unsigned int val = cpu_buffer->current_context; +- int bit; +- +- if (in_interrupt()) { +- if (in_nmi()) +- bit = RB_CTX_NMI; +- else if (in_irq()) +- bit = RB_CTX_IRQ; +- else +- bit = RB_CTX_SOFTIRQ; +- } else +- bit = RB_CTX_NORMAL; +- +- if (unlikely(val & (1 << bit))) ++ if (cpu_buffer->current_context >= 4) + return 1; + +- val |= (1 << bit); +- cpu_buffer->current_context = val; ++ cpu_buffer->current_context++; ++ /* Interrupts must see this update */ ++ barrier(); + + return 0; + } +@@ -2600,7 +2568,9 @@ trace_recursive_lock(struct ring_buffer_ + static __always_inline void + trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) + { +- cpu_buffer->current_context &= cpu_buffer->current_context - 1; ++ /* Don't let the dec leak out */ ++ barrier(); ++ cpu_buffer->current_context--; + } + + /** diff --git a/debian/patches/features/all/rt/0002-arm-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0002-arm-Adjust-system_state-check.patch deleted file mode 100644 index 9bc273583..000000000 --- a/debian/patches/features/all/rt/0002-arm-Adjust-system_state-check.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:33 +0200 -Subject: [PATCH 02/17] arm: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in ipi_cpu_stop() to handle the extra states. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Peter Zijlstra -Cc: Russell King -Cc: Steven Rostedt -Cc: linux-arm-kernel@lists.infradead.org -Link: http://lkml.kernel.org/r/20170516184735.020718977@linutronix.de -Signed-off-by: Ingo Molnar ---- - arch/arm/kernel/smp.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/arch/arm/kernel/smp.c -+++ b/arch/arm/kernel/smp.c -@@ -555,8 +555,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock); - */ - static void ipi_cpu_stop(unsigned int cpu) - { -- if (system_state == SYSTEM_BOOTING || -- system_state == SYSTEM_RUNNING) { -+ if (system_state <= SYSTEM_RUNNING) { - raw_spin_lock(&stop_lock); - pr_crit("CPU%u: stopping\n", cpu); - dump_stack(); diff --git a/debian/patches/features/all/rt/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch b/debian/patches/features/all/rt/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch deleted file mode 100644 index 4e3800bab..000000000 --- a/debian/patches/features/all/rt/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch +++ /dev/null @@ -1,55 +0,0 @@ -From: Peter Zijlstra -Date: Fri, 7 Apr 2017 09:04:07 +0200 -Subject: [PATCH 2/4] futex: Fix small (and harmless looking) inconsistencies -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -During (post-commit) review Darren spotted a few minor things. 
One -(harmless AFAICT) type inconsistency and a comment that wasn't as -clear as hoped. - -Reported-by: Darren Hart (VMWare) -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Darren Hart (VMware) -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Thomas Gleixner -Cc: linux-kernel@vger.kernel.org -Signed-off-by: Ingo Molnar ---- - kernel/futex.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1025,7 +1025,8 @@ static int attach_to_pi_state(u32 __user - struct futex_pi_state **ps) - { - pid_t pid = uval & FUTEX_TID_MASK; -- int ret, uval2; -+ u32 uval2; -+ int ret; - - /* - * Userspace might have messed up non-PI and PI futexes [3] -@@ -1441,6 +1442,11 @@ static int wake_futex_pi(u32 __user *uad - if (ret) - goto out_unlock; - -+ /* -+ * This is a point of no return; once we modify the uval there is no -+ * going back and subsequent operations must not fail. -+ */ -+ - raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); -@@ -1452,9 +1458,6 @@ static int wake_futex_pi(u32 __user *uad - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); - -- /* -- * We've updated the uservalue, this unlock cannot fail. -- */ - postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); - - out_unlock: diff --git a/debian/patches/features/all/rt/0002-futex-Use-smp_store_release-in-mark_wake_futex.patch b/debian/patches/features/all/rt/0002-futex-Use-smp_store_release-in-mark_wake_futex.patch deleted file mode 100644 index a617739bf..000000000 --- a/debian/patches/features/all/rt/0002-futex-Use-smp_store_release-in-mark_wake_futex.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:49 +0100 -Subject: [PATCH] futex: Use smp_store_release() in mark_wake_futex() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 1b367ece0d7e696cab1c8501bab282cc6a538b3f - -Since the futex_q can dissapear the instruction after assigning NULL, -this really should be a RELEASE barrier. That stops loads from hitting -dead memory too. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.604296452@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1290,8 +1290,7 @@ static void mark_wake_futex(struct wake_ - * memory barrier is required here to prevent the following - * store to lock_ptr from getting ahead of the plist_del. 
- */ -- smp_wmb(); -- q->lock_ptr = NULL; -+ smp_store_release(&q->lock_ptr, NULL); - } - - static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter, diff --git a/debian/patches/features/all/rt/0002-hrtimer-Correct-blantanly-wrong-comment.patch b/debian/patches/features/all/rt/0002-hrtimer-Correct-blantanly-wrong-comment.patch new file mode 100644 index 000000000..c7fd1a9b7 --- /dev/null +++ b/debian/patches/features/all/rt/0002-hrtimer-Correct-blantanly-wrong-comment.patch @@ -0,0 +1,37 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:39:39 +0200 +Subject: [PATCH 02/36] hrtimer: Correct blantanly wrong comment +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The protection of a hrtimer which runs its callback against migration to a +different CPU has nothing to do with hard interrupt context. + +The protection against migration of a hrtimer running the expiry callback +is the pointer in the cpu_base which holds a pointer to the currently +running timer. This pointer is evaluated in the code which potentially +switches the timer base and makes sure it's kept on the CPU on which the +callback is running. + +Reported-by: Anna-Maria Gleixner +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1197,9 +1197,9 @@ static void __run_hrtimer(struct hrtimer + timer->is_rel = false; + + /* +- * Because we run timers from hardirq context, there is no chance +- * they get migrated to another cpu, therefore its safe to unlock +- * the timer base. ++ * The timer is marked as running in the cpu base, so it is ++ * protected against migration to a different CPU even if the lock ++ * is dropped. + */ + raw_spin_unlock(&cpu_base->lock); + trace_hrtimer_expire_entry(timer, now); diff --git a/debian/patches/features/all/rt/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch b/debian/patches/features/all/rt/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch deleted file mode 100644 index db04feb73..000000000 --- a/debian/patches/features/all/rt/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch +++ /dev/null @@ -1,167 +0,0 @@ -From: Xunlei Pang -Date: Thu, 23 Mar 2017 15:56:08 +0100 -Subject: [PATCH 2/9] sched/rtmutex/deadline: Fix a PI crash for deadline tasks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -A crash happened while I was playing with deadline PI rtmutex. - - BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 - IP: [] rt_mutex_get_top_task+0x1f/0x30 - PGD 232a75067 PUD 230947067 PMD 0 - Oops: 0000 [#1] SMP - CPU: 1 PID: 10994 Comm: a.out Not tainted - - Call Trace: - [] enqueue_task+0x2c/0x80 - [] activate_task+0x23/0x30 - [] pull_dl_task+0x1d5/0x260 - [] pre_schedule_dl+0x16/0x20 - [] __schedule+0xd3/0x900 - [] schedule+0x29/0x70 - [] __rt_mutex_slowlock+0x4b/0xc0 - [] rt_mutex_slowlock+0xd1/0x190 - [] rt_mutex_timed_lock+0x53/0x60 - [] futex_lock_pi.isra.18+0x28c/0x390 - [] do_futex+0x190/0x5b0 - [] SyS_futex+0x80/0x180 - -This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi() -are only protected by pi_lock when operating pi waiters, while -rt_mutex_get_top_task(), will access them with rq lock held but -not holding pi_lock. 
- -In order to tackle it, we introduce new "pi_top_task" pointer -cached in task_struct, and add new rt_mutex_update_top_task() -to update its value, it can be called by rt_mutex_setprio() -which held both owner's pi_lock and rq lock. Thus "pi_top_task" -can be safely accessed by enqueue_task_dl() under rq lock. - -Originally-From: Peter Zijlstra -Signed-off-by: Xunlei Pang -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Steven Rostedt -Reviewed-by: Thomas Gleixner -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.157682758@infradead.org -Signed-off-by: Thomas Gleixner ---- - include/linux/init_task.h | 1 + - include/linux/sched.h | 2 ++ - include/linux/sched/rt.h | 1 + - kernel/fork.c | 1 + - kernel/locking/rtmutex.c | 29 +++++++++++++++++++++-------- - kernel/sched/core.c | 2 ++ - 6 files changed, 28 insertions(+), 8 deletions(-) - ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -181,6 +181,7 @@ extern struct cred init_cred; - #ifdef CONFIG_RT_MUTEXES - # define INIT_RT_MUTEXES(tsk) \ - .pi_waiters = RB_ROOT, \ -+ .pi_top_task = NULL, \ - .pi_waiters_leftmost = NULL, - #else - # define INIT_RT_MUTEXES(tsk) ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -779,6 +779,8 @@ struct task_struct { - /* PI waiters blocked on a rt_mutex held by this task: */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; -+ /* Updated under owner's pi_lock and rq lock */ -+ struct task_struct *pi_top_task; - /* Deadlock detection and priority inheritance handling: */ - struct rt_mutex_waiter *pi_blocked_on; - #endif ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -21,6 +21,7 @@ static inline int rt_task(struct task_st - extern int rt_mutex_getprio(struct task_struct *p); - extern void rt_mutex_setprio(struct task_struct *p, int prio); - extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); -+extern void rt_mutex_update_top_task(struct task_struct *p); - extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); - extern void rt_mutex_adjust_pi(struct task_struct *p); - static inline bool tsk_is_pi_blocked(struct task_struct *tsk) ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -1438,6 +1438,7 @@ static void rt_mutex_init_task(struct ta - #ifdef CONFIG_RT_MUTEXES - p->pi_waiters = RB_ROOT; - p->pi_waiters_leftmost = NULL; -+ p->pi_top_task = NULL; - p->pi_blocked_on = NULL; - #endif - } ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -323,6 +323,19 @@ rt_mutex_dequeue_pi(struct task_struct * - } - - /* -+ * Must hold both p->pi_lock and task_rq(p)->lock. -+ */ -+void rt_mutex_update_top_task(struct task_struct *p) -+{ -+ if (!task_has_pi_waiters(p)) { -+ p->pi_top_task = NULL; -+ return; -+ } -+ -+ p->pi_top_task = task_top_pi_waiter(p)->task; -+} -+ -+/* - * Calculate task priority from the waiter tree priority - * - * Return task->normal_prio when the waiter tree is empty or when -@@ -337,12 +350,12 @@ int rt_mutex_getprio(struct task_struct - task->normal_prio); - } - -+/* -+ * Must hold either p->pi_lock or task_rq(p)->lock. 
-+ */ - struct task_struct *rt_mutex_get_top_task(struct task_struct *task) - { -- if (likely(!task_has_pi_waiters(task))) -- return NULL; -- -- return task_top_pi_waiter(task)->task; -+ return task->pi_top_task; - } - - /* -@@ -351,12 +364,12 @@ struct task_struct *rt_mutex_get_top_tas - */ - int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) - { -- if (!task_has_pi_waiters(task)) -+ struct task_struct *top_task = rt_mutex_get_top_task(task); -+ -+ if (!top_task) - return newprio; - -- if (task_top_pi_waiter(task)->task->prio <= newprio) -- return task_top_pi_waiter(task)->task->prio; -- return newprio; -+ return min(top_task->prio, newprio); - } - - /* ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3712,6 +3712,8 @@ void rt_mutex_setprio(struct task_struct - goto out_unlock; - } - -+ rt_mutex_update_top_task(p); -+ - trace_sched_pi_setprio(p, prio); - oldprio = p->prio; - diff --git a/debian/patches/features/all/rt/0002-tracing-Reverse-the-order-of-trace_types_lock-and-ev.patch b/debian/patches/features/all/rt/0002-tracing-Reverse-the-order-of-trace_types_lock-and-ev.patch new file mode 100644 index 000000000..7856309b5 --- /dev/null +++ b/debian/patches/features/all/rt/0002-tracing-Reverse-the-order-of-trace_types_lock-and-ev.patch @@ -0,0 +1,189 @@ +From: "Steven Rostedt (VMware)" +Date: Fri, 22 Sep 2017 14:58:16 -0500 +Subject: [PATCH 02/42] tracing: Reverse the order of trace_types_lock and + event_mutex +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +In order to make future changes where we need to call +tracing_set_clock() from within an event command, the order of +trace_types_lock and event_mutex must be reversed, as the event command +will hold event_mutex and the trace_types_lock is taken from within +tracing_set_clock(). 
+ +Requested-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace.c | 5 +++++ + kernel/trace/trace_events.c | 31 +++++++++++++++---------------- + 2 files changed, 20 insertions(+), 16 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7687,6 +7687,7 @@ static int instance_mkdir(const char *na + struct trace_array *tr; + int ret; + ++ mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = -EEXIST; +@@ -7742,6 +7743,7 @@ static int instance_mkdir(const char *na + list_add(&tr->list, &ftrace_trace_arrays); + + mutex_unlock(&trace_types_lock); ++ mutex_unlock(&event_mutex); + + return 0; + +@@ -7753,6 +7755,7 @@ static int instance_mkdir(const char *na + + out_unlock: + mutex_unlock(&trace_types_lock); ++ mutex_unlock(&event_mutex); + + return ret; + +@@ -7765,6 +7768,7 @@ static int instance_rmdir(const char *na + int ret; + int i; + ++ mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = -ENODEV; +@@ -7810,6 +7814,7 @@ static int instance_rmdir(const char *na + + out_unlock: + mutex_unlock(&trace_types_lock); ++ mutex_unlock(&event_mutex); + + return ret; + } +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -1406,8 +1406,8 @@ static int subsystem_open(struct inode * + return -ENODEV; + + /* Make sure the system still exists */ +- mutex_lock(&trace_types_lock); + mutex_lock(&event_mutex); ++ mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + list_for_each_entry(dir, &tr->systems, list) { + if (dir == inode->i_private) { +@@ -1421,8 +1421,8 @@ static int subsystem_open(struct inode * + } + } + exit_loop: +- mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); ++ mutex_unlock(&event_mutex); + + if (!system) + return -ENODEV; +@@ -2294,15 +2294,15 @@ static void __add_event_to_tracers(struc + int trace_add_event_call(struct trace_event_call *call) + { + int ret; +- mutex_lock(&trace_types_lock); + mutex_lock(&event_mutex); ++ mutex_lock(&trace_types_lock); + + ret = __register_event(call, NULL); + if (ret >= 0) + __add_event_to_tracers(call); + +- mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); ++ mutex_unlock(&event_mutex); + return ret; + } + +@@ -2356,13 +2356,13 @@ int trace_remove_event_call(struct trace + { + int ret; + +- mutex_lock(&trace_types_lock); + mutex_lock(&event_mutex); ++ mutex_lock(&trace_types_lock); + down_write(&trace_event_sem); + ret = probe_remove_event_call(call); + up_write(&trace_event_sem); +- mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); ++ mutex_unlock(&event_mutex); + + return ret; + } +@@ -2424,8 +2424,8 @@ static int trace_module_notify(struct no + { + struct module *mod = data; + +- mutex_lock(&trace_types_lock); + mutex_lock(&event_mutex); ++ mutex_lock(&trace_types_lock); + switch (val) { + case MODULE_STATE_COMING: + trace_module_add_events(mod); +@@ -2434,8 +2434,8 @@ static int trace_module_notify(struct no + trace_module_remove_events(mod); + break; + } +- mutex_unlock(&event_mutex); + mutex_unlock(&trace_types_lock); ++ mutex_unlock(&event_mutex); + + return 0; + } +@@ -2950,24 +2950,24 @@ create_event_toplevel_files(struct dentr + * creates the event hierachry in the @parent/events directory. + * + * Returns 0 on success. ++ * ++ * Must be called with event_mutex held. 
+ */ + int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) + { + int ret; + +- mutex_lock(&event_mutex); ++ lockdep_assert_held(&event_mutex); + + ret = create_event_toplevel_files(parent, tr); + if (ret) +- goto out_unlock; ++ goto out; + + down_write(&trace_event_sem); + __trace_add_event_dirs(tr); + up_write(&trace_event_sem); + +- out_unlock: +- mutex_unlock(&event_mutex); +- ++ out: + return ret; + } + +@@ -2996,9 +2996,10 @@ early_event_add_tracer(struct dentry *pa + return ret; + } + ++/* Must be called with event_mutex held */ + int event_trace_del_tracer(struct trace_array *tr) + { +- mutex_lock(&event_mutex); ++ lockdep_assert_held(&event_mutex); + + /* Disable any event triggers and associated soft-disabled events */ + clear_event_triggers(tr); +@@ -3019,8 +3020,6 @@ int event_trace_del_tracer(struct trace_ + + tr->event_dir = NULL; + +- mutex_unlock(&event_mutex); +- + return 0; + } + diff --git a/debian/patches/features/all/rt/0002-workqueue-Provide-work_on_cpu_safe.patch b/debian/patches/features/all/rt/0002-workqueue-Provide-work_on_cpu_safe.patch deleted file mode 100644 index 99f59c1e0..000000000 --- a/debian/patches/features/all/rt/0002-workqueue-Provide-work_on_cpu_safe.patch +++ /dev/null @@ -1,84 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:28 +0200 -Subject: [PATCH 02/13] workqueue: Provide work_on_cpu_safe() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -work_on_cpu() is not protected against CPU hotplug. For code which requires -to be either executed on an online CPU or to fail if the CPU is not -available the callsite would have to protect against CPU hotplug. - -Provide a function which does get/put_online_cpus() around the call to -work_on_cpu() and fails the call with -ENODEV if the target CPU is not -online. - -Preparatory patch to convert several racy task affinity manipulations. - -Signed-off-by: Thomas Gleixner -Acked-by: Tejun Heo -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: Viresh Kumar -Cc: Michael Ellerman -Cc: "David S. Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201042.262610721@linutronix.de -Signed-off-by: Thomas Gleixner ---- - include/linux/workqueue.h | 5 +++++ - kernel/workqueue.c | 23 +++++++++++++++++++++++ - 2 files changed, 28 insertions(+) - ---- a/include/linux/workqueue.h -+++ b/include/linux/workqueue.h -@@ -608,8 +608,13 @@ static inline long work_on_cpu(int cpu, - { - return fn(arg); - } -+static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) -+{ -+ return fn(arg); -+} - #else - long work_on_cpu(int cpu, long (*fn)(void *), void *arg); -+long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); - #endif /* CONFIG_SMP */ - - #ifdef CONFIG_FREEZER ---- a/kernel/workqueue.c -+++ b/kernel/workqueue.c -@@ -4735,6 +4735,29 @@ long work_on_cpu(int cpu, long (*fn)(voi - return wfc.ret; - } - EXPORT_SYMBOL_GPL(work_on_cpu); -+ -+/** -+ * work_on_cpu_safe - run a function in thread context on a particular cpu -+ * @cpu: the cpu to run on -+ * @fn: the function to run -+ * @arg: the function argument -+ * -+ * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold -+ * any locks which would prevent @fn from completing. -+ * -+ * Return: The value @fn returns. 
-+ */ -+long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) -+{ -+ long ret = -ENODEV; -+ -+ get_online_cpus(); -+ if (cpu_online(cpu)) -+ ret = work_on_cpu(cpu, fn, arg); -+ put_online_cpus(); -+ return ret; -+} -+EXPORT_SYMBOL_GPL(work_on_cpu_safe); - #endif /* CONFIG_SMP */ - - #ifdef CONFIG_FREEZER diff --git a/debian/patches/features/all/rt/0003-arm64-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0003-arm64-Adjust-system_state-check.patch deleted file mode 100644 index a2cb59865..000000000 --- a/debian/patches/features/all/rt/0003-arm64-Adjust-system_state-check.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:34 +0200 -Subject: [PATCH 03/17] arm64: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in smp_send_stop() to handle the extra states. - -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Mark Rutland -Acked-by: Catalin Marinas -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Steven Rostedt -Cc: Will Deacon -Link: http://lkml.kernel.org/r/20170516184735.112589728@linutronix.de -Signed-off-by: Ingo Molnar ---- - arch/arm64/kernel/smp.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/arch/arm64/kernel/smp.c -+++ b/arch/arm64/kernel/smp.c -@@ -915,8 +915,7 @@ void smp_send_stop(void) - cpumask_copy(&mask, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), &mask); - -- if (system_state == SYSTEM_BOOTING || -- system_state == SYSTEM_RUNNING) -+ if (system_state <= SYSTEM_RUNNING) - pr_crit("SMP: stopping secondary CPUs\n"); - smp_cross_call(&mask, IPI_CPU_STOP); - } diff --git a/debian/patches/features/all/rt/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch b/debian/patches/features/all/rt/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch deleted file mode 100644 index 5451e3aa8..000000000 --- a/debian/patches/features/all/rt/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch +++ /dev/null @@ -1,36 +0,0 @@ -From: "Darren Hart (VMware)" -Date: Fri, 14 Apr 2017 15:31:38 -0700 -Subject: [PATCH 3/4] futex: Clarify mark_wake_futex memory barrier usage -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Clarify the scenario described in mark_wake_futex requiring the -smp_store_release(). Update the comment to explicitly refer to the -plist_del now under __unqueue_futex() (previously plist_del was in the -same function as the comment). - -Signed-off-by: Darren Hart (VMware) -Cc: Peter Zijlstra -Link: http://lkml.kernel.org/r/20170414223138.GA4222@fury -Signed-off-by: Thomas Gleixner ---- - kernel/futex.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1380,10 +1380,11 @@ static void mark_wake_futex(struct wake_ - wake_q_add(wake_q, p); - __unqueue_futex(q); - /* -- * The waiting task can free the futex_q as soon as -- * q->lock_ptr = NULL is written, without taking any locks. A -- * memory barrier is required here to prevent the following -- * store to lock_ptr from getting ahead of the plist_del. -+ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL -+ * is written, without taking any locks. 
This is possible in the event -+ * of a spurious wakeup, for example. A memory barrier is required here -+ * to prevent the following store to lock_ptr from getting ahead of the -+ * plist_del in __unqueue_futex(). - */ - smp_store_release(&q->lock_ptr, NULL); - } diff --git a/debian/patches/features/all/rt/0003-futex-Remove-rt_mutex_deadlock_account_.patch b/debian/patches/features/all/rt/0003-futex-Remove-rt_mutex_deadlock_account_.patch deleted file mode 100644 index d08106a7a..000000000 --- a/debian/patches/features/all/rt/0003-futex-Remove-rt_mutex_deadlock_account_.patch +++ /dev/null @@ -1,185 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:50 +0100 -Subject: [PATCH] futex: Remove rt_mutex_deadlock_account_*() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit fffa954fb528963c2fb7b0c0084eb77e2be7ab52 - -These are unused and clutter up the code. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.652692478@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex-debug.c | 9 ------- - kernel/locking/rtmutex-debug.h | 3 -- - kernel/locking/rtmutex.c | 47 +++++++++++++++-------------------------- - kernel/locking/rtmutex.h | 2 - - 4 files changed, 18 insertions(+), 43 deletions(-) - ---- a/kernel/locking/rtmutex-debug.c -+++ b/kernel/locking/rtmutex-debug.c -@@ -174,12 +174,3 @@ void debug_rt_mutex_init(struct rt_mutex - lock->name = name; - } - --void --rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) --{ --} -- --void rt_mutex_deadlock_account_unlock(struct task_struct *task) --{ --} -- ---- a/kernel/locking/rtmutex-debug.h -+++ b/kernel/locking/rtmutex-debug.h -@@ -9,9 +9,6 @@ - * This file contains macros used solely by rtmutex.c. Debug version. - */ - --extern void --rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); --extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); - extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); - extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); - extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -938,8 +938,6 @@ static int try_to_take_rt_mutex(struct r - */ - rt_mutex_set_owner(lock, task); - -- rt_mutex_deadlock_account_lock(lock, task); -- - return 1; - } - -@@ -1342,8 +1340,6 @@ static bool __sched rt_mutex_slowunlock( - - debug_rt_mutex_unlock(lock); - -- rt_mutex_deadlock_account_unlock(current); -- - /* - * We must be careful here if the fast path is enabled. 
If we - * have no waiters queued we cannot set owner to NULL here -@@ -1409,11 +1405,10 @@ rt_mutex_fastlock(struct rt_mutex *lock, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk)) - { -- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { -- rt_mutex_deadlock_account_lock(lock, current); -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 0; -- } else -- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); -+ -+ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); - } - - static inline int -@@ -1425,21 +1420,19 @@ rt_mutex_timed_fastlock(struct rt_mutex - enum rtmutex_chainwalk chwalk)) - { - if (chwalk == RT_MUTEX_MIN_CHAINWALK && -- likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { -- rt_mutex_deadlock_account_lock(lock, current); -+ likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 0; -- } else -- return slowfn(lock, state, timeout, chwalk); -+ -+ return slowfn(lock, state, timeout, chwalk); - } - - static inline int - rt_mutex_fasttrylock(struct rt_mutex *lock, - int (*slowfn)(struct rt_mutex *lock)) - { -- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { -- rt_mutex_deadlock_account_lock(lock, current); -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 1; -- } -+ - return slowfn(lock); - } - -@@ -1449,19 +1442,18 @@ rt_mutex_fastunlock(struct rt_mutex *loc - struct wake_q_head *wqh)) - { - DEFINE_WAKE_Q(wake_q); -+ bool deboost; - -- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { -- rt_mutex_deadlock_account_unlock(current); -+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) -+ return; - -- } else { -- bool deboost = slowfn(lock, &wake_q); -+ deboost = slowfn(lock, &wake_q); - -- wake_up_q(&wake_q); -+ wake_up_q(&wake_q); - -- /* Undo pi boosting if necessary: */ -- if (deboost) -- rt_mutex_adjust_prio(current); -- } -+ /* Undo pi boosting if necessary: */ -+ if (deboost) -+ rt_mutex_adjust_prio(current); - } - - /** -@@ -1572,10 +1564,9 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); - bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wqh) - { -- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { -- rt_mutex_deadlock_account_unlock(current); -+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) - return false; -- } -+ - return rt_mutex_slowunlock(lock, wqh); - } - -@@ -1637,7 +1628,6 @@ void rt_mutex_init_proxy_locked(struct r - __rt_mutex_init(lock, NULL); - debug_rt_mutex_proxy_lock(lock, proxy_owner); - rt_mutex_set_owner(lock, proxy_owner); -- rt_mutex_deadlock_account_lock(lock, proxy_owner); - } - - /** -@@ -1657,7 +1647,6 @@ void rt_mutex_proxy_unlock(struct rt_mut - { - debug_rt_mutex_proxy_unlock(lock); - rt_mutex_set_owner(lock, NULL); -- rt_mutex_deadlock_account_unlock(proxy_owner); - } - - /** ---- a/kernel/locking/rtmutex.h -+++ b/kernel/locking/rtmutex.h -@@ -11,8 +11,6 @@ - */ - - #define rt_mutex_deadlock_check(l) (0) --#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) --#define rt_mutex_deadlock_account_unlock(l) do { } while (0) - #define debug_rt_mutex_init_waiter(w) do { } while (0) - #define debug_rt_mutex_free_waiter(w) do { } while (0) - #define debug_rt_mutex_lock(l) do { } while (0) diff --git a/debian/patches/features/all/rt/0003-hrtimer-Fix-kerneldoc-for-struct-hrtimer_cpu_base.patch b/debian/patches/features/all/rt/0003-hrtimer-Fix-kerneldoc-for-struct-hrtimer_cpu_base.patch new file mode 100644 index 000000000..ae156cadb --- /dev/null +++ 
b/debian/patches/features/all/rt/0003-hrtimer-Fix-kerneldoc-for-struct-hrtimer_cpu_base.patch @@ -0,0 +1,43 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:40 +0200 +Subject: [PATCH 03/36] hrtimer: Fix kerneldoc for struct hrtimer_cpu_base +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The sequence '/**' marks the start of a struct description. Add the +missing second asterisk. While at it adapt the ordering of the struct +members to the struct definition and document the purpose of +expires_next more precisely. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -144,7 +144,7 @@ enum hrtimer_base_type { + HRTIMER_MAX_CLOCK_BASES, + }; + +-/* ++/** + * struct hrtimer_cpu_base - the per cpu clock bases + * @lock: lock protecting the base and associated clock bases + * and timers +@@ -153,12 +153,12 @@ enum hrtimer_base_type { + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events +- * @expires_next: absolute time of the next event which was scheduled +- * via clock_set_next_event() +- * @next_timer: Pointer to the first expiring timer + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hres_active: State of high resolution mode + * @hang_detected: The last hrtimer interrupt detected a hang ++ * @expires_next: absolute time of the next event, is required for remote ++ * hrtimer enqueue ++ * @next_timer: Pointer to the first expiring timer + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs diff --git a/debian/patches/features/all/rt/0003-ia64-salinfo-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0003-ia64-salinfo-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index f22b59785..000000000 --- a/debian/patches/features/all/rt/0003-ia64-salinfo-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,129 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:29 +0200 -Subject: [PATCH 03/13] ia64/salinfo: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Some of the file operations in /proc/sal require to run code on the -requested cpu. This is achieved by temporarily setting the affinity of the -calling user space thread to the requested CPU and reset it to the original -affinity afterwards. - -That's racy vs. CPU hotplug and concurrent affinity settings for that -thread resulting in code executing on the wrong CPU and overwriting the -new affinity setting. - -Replace it by using work_on_cpu_safe() which guarantees to run the code on -the requested CPU or to fail in case the CPU is offline. - -Signed-off-by: Thomas Gleixner -Cc: Fenghua Yu -Cc: Tony Luck -Cc: linux-ia64@vger.kernel.org -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: Viresh Kumar -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. 
Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201042.341863457@linutronix.de -Signed-off-by: Thomas Gleixner ---- - arch/ia64/kernel/salinfo.c | 31 ++++++++++++------------------- - 1 file changed, 12 insertions(+), 19 deletions(-) - ---- a/arch/ia64/kernel/salinfo.c -+++ b/arch/ia64/kernel/salinfo.c -@@ -179,14 +179,14 @@ struct salinfo_platform_oemdata_parms { - const u8 *efi_guid; - u8 **oemdata; - u64 *oemdata_size; -- int ret; - }; - --static void -+static long - salinfo_platform_oemdata_cpu(void *context) - { - struct salinfo_platform_oemdata_parms *parms = context; -- parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); -+ -+ return salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); - } - - static void -@@ -380,16 +380,7 @@ salinfo_log_release(struct inode *inode, - return 0; - } - --static void --call_on_cpu(int cpu, void (*fn)(void *), void *arg) --{ -- cpumask_t save_cpus_allowed = current->cpus_allowed; -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); -- (*fn)(arg); -- set_cpus_allowed_ptr(current, &save_cpus_allowed); --} -- --static void -+static long - salinfo_log_read_cpu(void *context) - { - struct salinfo_data *data = context; -@@ -399,6 +390,7 @@ salinfo_log_read_cpu(void *context) - /* Clear corrected errors as they are read from SAL */ - if (rh->severity == sal_log_severity_corrected) - ia64_sal_clear_state_info(data->type); -+ return 0; - } - - static void -@@ -430,7 +422,7 @@ salinfo_log_new_read(int cpu, struct sal - spin_unlock_irqrestore(&data_saved_lock, flags); - - if (!data->saved_num) -- call_on_cpu(cpu, salinfo_log_read_cpu, data); -+ work_on_cpu_safe(cpu, salinfo_log_read_cpu, data); - if (!data->log_size) { - data->state = STATE_NO_DATA; - cpumask_clear_cpu(cpu, &data->cpu_event); -@@ -459,11 +451,13 @@ salinfo_log_read(struct file *file, char - return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); - } - --static void -+static long - salinfo_log_clear_cpu(void *context) - { - struct salinfo_data *data = context; -+ - ia64_sal_clear_state_info(data->type); -+ return 0; - } - - static int -@@ -486,7 +480,7 @@ salinfo_log_clear(struct salinfo_data *d - rh = (sal_log_record_header_t *)(data->log_buffer); - /* Corrected errors have already been cleared from SAL */ - if (rh->severity != sal_log_severity_corrected) -- call_on_cpu(cpu, salinfo_log_clear_cpu, data); -+ work_on_cpu_safe(cpu, salinfo_log_clear_cpu, data); - /* clearing a record may make a new record visible */ - salinfo_log_new_read(cpu, data); - if (data->state == STATE_LOG_RECORD) { -@@ -531,9 +525,8 @@ salinfo_log_write(struct file *file, con - .oemdata = &data->oemdata, - .oemdata_size = &data->oemdata_size - }; -- call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms); -- if (parms.ret) -- count = parms.ret; -+ count = work_on_cpu_safe(cpu, salinfo_platform_oemdata_cpu, -+ &parms); - } else - data->oemdata_size = 0; - } else diff --git a/debian/patches/features/all/rt/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch b/debian/patches/features/all/rt/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch deleted file mode 100644 index b4fc27ddb..000000000 --- a/debian/patches/features/all/rt/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Xunlei Pang -Date: Thu, 23 Mar 2017 15:56:09 +0100 -Subject: [PATCH 3/9] sched/deadline/rtmutex: Dont miss the - dl_runtime/dl_period update -Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Currently dl tasks will actually return at the very beginning -of rt_mutex_adjust_prio_chain() in !detect_deadlock cases: - - if (waiter->prio == task->prio) { - if (!detect_deadlock) - goto out_unlock_pi; // out here - else - requeue = false; - } - -As the deadline value of blocked deadline tasks(waiters) without -changing their sched_class(thus prio doesn't change) never changes, -this seems reasonable, but it actually misses the chance of updating -rt_mutex_waiter's "dl_runtime(period)_copy" if a waiter updates its -deadline parameters(dl_runtime, dl_period) or boosted waiter changes -to !deadline class. - -Thus, force deadline task not out by adding the !dl_prio() condition. - -Signed-off-by: Xunlei Pang -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Steven Rostedt -Reviewed-by: Thomas Gleixner -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/1460633827-345-7-git-send-email-xlpang@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.206577901@infradead.org -Signed-off-by: Thomas Gleixner ---- - kernel/locking/rtmutex.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -605,7 +605,7 @@ static int rt_mutex_adjust_prio_chain(st - * enabled we continue, but stop the requeueing in the chain - * walk. - */ -- if (waiter->prio == task->prio) { -+ if (waiter->prio == task->prio && !dl_task(task)) { - if (!detect_deadlock) - goto out_unlock_pi; - else diff --git a/debian/patches/features/all/rt/0003-tracing-Exclude-generic-fields-from-histograms.patch b/debian/patches/features/all/rt/0003-tracing-Exclude-generic-fields-from-histograms.patch new file mode 100644 index 000000000..74ad78151 --- /dev/null +++ b/debian/patches/features/all/rt/0003-tracing-Exclude-generic-fields-from-histograms.patch @@ -0,0 +1,38 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 14:58:17 -0500 +Subject: [PATCH 03/42] tracing: Exclude 'generic fields' from histograms +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +There are a small number of 'generic fields' (comm/COMM/cpu/CPU) that +are found by trace_find_event_field() but are only meant for +filtering. Specifically, they unlike normal fields, they have a size +of 0 and thus wreak havoc when used as a histogram key. + +Exclude these (return -EINVAL) when used as histogram keys. 
+ +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -450,7 +450,7 @@ static int create_val_field(struct hist_ + } + + field = trace_find_event_field(file->event_call, field_name); +- if (!field) { ++ if (!field || !field->size) { + ret = -EINVAL; + goto out; + } +@@ -548,7 +548,7 @@ static int create_key_field(struct hist_ + } + + field = trace_find_event_field(file->event_call, field_name); +- if (!field) { ++ if (!field || !field->size) { + ret = -EINVAL; + goto out; + } diff --git a/debian/patches/features/all/rt/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch b/debian/patches/features/all/rt/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch deleted file mode 100644 index b61eb4088..000000000 --- a/debian/patches/features/all/rt/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: "Darren Hart (VMware)" -Date: Fri, 14 Apr 2017 15:46:08 -0700 -Subject: [PATCH 4/4] MAINTAINERS: Add FUTEX SUBSYSTEM -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Add a MAINTAINERS block for the FUTEX SUBSYSTEM which includes the core -kernel code, include headers, testing code, and Documentation. Excludes -arch files, and higher level test code. - -I added tglx and mingo as M as they have made the tip commits and peterz -and myself as R. - -Signed-off-by: Darren Hart (VMware) -Cc: Peter Zijlstra -Cc: Shuah Khan -Cc: Arnaldo Carvalho de Melo -Link: http://lkml.kernel.org/r/20170414224608.GA5180@fury -Signed-off-by: Thomas Gleixner ---- - MAINTAINERS | 17 +++++++++++++++++ - 1 file changed, 17 insertions(+) - ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -5420,6 +5420,23 @@ F: fs/fuse/ - F: include/uapi/linux/fuse.h - F: Documentation/filesystems/fuse.txt - -+FUTEX SUBSYSTEM -+M: Thomas Gleixner -+M: Ingo Molnar -+R: Peter Zijlstra -+R: Darren Hart -+L: linux-kernel@vger.kernel.org -+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core -+S: Maintained -+F: kernel/futex.c -+F: kernel/futex_compat.c -+F: include/asm-generic/futex.h -+F: include/linux/futex.h -+F: include/uapi/linux/futex.h -+F: tools/testing/selftests/futex/ -+F: tools/perf/bench/futex* -+F: Documentation/*futex* -+ - FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit) - M: Rik Faith - L: linux-scsi@vger.kernel.org diff --git a/debian/patches/features/all/rt/0004-futex-rt_mutex-Provide-futex-specific-rt_mutex-API.patch b/debian/patches/features/all/rt/0004-futex-rt_mutex-Provide-futex-specific-rt_mutex-API.patch deleted file mode 100644 index 799c883fe..000000000 --- a/debian/patches/features/all/rt/0004-futex-rt_mutex-Provide-futex-specific-rt_mutex-API.patch +++ /dev/null @@ -1,221 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:51 +0100 -Subject: [PATCH] futex,rt_mutex: Provide futex specific rt_mutex API -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 5293c2efda37775346885c7e924d4ef7018ea60b - -Part of what makes futex_unlock_pi() intricate is that -rt_mutex_futex_unlock() -> rt_mutex_slowunlock() can drop -rt_mutex::wait_lock. - -This means it cannot rely on the atomicy of wait_lock, which would be -preferred in order to not rely on hb->lock so much. 
- -The reason rt_mutex_slowunlock() needs to drop wait_lock is because it can -race with the rt_mutex fastpath, however futexes have their own fast path. - -Since futexes already have a bunch of separate rt_mutex accessors, complete -that set and implement a rt_mutex variant without fastpath for them. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.702962446@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 30 ++++++++++----------- - kernel/locking/rtmutex.c | 55 +++++++++++++++++++++++++++++----------- - kernel/locking/rtmutex_common.h | 9 +++++- - 3 files changed, 62 insertions(+), 32 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -916,7 +916,7 @@ void exit_pi_state_list(struct task_stru - pi_state->owner = NULL; - raw_spin_unlock_irq(&curr->pi_lock); - -- rt_mutex_unlock(&pi_state->pi_mutex); -+ rt_mutex_futex_unlock(&pi_state->pi_mutex); - - spin_unlock(&hb->lock); - -@@ -1364,20 +1364,18 @@ static int wake_futex_pi(u32 __user *uad - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); - -- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -- -- deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -- - /* -- * First unlock HB so the waiter does not spin on it once he got woken -- * up. Second wake up the waiter before the priority is adjusted. If we -- * deboost first (and lose our higher priority), then the task might get -- * scheduled away before the wake up can take place. -+ * We've updated the uservalue, this unlock cannot fail. - */ -+ deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -+ -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - spin_unlock(&hb->lock); -- wake_up_q(&wake_q); -- if (deboost) -+ -+ if (deboost) { -+ wake_up_q(&wake_q); - rt_mutex_adjust_prio(current); -+ } - - return 0; - } -@@ -2253,7 +2251,7 @@ static int fixup_owner(u32 __user *uaddr - * task acquired the rt_mutex after we removed ourself from the - * rt_mutex waiters list. - */ -- if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { -+ if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) { - locked = 1; - goto out; - } -@@ -2568,7 +2566,7 @@ static int futex_lock_pi(u32 __user *uad - if (!trylock) { - ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); - } else { -- ret = rt_mutex_trylock(&q.pi_state->pi_mutex); -+ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); - /* Fixup the trylock return value: */ - ret = ret ? 0 : -EWOULDBLOCK; - } -@@ -2591,7 +2589,7 @@ static int futex_lock_pi(u32 __user *uad - * it and return the fault to userspace. - */ - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) -- rt_mutex_unlock(&q.pi_state->pi_mutex); -+ rt_mutex_futex_unlock(&q.pi_state->pi_mutex); - - /* Unqueue and drop the lock */ - unqueue_me_pi(&q); -@@ -2898,7 +2896,7 @@ static int futex_wait_requeue_pi(u32 __u - spin_lock(q.lock_ptr); - ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) -- rt_mutex_unlock(&q.pi_state->pi_mutex); -+ rt_mutex_futex_unlock(&q.pi_state->pi_mutex); - /* - * Drop the reference to the pi state which - * the requeue_pi() code acquired for us. -@@ -2938,7 +2936,7 @@ static int futex_wait_requeue_pi(u32 __u - * userspace. 
- */ - if (ret && rt_mutex_owner(pi_mutex) == current) -- rt_mutex_unlock(pi_mutex); -+ rt_mutex_futex_unlock(pi_mutex); - - /* Unqueue and drop the lock. */ - unqueue_me_pi(&q); ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1488,15 +1488,23 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_interrup - - /* - * Futex variant with full deadlock detection. -+ * Futex variants must not use the fast-path, see __rt_mutex_futex_unlock(). - */ --int rt_mutex_timed_futex_lock(struct rt_mutex *lock, -+int __sched rt_mutex_timed_futex_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *timeout) - { - might_sleep(); - -- return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, -- RT_MUTEX_FULL_CHAINWALK, -- rt_mutex_slowlock); -+ return rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, -+ timeout, RT_MUTEX_FULL_CHAINWALK); -+} -+ -+/* -+ * Futex variant, must not use fastpath. -+ */ -+int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) -+{ -+ return rt_mutex_slowtrylock(lock); - } - - /** -@@ -1555,19 +1563,38 @@ void __sched rt_mutex_unlock(struct rt_m - EXPORT_SYMBOL_GPL(rt_mutex_unlock); - - /** -- * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock -- * @lock: the rt_mutex to be unlocked -- * -- * Returns: true/false indicating whether priority adjustment is -- * required or not. -+ * Futex variant, that since futex variants do not use the fast-path, can be -+ * simple and will not need to retry. - */ --bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wqh) -+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wake_q) -+{ -+ lockdep_assert_held(&lock->wait_lock); -+ -+ debug_rt_mutex_unlock(lock); -+ -+ if (!rt_mutex_has_waiters(lock)) { -+ lock->owner = NULL; -+ return false; /* done */ -+ } -+ -+ mark_wakeup_next_waiter(wake_q, lock); -+ return true; /* deboost and wakeups */ -+} -+ -+void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) - { -- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) -- return false; -+ DEFINE_WAKE_Q(wake_q); -+ bool deboost; - -- return rt_mutex_slowunlock(lock, wqh); -+ raw_spin_lock_irq(&lock->wait_lock); -+ deboost = __rt_mutex_futex_unlock(lock, &wake_q); -+ raw_spin_unlock_irq(&lock->wait_lock); -+ -+ if (deboost) { -+ wake_up_q(&wake_q); -+ rt_mutex_adjust_prio(current); -+ } - } - - /** ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -109,9 +109,14 @@ extern int rt_mutex_start_proxy_lock(str - extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter); -+ - extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); --extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wqh); -+extern int rt_mutex_futex_trylock(struct rt_mutex *l); -+ -+extern void rt_mutex_futex_unlock(struct rt_mutex *lock); -+extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wqh); -+ - extern void rt_mutex_adjust_prio(struct task_struct *task); - - #ifdef CONFIG_DEBUG_RT_MUTEXES diff --git a/debian/patches/features/all/rt/0004-hrtimer-Cleanup-clock-argument-in-schedule_hrtimeout.patch b/debian/patches/features/all/rt/0004-hrtimer-Cleanup-clock-argument-in-schedule_hrtimeout.patch new file mode 100644 index 000000000..247a8bd0c --- /dev/null +++ b/debian/patches/features/all/rt/0004-hrtimer-Cleanup-clock-argument-in-schedule_hrtimeout.patch @@ -0,0 +1,81 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 
23:39:41 +0200 +Subject: [PATCH 04/36] hrtimer: Cleanup clock argument in + schedule_hrtimeout_range_clock() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +schedule_hrtimeout_range_clock() uses an integer for the clock id +instead of the predefined type "clockid_t". The ID of the clock is +indicated in hrtimer code as clock_id. Therefore change the name of +the variable as well to make it consistent. + +While at it, clean up the description for the function parameters clock_id +and mode. The clock modes and the clock ids are not restricted as the +comment suggests. Fix the mode description as well for the callers of +schedule_hrtimeout_range_clock(). + +No functional change. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 2 +- + kernel/time/hrtimer.c | 12 ++++++------ + 2 files changed, 7 insertions(+), 7 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -462,7 +462,7 @@ extern int schedule_hrtimeout_range(ktim + extern int schedule_hrtimeout_range_clock(ktime_t *expires, + u64 delta, + const enum hrtimer_mode mode, +- int clock); ++ clockid_t clock_id); + extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); + + /* Soft interrupt function to run the hrtimer queues: */ +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1664,12 +1664,12 @@ void __init hrtimers_init(void) + * schedule_hrtimeout_range_clock - sleep until timeout + * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (ktime_t) +- * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL +- * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME ++ * @mode: timer mode ++ * @clock_id: timer clock to be used + */ + int __sched + schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, +- const enum hrtimer_mode mode, int clock) ++ const enum hrtimer_mode mode, clockid_t clock_id) + { + struct hrtimer_sleeper t; + +@@ -1690,7 +1690,7 @@ schedule_hrtimeout_range_clock(ktime_t * + return -EINTR; + } + +- hrtimer_init_on_stack(&t.timer, clock, mode); ++ hrtimer_init_on_stack(&t.timer, clock_id, mode); + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); + + hrtimer_init_sleeper(&t, current); +@@ -1712,7 +1712,7 @@ schedule_hrtimeout_range_clock(ktime_t * + * schedule_hrtimeout_range - sleep until timeout + * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (ktime_t) +- * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL ++ * @mode: timer mode + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless +@@ -1751,7 +1751,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_ran + /** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) +- * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL ++ * @mode: timer mode + * + * Make the current task sleep until the given expiry time has + * elapsed. 
The routine will return immediately unless diff --git a/debian/patches/features/all/rt/0004-ia64-sn-hwperf-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0004-ia64-sn-hwperf-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index 8e6c95ad3..000000000 --- a/debian/patches/features/all/rt/0004-ia64-sn-hwperf-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 6 Apr 2017 14:56:18 +0200 -Subject: [PATCH 04/13] ia64/sn/hwperf: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -sn_hwperf_op_cpu() which is invoked from an ioctl requires to run code on -the requested cpu. This is achieved by temporarily setting the affinity of -the calling user space thread to the requested CPU and reset it to the -original affinity afterwards. - -That's racy vs. CPU hotplug and concurrent affinity settings for that -thread resulting in code executing on the wrong CPU and overwriting the -new affinity setting. - -Replace it by using work_on_cpu_safe() which guarantees to run the code on -the requested CPU or to fail in case the CPU is offline. - -Signed-off-by: Thomas Gleixner -Cc: Fenghua Yu -Cc: Tony Luck -Cc: linux-ia64@vger.kernel.org -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: Viresh Kumar -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704122251450.2548@nanos -Signed-off-by: Thomas Gleixner ---- - arch/ia64/sn/kernel/sn2/sn_hwperf.c | 17 +++++++++-------- - 1 file changed, 9 insertions(+), 8 deletions(-) - ---- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c -+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c -@@ -598,12 +598,17 @@ static void sn_hwperf_call_sal(void *inf - op_info->ret = r; - } - -+static long sn_hwperf_call_sal_work(void *info) -+{ -+ sn_hwperf_call_sal(info); -+ return 0; -+} -+ - static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info) - { - u32 cpu; - u32 use_ipi; - int r = 0; -- cpumask_t save_allowed; - - cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32; - use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK; -@@ -629,13 +634,9 @@ static int sn_hwperf_op_cpu(struct sn_hw - /* use an interprocessor interrupt to call SAL */ - smp_call_function_single(cpu, sn_hwperf_call_sal, - op_info, 1); -- } -- else { -- /* migrate the task before calling SAL */ -- save_allowed = current->cpus_allowed; -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); -- sn_hwperf_call_sal(op_info); -- set_cpus_allowed_ptr(current, &save_allowed); -+ } else { -+ /* Call on the target CPU */ -+ work_on_cpu_safe(cpu, sn_hwperf_call_sal_work, op_info); - } - } - r = op_info->ret; diff --git a/debian/patches/features/all/rt/0004-rtmutex-Clean-up.patch b/debian/patches/features/all/rt/0004-rtmutex-Clean-up.patch deleted file mode 100644 index edd348e3e..000000000 --- a/debian/patches/features/all/rt/0004-rtmutex-Clean-up.patch +++ /dev/null @@ -1,145 +0,0 @@ -From: Peter Zijlstra -Date: Thu, 23 Mar 2017 15:56:10 +0100 -Subject: [PATCH 4/9] rtmutex: Clean up -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Previous patches changed the meaning of the return value of -rt_mutex_slowunlock(); update comments and code to reflect this. 
- -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.255058238@infradead.org -Signed-off-by: Thomas Gleixner ---- - kernel/futex.c | 7 ++++--- - kernel/locking/rtmutex.c | 28 +++++++++++++--------------- - kernel/locking/rtmutex_common.h | 2 +- - 3 files changed, 18 insertions(+), 19 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1394,7 +1394,7 @@ static int wake_futex_pi(u32 __user *uad - { - u32 uninitialized_var(curval), newval; - struct task_struct *new_owner; -- bool deboost = false; -+ bool postunlock = false; - DEFINE_WAKE_Q(wake_q); - int ret = 0; - -@@ -1455,12 +1455,13 @@ static int wake_futex_pi(u32 __user *uad - /* - * We've updated the uservalue, this unlock cannot fail. - */ -- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); - - out_unlock: - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - -- rt_mutex_postunlock(&wake_q, deboost); -+ if (postunlock) -+ rt_mutex_postunlock(&wake_q); - - return ret; - } ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1330,7 +1330,8 @@ static inline int rt_mutex_slowtrylock(s - - /* - * Slow path to release a rt-mutex. -- * Return whether the current task needs to undo a potential priority boosting. -+ * -+ * Return whether the current task needs to call rt_mutex_postunlock(). - */ - static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, - struct wake_q_head *wake_q) -@@ -1401,8 +1402,7 @@ static bool __sched rt_mutex_slowunlock( - - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - -- /* check PI boosting */ -- return true; -+ return true; /* call rt_mutex_postunlock() */ - } - - /* -@@ -1449,15 +1449,14 @@ rt_mutex_fasttrylock(struct rt_mutex *lo - } - - /* -- * Undo pi boosting (if necessary) and wake top waiter. -+ * Performs the wakeup of the the top-waiter and re-enables preemption. 
- */ --void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost) -+void rt_mutex_postunlock(struct wake_q_head *wake_q) - { - wake_up_q(wake_q); - - /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ -- if (deboost) -- preempt_enable(); -+ preempt_enable(); - } - - static inline void -@@ -1466,14 +1465,12 @@ rt_mutex_fastunlock(struct rt_mutex *loc - struct wake_q_head *wqh)) - { - DEFINE_WAKE_Q(wake_q); -- bool deboost; - - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) - return; - -- deboost = slowfn(lock, &wake_q); -- -- rt_mutex_postunlock(&wake_q, deboost); -+ if (slowfn(lock, &wake_q)) -+ rt_mutex_postunlock(&wake_q); - } - - /** -@@ -1593,19 +1590,20 @@ bool __sched __rt_mutex_futex_unlock(str - */ - preempt_disable(); - -- return true; /* deboost and wakeups */ -+ return true; /* call postunlock() */ - } - - void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) - { - DEFINE_WAKE_Q(wake_q); -- bool deboost; -+ bool postunlock; - - raw_spin_lock_irq(&lock->wait_lock); -- deboost = __rt_mutex_futex_unlock(lock, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(lock, &wake_q); - raw_spin_unlock_irq(&lock->wait_lock); - -- rt_mutex_postunlock(&wake_q, deboost); -+ if (postunlock) -+ rt_mutex_postunlock(&wake_q); - } - - /** ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct - extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wqh); - --extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost); -+extern void rt_mutex_postunlock(struct wake_q_head *wake_q); - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # include "rtmutex-debug.h" diff --git a/debian/patches/features/all/rt/0004-tracing-Remove-lookups-from-tracing_map-hitcount.patch b/debian/patches/features/all/rt/0004-tracing-Remove-lookups-from-tracing_map-hitcount.patch new file mode 100644 index 000000000..63bf44800 --- /dev/null +++ b/debian/patches/features/all/rt/0004-tracing-Remove-lookups-from-tracing_map-hitcount.patch @@ -0,0 +1,27 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 14:58:18 -0500 +Subject: [PATCH 04/42] tracing: Remove lookups from tracing_map hitcount +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Lookups inflate the hitcount, making it essentially useless. Only +inserts and updates should really affect the hitcount anyway, so +explicitly filter lookups out. 
+ +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/tracing_map.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -428,7 +428,8 @@ static inline struct tracing_map_elt * + + if (test_key && test_key == key_hash && entry->val && + keys_match(key, entry->val->key, map->key_size)) { +- atomic64_inc(&map->hits); ++ if (!lookup_only) ++ atomic64_inc(&map->hits); + return entry->val; + } + diff --git a/debian/patches/features/all/rt/0004-x86-smp-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0004-x86-smp-Adjust-system_state-check.patch deleted file mode 100644 index 1b96a211c..000000000 --- a/debian/patches/features/all/rt/0004-x86-smp-Adjust-system_state-check.patch +++ /dev/null @@ -1,34 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:35 +0200 -Subject: [PATCH 04/17] x86/smp: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in announce_cpu() to handle the extra states. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Steven Rostedt (VMware) -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Peter Zijlstra -Link: http://lkml.kernel.org/r/20170516184735.191715856@linutronix.de -Signed-off-by: Ingo Molnar ---- - arch/x86/kernel/smpboot.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/arch/x86/kernel/smpboot.c -+++ b/arch/x86/kernel/smpboot.c -@@ -863,7 +863,7 @@ static void announce_cpu(int cpu, int ap - if (cpu == 1) - printk(KERN_INFO "x86: Booting SMP configuration:\n"); - -- if (system_state == SYSTEM_BOOTING) { -+ if (system_state < SYSTEM_RUNNING) { - if (node != current_node) { - if (current_node > (-1)) - pr_cont("\n"); diff --git a/debian/patches/features/all/rt/0005-futex-Change-locking-rules.patch b/debian/patches/features/all/rt/0005-futex-Change-locking-rules.patch deleted file mode 100644 index 63c453d6f..000000000 --- a/debian/patches/features/all/rt/0005-futex-Change-locking-rules.patch +++ /dev/null @@ -1,371 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:52 +0100 -Subject: [PATCH] futex: Change locking rules -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 734009e96d1983ad739e5b656e03430b3660c913 - -Currently futex-pi relies on hb->lock to serialize everything. But hb->lock -creates another set of problems, especially priority inversions on RT where -hb->lock becomes a rt_mutex itself. - -The rt_mutex::wait_lock is the most obvious protection for keeping the -futex user space value and the kernel internal pi_state in sync. - -Rework and document the locking so rt_mutex::wait_lock is held accross all -operations which modify the user space value and the pi state. - -This allows to invoke rt_mutex_unlock() (including deboost) without holding -hb->lock as a next step. - -Nothing yet relies on the new locking rules. 
- -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.751993333@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 165 +++++++++++++++++++++++++++++++++++++++++++++------------ - 1 file changed, 132 insertions(+), 33 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -973,6 +973,39 @@ void exit_pi_state_list(struct task_stru - * - * [10] There is no transient state which leaves owner and user space - * TID out of sync. -+ * -+ * -+ * Serialization and lifetime rules: -+ * -+ * hb->lock: -+ * -+ * hb -> futex_q, relation -+ * futex_q -> pi_state, relation -+ * -+ * (cannot be raw because hb can contain arbitrary amount -+ * of futex_q's) -+ * -+ * pi_mutex->wait_lock: -+ * -+ * {uval, pi_state} -+ * -+ * (and pi_mutex 'obviously') -+ * -+ * p->pi_lock: -+ * -+ * p->pi_state_list -> pi_state->list, relation -+ * -+ * pi_state->refcount: -+ * -+ * pi_state lifetime -+ * -+ * -+ * Lock order: -+ * -+ * hb->lock -+ * pi_mutex->wait_lock -+ * p->pi_lock -+ * - */ - - /* -@@ -980,10 +1013,12 @@ void exit_pi_state_list(struct task_stru - * the pi_state against the user space value. If correct, attach to - * it. - */ --static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, -+static int attach_to_pi_state(u32 __user *uaddr, u32 uval, -+ struct futex_pi_state *pi_state, - struct futex_pi_state **ps) - { - pid_t pid = uval & FUTEX_TID_MASK; -+ int ret, uval2; - - /* - * Userspace might have messed up non-PI and PI futexes [3] -@@ -991,9 +1026,34 @@ static int attach_to_pi_state(u32 uval, - if (unlikely(!pi_state)) - return -EINVAL; - -+ /* -+ * We get here with hb->lock held, and having found a -+ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q -+ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), -+ * which in turn means that futex_lock_pi() still has a reference on -+ * our pi_state. -+ */ - WARN_ON(!atomic_read(&pi_state->refcount)); - - /* -+ * Now that we have a pi_state, we can acquire wait_lock -+ * and do the state validation. -+ */ -+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); -+ -+ /* -+ * Since {uval, pi_state} is serialized by wait_lock, and our current -+ * uval was read without holding it, it can have changed. Verify it -+ * still is what we expect it to be, otherwise retry the entire -+ * operation. -+ */ -+ if (get_futex_value_locked(&uval2, uaddr)) -+ goto out_efault; -+ -+ if (uval != uval2) -+ goto out_eagain; -+ -+ /* - * Handle the owner died case: - */ - if (uval & FUTEX_OWNER_DIED) { -@@ -1008,11 +1068,11 @@ static int attach_to_pi_state(u32 uval, - * is not 0. Inconsistent state. [5] - */ - if (pid) -- return -EINVAL; -+ goto out_einval; - /* - * Take a ref on the state and return success. [4] - */ -- goto out_state; -+ goto out_attach; - } - - /* -@@ -1024,14 +1084,14 @@ static int attach_to_pi_state(u32 uval, - * Take a ref on the state and return success. [6] - */ - if (!pid) -- goto out_state; -+ goto out_attach; - } else { - /* - * If the owner died bit is not set, then the pi_state - * must have an owner. [7] - */ - if (!pi_state->owner) -- return -EINVAL; -+ goto out_einval; - } - - /* -@@ -1040,11 +1100,29 @@ static int attach_to_pi_state(u32 uval, - * user space TID. 
[9/10] - */ - if (pid != task_pid_vnr(pi_state->owner)) -- return -EINVAL; --out_state: -+ goto out_einval; -+ -+out_attach: - atomic_inc(&pi_state->refcount); -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - *ps = pi_state; - return 0; -+ -+out_einval: -+ ret = -EINVAL; -+ goto out_error; -+ -+out_eagain: -+ ret = -EAGAIN; -+ goto out_error; -+ -+out_efault: -+ ret = -EFAULT; -+ goto out_error; -+ -+out_error: -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -+ return ret; - } - - /* -@@ -1095,6 +1173,9 @@ static int attach_to_pi_owner(u32 uval, - - /* - * No existing pi state. First waiter. [2] -+ * -+ * This creates pi_state, we have hb->lock held, this means nothing can -+ * observe this state, wait_lock is irrelevant. - */ - pi_state = alloc_pi_state(); - -@@ -1119,7 +1200,8 @@ static int attach_to_pi_owner(u32 uval, - return 0; - } - --static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, -+static int lookup_pi_state(u32 __user *uaddr, u32 uval, -+ struct futex_hash_bucket *hb, - union futex_key *key, struct futex_pi_state **ps) - { - struct futex_q *top_waiter = futex_top_waiter(hb, key); -@@ -1129,7 +1211,7 @@ static int lookup_pi_state(u32 uval, str - * attach to the pi_state when the validation succeeds. - */ - if (top_waiter) -- return attach_to_pi_state(uval, top_waiter->pi_state, ps); -+ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); - - /* - * We are the first waiter - try to look up the owner based on -@@ -1148,7 +1230,7 @@ static int lock_pi_update_atomic(u32 __u - if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) - return -EFAULT; - -- /*If user space value changed, let the caller retry */ -+ /* If user space value changed, let the caller retry */ - return curval != uval ? -EAGAIN : 0; - } - -@@ -1204,7 +1286,7 @@ static int futex_lock_pi_atomic(u32 __us - */ - top_waiter = futex_top_waiter(hb, key); - if (top_waiter) -- return attach_to_pi_state(uval, top_waiter->pi_state, ps); -+ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); - - /* - * No waiter and user TID is 0. We are here because the -@@ -1336,6 +1418,7 @@ static int wake_futex_pi(u32 __user *uad - - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { - ret = -EFAULT; -+ - } else if (curval != uval) { - /* - * If a unconditional UNLOCK_PI operation (user space did not -@@ -1348,6 +1431,7 @@ static int wake_futex_pi(u32 __user *uad - else - ret = -EINVAL; - } -+ - if (ret) { - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - return ret; -@@ -1823,7 +1907,7 @@ static int futex_requeue(u32 __user *uad - * If that call succeeds then we have pi_state and an - * initial refcount on it. - */ -- ret = lookup_pi_state(ret, hb2, &key2, &pi_state); -+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); - } - - switch (ret) { -@@ -2122,10 +2206,13 @@ static int fixup_pi_state_owner(u32 __us - { - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; - struct futex_pi_state *pi_state = q->pi_state; -- struct task_struct *oldowner = pi_state->owner; - u32 uval, uninitialized_var(curval), newval; -+ struct task_struct *oldowner; - int ret; - -+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); -+ -+ oldowner = pi_state->owner; - /* Owner died? */ - if (!pi_state->owner) - newtid |= FUTEX_OWNER_DIED; -@@ -2141,11 +2228,10 @@ static int fixup_pi_state_owner(u32 __us - * because we can fault here. Imagine swapped out pages or a fork - * that marked all the anonymous memory readonly for cow. 
- * -- * Modifying pi_state _before_ the user space value would -- * leave the pi_state in an inconsistent state when we fault -- * here, because we need to drop the hash bucket lock to -- * handle the fault. This might be observed in the PID check -- * in lookup_pi_state. -+ * Modifying pi_state _before_ the user space value would leave the -+ * pi_state in an inconsistent state when we fault here, because we -+ * need to drop the locks to handle the fault. This might be observed -+ * in the PID check in lookup_pi_state. - */ - retry: - if (get_futex_value_locked(&uval, uaddr)) -@@ -2166,47 +2252,60 @@ static int fixup_pi_state_owner(u32 __us - * itself. - */ - if (pi_state->owner != NULL) { -- raw_spin_lock_irq(&pi_state->owner->pi_lock); -+ raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); -- raw_spin_unlock_irq(&pi_state->owner->pi_lock); -+ raw_spin_unlock(&pi_state->owner->pi_lock); - } - - pi_state->owner = newowner; - -- raw_spin_lock_irq(&newowner->pi_lock); -+ raw_spin_lock(&newowner->pi_lock); - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &newowner->pi_state_list); -- raw_spin_unlock_irq(&newowner->pi_lock); -+ raw_spin_unlock(&newowner->pi_lock); -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -+ - return 0; - - /* -- * To handle the page fault we need to drop the hash bucket -- * lock here. That gives the other task (either the highest priority -- * waiter itself or the task which stole the rtmutex) the -- * chance to try the fixup of the pi_state. So once we are -- * back from handling the fault we need to check the pi_state -- * after reacquiring the hash bucket lock and before trying to -- * do another fixup. When the fixup has been done already we -- * simply return. -+ * To handle the page fault we need to drop the locks here. That gives -+ * the other task (either the highest priority waiter itself or the -+ * task which stole the rtmutex) the chance to try the fixup of the -+ * pi_state. So once we are back from handling the fault we need to -+ * check the pi_state after reacquiring the locks and before trying to -+ * do another fixup. When the fixup has been done already we simply -+ * return. -+ * -+ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely -+ * drop hb->lock since the caller owns the hb -> futex_q relation. -+ * Dropping the pi_mutex->wait_lock requires the state revalidate. 
- */ - handle_fault: -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - spin_unlock(q->lock_ptr); - - ret = fault_in_user_writeable(uaddr); - - spin_lock(q->lock_ptr); -+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); - - /* - * Check if someone else fixed it for us: - */ -- if (pi_state->owner != oldowner) -- return 0; -+ if (pi_state->owner != oldowner) { -+ ret = 0; -+ goto out_unlock; -+ } - - if (ret) -- return ret; -+ goto out_unlock; - - goto retry; -+ -+out_unlock: -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -+ return ret; - } - - static long futex_wait_restart(struct restart_block *restart); diff --git a/debian/patches/features/all/rt/0005-hrtimer-Fix-hrtimer-function-description.patch b/debian/patches/features/all/rt/0005-hrtimer-Fix-hrtimer-function-description.patch new file mode 100644 index 000000000..3223e8b2c --- /dev/null +++ b/debian/patches/features/all/rt/0005-hrtimer-Fix-hrtimer-function-description.patch @@ -0,0 +1,61 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:42 +0200 +Subject: [PATCH 05/36] hrtimer: Fix hrtimer function description +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The hrtimer_start[_range_ns]() starts a timer reliable on this CPU only +when HRTIMER_MODE_PINNED is set. Furthermore the HRTIMER_MODE_PINNED mode +is not considered, when a hrtimer is initialized. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 6 +++--- + kernel/time/hrtimer.c | 9 +++++---- + 2 files changed, 8 insertions(+), 7 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -361,11 +361,11 @@ extern void hrtimer_start_range_ns(struc + u64 range_ns, const enum hrtimer_mode mode); + + /** +- * hrtimer_start - (re)start an hrtimer on the current CPU ++ * hrtimer_start - (re)start an hrtimer + * @timer: the timer to be added + * @tim: expiry time +- * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or +- * relative (HRTIMER_MODE_REL) ++ * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or ++ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + */ + static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -926,12 +926,12 @@ static inline ktime_t hrtimer_update_low + } + + /** +- * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU ++ * hrtimer_start_range_ns - (re)start an hrtimer + * @timer: the timer to be added + * @tim: expiry time + * @delta_ns: "slack" range for the timer +- * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or +- * relative (HRTIMER_MODE_REL) ++ * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or ++ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + */ + void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + u64 delta_ns, const enum hrtimer_mode mode) +@@ -1109,7 +1109,8 @@ static void __hrtimer_init(struct hrtime + * hrtimer_init - initialize a timer to the given clock + * @timer: the timer to be initialized + * @clock_id: the clock to be used +- * @mode: timer mode abs/rel ++ * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or ++ * relative (HRTIMER_MODE_REL); pinned is not considered here! 
+ */ + void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) diff --git a/debian/patches/features/all/rt/0005-metag-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0005-metag-Adjust-system_state-check.patch deleted file mode 100644 index d37875f97..000000000 --- a/debian/patches/features/all/rt/0005-metag-Adjust-system_state-check.patch +++ /dev/null @@ -1,36 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:36 +0200 -Subject: [PATCH 05/17] metag: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in stop_this_cpu() to handle the extra states. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Cc: Greg Kroah-Hartman -Cc: James Hogan -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Peter Zijlstra -Cc: Steven Rostedt -Link: http://lkml.kernel.org/r/20170516184735.283420315@linutronix.de -Signed-off-by: Ingo Molnar ---- - arch/metag/kernel/smp.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/arch/metag/kernel/smp.c -+++ b/arch/metag/kernel/smp.c -@@ -567,8 +567,7 @@ static void stop_this_cpu(void *data) - { - unsigned int cpu = smp_processor_id(); - -- if (system_state == SYSTEM_BOOTING || -- system_state == SYSTEM_RUNNING) { -+ if (system_state <= SYSTEM_RUNNING) { - spin_lock(&stop_lock); - pr_crit("CPU%u: stopping\n", cpu); - dump_stack(); diff --git a/debian/patches/features/all/rt/0005-powerpc-smp-Replace-open-coded-task-affinity-logic.patch b/debian/patches/features/all/rt/0005-powerpc-smp-Replace-open-coded-task-affinity-logic.patch deleted file mode 100644 index 8001fde07..000000000 --- a/debian/patches/features/all/rt/0005-powerpc-smp-Replace-open-coded-task-affinity-logic.patch +++ /dev/null @@ -1,89 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:31 +0200 -Subject: [PATCH 05/13] powerpc/smp: Replace open coded task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Init task invokes smp_ops->setup_cpu() from smp_cpus_done(). Init task can -run on any online CPU at this point, but the setup_cpu() callback requires -to be invoked on the boot CPU. This is achieved by temporarily setting the -affinity of the calling user space thread to the requested CPU and reset it -to the original affinity afterwards. - -That's racy vs. CPU hotplug and concurrent affinity settings for that -thread resulting in code executing on the wrong CPU and overwriting the -new affinity setting. - -That's actually not a problem in this context as neither CPU hotplug nor -affinity settings can happen, but the access to task_struct::cpus_allowed -is about to restricted. - -Replace it with a call to work_on_cpu_safe() which achieves the same result. - -Signed-off-by: Thomas Gleixner -Acked-by: Michael Ellerman -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: Viresh Kumar -Cc: Tejun Heo -Cc: Paul Mackerras -Cc: linuxppc-dev@lists.ozlabs.org -Cc: "David S. 
Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201042.518053336@linutronix.de -Signed-off-by: Thomas Gleixner ---- - arch/powerpc/kernel/smp.c | 26 +++++++++++--------------- - 1 file changed, 11 insertions(+), 15 deletions(-) - ---- a/arch/powerpc/kernel/smp.c -+++ b/arch/powerpc/kernel/smp.c -@@ -787,24 +787,21 @@ static struct sched_domain_topology_leve - { NULL, }, - }; - --void __init smp_cpus_done(unsigned int max_cpus) -+static __init long smp_setup_cpu_workfn(void *data __always_unused) - { -- cpumask_var_t old_mask; -+ smp_ops->setup_cpu(boot_cpuid); -+ return 0; -+} - -- /* We want the setup_cpu() here to be called from CPU 0, but our -- * init thread may have been "borrowed" by another CPU in the meantime -- * se we pin us down to CPU 0 for a short while -+void __init smp_cpus_done(unsigned int max_cpus) -+{ -+ /* -+ * We want the setup_cpu() here to be called on the boot CPU, but -+ * init might run on any CPU, so make sure it's invoked on the boot -+ * CPU. - */ -- alloc_cpumask_var(&old_mask, GFP_NOWAIT); -- cpumask_copy(old_mask, ¤t->cpus_allowed); -- set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); -- - if (smp_ops && smp_ops->setup_cpu) -- smp_ops->setup_cpu(boot_cpuid); -- -- set_cpus_allowed_ptr(current, old_mask); -- -- free_cpumask_var(old_mask); -+ work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL); - - if (smp_ops && smp_ops->bringup_done) - smp_ops->bringup_done(); -@@ -812,7 +809,6 @@ void __init smp_cpus_done(unsigned int m - dump_numa_cpu_topology(); - - set_sched_topology(powerpc_topology); -- - } - - #ifdef CONFIG_HOTPLUG_CPU diff --git a/debian/patches/features/all/rt/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch b/debian/patches/features/all/rt/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch deleted file mode 100644 index db4cf1dce..000000000 --- a/debian/patches/features/all/rt/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch +++ /dev/null @@ -1,392 +0,0 @@ -From: Peter Zijlstra -Date: Thu, 23 Mar 2017 15:56:11 +0100 -Subject: [PATCH 5/9] sched/rtmutex: Refactor rt_mutex_setprio() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -With the introduction of SCHED_DEADLINE the whole notion that priority -is a single number is gone, therefore the @prio argument to -rt_mutex_setprio() doesn't make sense anymore. - -So rework the code to pass a pi_task instead. - -Note this also fixes a problem with pi_top_task caching; previously we -would not set the pointer (call rt_mutex_update_top_task) if the -priority didn't change, this could lead to a stale pointer. - -As for the XXX, I think its fine to use pi_task->prio, because if it -differs from waiter->prio, a PI chain update is immenent. 
- -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org -Signed-off-by: Thomas Gleixner ---- - include/linux/sched/rt.h | 24 +++------- - kernel/locking/rtmutex.c | 112 ++++++++++++----------------------------------- - kernel/sched/core.c | 66 ++++++++++++++++++++++----- - 3 files changed, 91 insertions(+), 111 deletions(-) - ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -18,28 +18,20 @@ static inline int rt_task(struct task_st - } - - #ifdef CONFIG_RT_MUTEXES --extern int rt_mutex_getprio(struct task_struct *p); --extern void rt_mutex_setprio(struct task_struct *p, int prio); --extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); --extern void rt_mutex_update_top_task(struct task_struct *p); --extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); -+/* -+ * Must hold either p->pi_lock or task_rq(p)->lock. -+ */ -+static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) -+{ -+ return p->pi_top_task; -+} -+extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); - extern void rt_mutex_adjust_pi(struct task_struct *p); - static inline bool tsk_is_pi_blocked(struct task_struct *tsk) - { - return tsk->pi_blocked_on != NULL; - } - #else --static inline int rt_mutex_getprio(struct task_struct *p) --{ -- return p->normal_prio; --} -- --static inline int rt_mutex_get_effective_prio(struct task_struct *task, -- int newprio) --{ -- return newprio; --} -- - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) - { - return NULL; ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -322,67 +322,16 @@ rt_mutex_dequeue_pi(struct task_struct * - RB_CLEAR_NODE(&waiter->pi_tree_entry); - } - --/* -- * Must hold both p->pi_lock and task_rq(p)->lock. -- */ --void rt_mutex_update_top_task(struct task_struct *p) --{ -- if (!task_has_pi_waiters(p)) { -- p->pi_top_task = NULL; -- return; -- } -- -- p->pi_top_task = task_top_pi_waiter(p)->task; --} -- --/* -- * Calculate task priority from the waiter tree priority -- * -- * Return task->normal_prio when the waiter tree is empty or when -- * the waiter is not allowed to do priority boosting -- */ --int rt_mutex_getprio(struct task_struct *task) --{ -- if (likely(!task_has_pi_waiters(task))) -- return task->normal_prio; -- -- return min(task_top_pi_waiter(task)->prio, -- task->normal_prio); --} -- --/* -- * Must hold either p->pi_lock or task_rq(p)->lock. -- */ --struct task_struct *rt_mutex_get_top_task(struct task_struct *task) --{ -- return task->pi_top_task; --} -- --/* -- * Called by sched_setscheduler() to get the priority which will be -- * effective after the change. -- */ --int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) -+static void rt_mutex_adjust_prio(struct task_struct *p) - { -- struct task_struct *top_task = rt_mutex_get_top_task(task); -+ struct task_struct *pi_task = NULL; - -- if (!top_task) -- return newprio; -+ lockdep_assert_held(&p->pi_lock); - -- return min(top_task->prio, newprio); --} -+ if (task_has_pi_waiters(p)) -+ pi_task = task_top_pi_waiter(p)->task; - --/* -- * Adjust the priority of a task, after its pi_waiters got modified. -- * -- * This can be both boosting and unboosting. task->pi_lock must be held. 
-- */ --static void __rt_mutex_adjust_prio(struct task_struct *task) --{ -- int prio = rt_mutex_getprio(task); -- -- if (task->prio != prio || dl_prio(prio)) -- rt_mutex_setprio(task, prio); -+ rt_mutex_setprio(p, pi_task); - } - - /* -@@ -742,7 +691,7 @@ static int rt_mutex_adjust_prio_chain(st - */ - rt_mutex_dequeue_pi(task, prerequeue_top_waiter); - rt_mutex_enqueue_pi(task, waiter); -- __rt_mutex_adjust_prio(task); -+ rt_mutex_adjust_prio(task); - - } else if (prerequeue_top_waiter == waiter) { - /* -@@ -758,7 +707,7 @@ static int rt_mutex_adjust_prio_chain(st - rt_mutex_dequeue_pi(task, waiter); - waiter = rt_mutex_top_waiter(lock); - rt_mutex_enqueue_pi(task, waiter); -- __rt_mutex_adjust_prio(task); -+ rt_mutex_adjust_prio(task); - } else { - /* - * Nothing changed. No need to do any priority -@@ -966,7 +915,7 @@ static int task_blocks_on_rt_mutex(struc - return -EDEADLK; - - raw_spin_lock(&task->pi_lock); -- __rt_mutex_adjust_prio(task); -+ rt_mutex_adjust_prio(task); - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -@@ -988,7 +937,7 @@ static int task_blocks_on_rt_mutex(struc - rt_mutex_dequeue_pi(owner, top_waiter); - rt_mutex_enqueue_pi(owner, waiter); - -- __rt_mutex_adjust_prio(owner); -+ rt_mutex_adjust_prio(owner); - if (owner->pi_blocked_on) - chain_walk = 1; - } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { -@@ -1040,13 +989,14 @@ static void mark_wakeup_next_waiter(stru - waiter = rt_mutex_top_waiter(lock); - - /* -- * Remove it from current->pi_waiters. We do not adjust a -- * possible priority boost right now. We execute wakeup in the -- * boosted mode and go back to normal after releasing -- * lock->wait_lock. -+ * Remove it from current->pi_waiters and deboost. -+ * -+ * We must in fact deboost here in order to ensure we call -+ * rt_mutex_setprio() to update p->pi_top_task before the -+ * task unblocks. - */ - rt_mutex_dequeue_pi(current, waiter); -- __rt_mutex_adjust_prio(current); -+ rt_mutex_adjust_prio(current); - - /* - * As we are waking up the top waiter, and the waiter stays -@@ -1058,9 +1008,19 @@ static void mark_wakeup_next_waiter(stru - */ - lock->owner = (void *) RT_MUTEX_HAS_WAITERS; - -- raw_spin_unlock(¤t->pi_lock); -- -+ /* -+ * We deboosted before waking the top waiter task such that we don't -+ * run two tasks with the 'same' priority (and ensure the -+ * p->pi_top_task pointer points to a blocked task). This however can -+ * lead to priority inversion if we would get preempted after the -+ * deboost but before waking our donor task, hence the preempt_disable() -+ * before unlock. 
-+ * -+ * Pairs with preempt_enable() in rt_mutex_postunlock(); -+ */ -+ preempt_disable(); - wake_q_add(wake_q, waiter->task); -+ raw_spin_unlock(¤t->pi_lock); - } - - /* -@@ -1095,7 +1055,7 @@ static void remove_waiter(struct rt_mute - if (rt_mutex_has_waiters(lock)) - rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); - -- __rt_mutex_adjust_prio(owner); -+ rt_mutex_adjust_prio(owner); - - /* Store the lock on which owner is blocked or NULL */ - next_lock = task_blocked_on_lock(owner); -@@ -1134,8 +1094,7 @@ void rt_mutex_adjust_pi(struct task_stru - raw_spin_lock_irqsave(&task->pi_lock, flags); - - waiter = task->pi_blocked_on; -- if (!waiter || (waiter->prio == task->prio && -- !dl_prio(task->prio))) { -+ if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) { - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return; - } -@@ -1389,17 +1348,6 @@ static bool __sched rt_mutex_slowunlock( - * Queue the next waiter for wakeup once we release the wait_lock. - */ - mark_wakeup_next_waiter(wake_q, lock); -- -- /* -- * We should deboost before waking the top waiter task such that -- * we don't run two tasks with the 'same' priority. This however -- * can lead to prio-inversion if we would get preempted after -- * the deboost but before waking our high-prio task, hence the -- * preempt_disable before unlock. Pairs with preempt_enable() in -- * rt_mutex_postunlock(); -- */ -- preempt_disable(); -- - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - return true; /* call rt_mutex_postunlock() */ ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3671,10 +3671,25 @@ EXPORT_SYMBOL(default_wake_function); - - #ifdef CONFIG_RT_MUTEXES - -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ - /* - * rt_mutex_setprio - set the current priority of a task -- * @p: task -- * @prio: prio value (kernel-internal form) -+ * @p: task to boost -+ * @pi_task: donor task - * - * This function changes the 'effective' priority of a task. It does - * not touch ->normal_prio like __setscheduler(). -@@ -3682,17 +3697,41 @@ EXPORT_SYMBOL(default_wake_function); - * Used by the rt_mutex code to implement priority inheritance - * logic. Call site only calls if the priority of the task changed. - */ --void rt_mutex_setprio(struct task_struct *p, int prio) -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - { -- int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; -+ int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; - const struct sched_class *prev_class; - struct rq_flags rf; - struct rq *rq; - -- BUG_ON(prio > MAX_PRIO); -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) -+ return; - - rq = __task_rq_lock(p, &rf); - update_rq_clock(rq); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. 
rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guaratees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio && !dl_prio(prio)) -+ goto out_unlock; - - /* - * Idle task boosting is a nono in general. There is one -@@ -3712,9 +3751,7 @@ void rt_mutex_setprio(struct task_struct - goto out_unlock; - } - -- rt_mutex_update_top_task(p); -- -- trace_sched_pi_setprio(p, prio); -+ trace_sched_pi_setprio(p, prio); /* broken */ - oldprio = p->prio; - - if (oldprio == prio) -@@ -3738,7 +3775,6 @@ void rt_mutex_setprio(struct task_struct - * running task - */ - if (dl_prio(prio)) { -- struct task_struct *pi_task = rt_mutex_get_top_task(p); - if (!dl_prio(p->normal_prio) || - (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { - p->dl.dl_boosted = 1; -@@ -3776,6 +3812,11 @@ void rt_mutex_setprio(struct task_struct - balance_callback(rq); - preempt_enable(); - } -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} - #endif - - void set_user_nice(struct task_struct *p, long nice) -@@ -4022,10 +4063,9 @@ static void __setscheduler(struct rq *rq - * Keep a potential priority boosting if called from - * sched_setscheduler(). - */ -+ p->prio = normal_prio(p); - if (keep_boost) -- p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); -- else -- p->prio = normal_prio(p); -+ p->prio = rt_effective_prio(p, p->prio); - - if (dl_prio(p->prio)) - p->sched_class = &dl_sched_class; -@@ -4312,7 +4352,7 @@ static int __sched_setscheduler(struct t - * the runqueue. This will be done when the task deboost - * itself. - */ -- new_effective_prio = rt_mutex_get_effective_prio(p, newprio); -+ new_effective_prio = rt_effective_prio(p, newprio); - if (new_effective_prio == oldprio) - queue_flags &= ~DEQUEUE_MOVE; - } diff --git a/debian/patches/features/all/rt/0007-tracing-Increase-tracing-map-KEYS_MAX-size.patch b/debian/patches/features/all/rt/0005-tracing-Increase-tracing-map-KEYS_MAX-size.patch similarity index 77% rename from debian/patches/features/all/rt/0007-tracing-Increase-tracing-map-KEYS_MAX-size.patch rename to debian/patches/features/all/rt/0005-tracing-Increase-tracing-map-KEYS_MAX-size.patch index 824cc4429..b7f91f0db 100644 --- a/debian/patches/features/all/rt/0007-tracing-Increase-tracing-map-KEYS_MAX-size.patch +++ b/debian/patches/features/all/rt/0005-tracing-Increase-tracing-map-KEYS_MAX-size.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:08 -0500 -Subject: [PATCH 07/32] tracing: Increase tracing map KEYS_MAX size -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:58:19 -0500 +Subject: [PATCH 05/42] tracing: Increase tracing map KEYS_MAX size +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The current default for the number of subkeys in a compound key is 2, which is too restrictive. Increase it to a more realistic value of 3. 
@@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/trace/tracing_map.h +++ b/kernel/trace/tracing_map.h -@@ -5,7 +5,7 @@ +@@ -6,7 +6,7 @@ #define TRACING_MAP_BITS_MAX 17 #define TRACING_MAP_BITS_MIN 7 diff --git a/debian/patches/features/all/rt/0006-futex-Cleanup-refcounting.patch b/debian/patches/features/all/rt/0006-futex-Cleanup-refcounting.patch deleted file mode 100644 index a3033e928..000000000 --- a/debian/patches/features/all/rt/0006-futex-Cleanup-refcounting.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:53 +0100 -Subject: [PATCH] futex: Cleanup refcounting -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit bf92cf3a5100f5a0d5f9834787b130159397cb22 - -Add a put_pit_state() as counterpart for get_pi_state() so the refcounting -becomes consistent. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.801778516@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 13 +++++++++---- - 1 file changed, 9 insertions(+), 4 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -802,7 +802,7 @@ static int refill_pi_state_cache(void) - return 0; - } - --static struct futex_pi_state * alloc_pi_state(void) -+static struct futex_pi_state *alloc_pi_state(void) - { - struct futex_pi_state *pi_state = current->pi_state_cache; - -@@ -812,6 +812,11 @@ static struct futex_pi_state * alloc_pi_ - return pi_state; - } - -+static void get_pi_state(struct futex_pi_state *pi_state) -+{ -+ WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount)); -+} -+ - /* - * Drops a reference to the pi_state object and frees or caches it - * when the last reference is gone. -@@ -856,7 +861,7 @@ static void put_pi_state(struct futex_pi - * Look up the task based on what TID userspace gave us. - * We dont trust it. - */ --static struct task_struct * futex_find_get_task(pid_t pid) -+static struct task_struct *futex_find_get_task(pid_t pid) - { - struct task_struct *p; - -@@ -1103,7 +1108,7 @@ static int attach_to_pi_state(u32 __user - goto out_einval; - - out_attach: -- atomic_inc(&pi_state->refcount); -+ get_pi_state(pi_state); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - *ps = pi_state; - return 0; -@@ -1990,7 +1995,7 @@ static int futex_requeue(u32 __user *uad - * refcount on the pi_state and store the pointer in - * the futex_q object of the waiter. 
- */ -- atomic_inc(&pi_state->refcount); -+ get_pi_state(pi_state); - this->pi_state = pi_state; - ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, - this->rt_waiter, diff --git a/debian/patches/features/all/rt/0006-hrtimer-Ensure-POSIX-compliance-relative-CLOCK_REALT.patch b/debian/patches/features/all/rt/0006-hrtimer-Ensure-POSIX-compliance-relative-CLOCK_REALT.patch new file mode 100644 index 000000000..938a0ecdb --- /dev/null +++ b/debian/patches/features/all/rt/0006-hrtimer-Ensure-POSIX-compliance-relative-CLOCK_REALT.patch @@ -0,0 +1,42 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:43 +0200 +Subject: [PATCH 06/36] hrtimer: Ensure POSIX compliance (relative + CLOCK_REALTIME hrtimers) +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +POSIX specification defines, that relative CLOCK_REALTIME timers are not +affected by clock modifications. Those timers have to use CLOCK_MONOTONIC +to ensure POSIX compliance. + +The introduction of the additional mode HRTIMER_MODE_PINNED broke this +requirement for pinned timers. There is no user space visible impact +because user space timers are not using the pinned mode, but for +consistency reasons this needs to be fixed. + +Check whether the mode has the HRTIMER_MODE_REL bit set instead of +comparing with HRTIMER_MODE_ABS. + +Fixes: 597d0275736d ("timers: Framework for identifying pinned timers") + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1097,7 +1097,12 @@ static void __hrtimer_init(struct hrtime + + cpu_base = raw_cpu_ptr(&hrtimer_bases); + +- if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) ++ /* ++ * Posix magic: Relative CLOCK_REALTIME timers are not affected by ++ * clock modifications, so they needs to become CLOCK_MONOTONIC to ++ * ensure Posix compliance. ++ */ ++ if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) + clock_id = CLOCK_MONOTONIC; + + base = hrtimer_clockid_to_base(clock_id); diff --git a/debian/patches/features/all/rt/0006-powerpc-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0006-powerpc-Adjust-system_state-check.patch deleted file mode 100644 index 9438bf116..000000000 --- a/debian/patches/features/all/rt/0006-powerpc-Adjust-system_state-check.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:37 +0200 -Subject: [PATCH 06/17] powerpc: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in smp_generic_cpu_bootable() to handle the -extra states. 
- -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Michael Ellerman -Cc: Benjamin Herrenschmidt -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Paul Mackerras -Cc: Peter Zijlstra -Cc: Steven Rostedt -Cc: linuxppc-dev@lists.ozlabs.org -Link: http://lkml.kernel.org/r/20170516184735.359536998@linutronix.de -Signed-off-by: Ingo Molnar ---- - arch/powerpc/kernel/smp.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/arch/powerpc/kernel/smp.c -+++ b/arch/powerpc/kernel/smp.c -@@ -98,7 +98,7 @@ int smp_generic_cpu_bootable(unsigned in - /* Special case - we inhibit secondary thread startup - * during boot if the user requests it. - */ -- if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { -+ if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { - if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) - return 0; - if (smt_enabled_at_boot diff --git a/debian/patches/features/all/rt/0006-sched-tracing-Update-trace_sched_pi_setprio.patch b/debian/patches/features/all/rt/0006-sched-tracing-Update-trace_sched_pi_setprio.patch deleted file mode 100644 index 409e08e3d..000000000 --- a/debian/patches/features/all/rt/0006-sched-tracing-Update-trace_sched_pi_setprio.patch +++ /dev/null @@ -1,107 +0,0 @@ -From: Peter Zijlstra -Date: Thu, 23 Mar 2017 15:56:12 +0100 -Subject: [PATCH 6/9] sched,tracing: Update trace_sched_pi_setprio() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Pass the PI donor task, instead of a numerical priority. - -Numerical priorities are not sufficient to describe state ever since -SCHED_DEADLINE. - -Annotate all sched tracepoints that are currently broken; fixing them -will bork userspace. *hate*. 
- -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Steven Rostedt -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.353599881@infradead.org -Signed-off-by: Thomas Gleixner ---- - include/trace/events/sched.h | 16 +++++++++------- - kernel/sched/core.c | 2 +- - 2 files changed, 10 insertions(+), 8 deletions(-) - ---- a/include/trace/events/sched.h -+++ b/include/trace/events/sched.h -@@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_templat - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; -- __entry->prio = p->prio; -+ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ - __entry->success = 1; /* rudiment, kill when possible */ - __entry->target_cpu = task_cpu(p); - ), -@@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch, - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; -+ /* XXX SCHED_DEADLINE */ - ), - - TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", -@@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task, - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; -- __entry->prio = p->prio; -+ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ - __entry->orig_cpu = task_cpu(p); - __entry->dest_cpu = dest_cpu; - ), -@@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_templa - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; -- __entry->prio = p->prio; -+ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ - ), - - TP_printk("comm=%s pid=%d prio=%d", -@@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait, - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); -- __entry->prio = current->prio; -+ __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ - ), - - TP_printk("comm=%s pid=%d prio=%d", -@@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_s - */ - TRACE_EVENT(sched_pi_setprio, - -- TP_PROTO(struct task_struct *tsk, int newprio), -+ TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), - -- TP_ARGS(tsk, newprio), -+ TP_ARGS(tsk, pi_task), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) -@@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio, - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->oldprio = tsk->prio; -- __entry->newprio = newprio; -+ __entry->newprio = pi_task ? 
pi_task->prio : tsk->prio; -+ /* XXX SCHED_DEADLINE bits missing */ - ), - - TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3751,7 +3751,7 @@ void rt_mutex_setprio(struct task_struct - goto out_unlock; - } - -- trace_sched_pi_setprio(p, prio); /* broken */ -+ trace_sched_pi_setprio(p, pi_task); - oldprio = p->prio; - - if (oldprio == prio) diff --git a/debian/patches/features/all/rt/0006-sparc-sysfs-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0006-sparc-sysfs-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index e4dc32c6b..000000000 --- a/debian/patches/features/all/rt/0006-sparc-sysfs-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,118 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 13 Apr 2017 10:17:07 +0200 -Subject: [PATCH 06/13] sparc/sysfs: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The mmustat_enable sysfs file accessor functions must run code on the -target CPU. This is achieved by temporarily setting the affinity of the -calling user space thread to the requested CPU and reset it to the original -affinity afterwards. - -That's racy vs. concurrent affinity settings for that thread resulting in -code executing on the wrong CPU and overwriting the new affinity setting. - -Replace it by using work_on_cpu() which guarantees to run the code on the -requested CPU. - -Protection against CPU hotplug is not required as the open sysfs file -already prevents the removal from the CPU offline callback. Using the -hotplug protected version would actually be wrong because it would deadlock -against a CPU hotplug operation of the CPU associated to the sysfs file in -progress. - -Signed-off-by: Thomas Gleixner -Acked-by: David S. 
Miller -Cc: fenghua.yu@intel.com -Cc: tony.luck@intel.com -Cc: herbert@gondor.apana.org.au -Cc: rjw@rjwysocki.net -Cc: peterz@infradead.org -Cc: benh@kernel.crashing.org -Cc: bigeasy@linutronix.de -Cc: jiangshanlai@gmail.com -Cc: sparclinux@vger.kernel.org -Cc: viresh.kumar@linaro.org -Cc: mpe@ellerman.id.au -Cc: tj@kernel.org -Cc: lenb@kernel.org -Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704131001270.2408@nanos -Signed-off-by: Thomas Gleixner ---- - arch/sparc/kernel/sysfs.c | 39 +++++++++++---------------------------- - 1 file changed, 11 insertions(+), 28 deletions(-) - ---- a/arch/sparc/kernel/sysfs.c -+++ b/arch/sparc/kernel/sysfs.c -@@ -98,27 +98,7 @@ static struct attribute_group mmu_stat_g - .name = "mmu_stats", - }; - --/* XXX convert to rusty's on_one_cpu */ --static unsigned long run_on_cpu(unsigned long cpu, -- unsigned long (*func)(unsigned long), -- unsigned long arg) --{ -- cpumask_t old_affinity; -- unsigned long ret; -- -- cpumask_copy(&old_affinity, ¤t->cpus_allowed); -- /* should return -EINVAL to userspace */ -- if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) -- return 0; -- -- ret = func(arg); -- -- set_cpus_allowed_ptr(current, &old_affinity); -- -- return ret; --} -- --static unsigned long read_mmustat_enable(unsigned long junk) -+static long read_mmustat_enable(void *data __maybe_unused) - { - unsigned long ra = 0; - -@@ -127,11 +107,11 @@ static unsigned long read_mmustat_enable - return ra != 0; - } - --static unsigned long write_mmustat_enable(unsigned long val) -+static long write_mmustat_enable(void *data) - { -- unsigned long ra, orig_ra; -+ unsigned long ra, orig_ra, *val = data; - -- if (val) -+ if (*val) - ra = __pa(&per_cpu(mmu_stats, smp_processor_id())); - else - ra = 0UL; -@@ -142,7 +122,8 @@ static unsigned long write_mmustat_enabl - static ssize_t show_mmustat_enable(struct device *s, - struct device_attribute *attr, char *buf) - { -- unsigned long val = run_on_cpu(s->id, read_mmustat_enable, 0); -+ long val = work_on_cpu(s->id, read_mmustat_enable, NULL); -+ - return sprintf(buf, "%lx\n", val); - } - -@@ -150,13 +131,15 @@ static ssize_t store_mmustat_enable(stru - struct device_attribute *attr, const char *buf, - size_t count) - { -- unsigned long val, err; -- int ret = sscanf(buf, "%lu", &val); -+ unsigned long val; -+ long err; -+ int ret; - -+ ret = sscanf(buf, "%lu", &val); - if (ret != 1) - return -EINVAL; - -- err = run_on_cpu(s->id, write_mmustat_enable, val); -+ err = work_on_cpu(s->id, write_mmustat_enable, &val); - if (err) - return -EIO; - diff --git a/debian/patches/features/all/rt/0009-tracing-Make-traceprobe-parsing-code-reusable.patch b/debian/patches/features/all/rt/0006-tracing-Make-traceprobe-parsing-code-reusable.patch similarity index 93% rename from debian/patches/features/all/rt/0009-tracing-Make-traceprobe-parsing-code-reusable.patch rename to debian/patches/features/all/rt/0006-tracing-Make-traceprobe-parsing-code-reusable.patch index 8c6b851dc..a9fb914e3 100644 --- a/debian/patches/features/all/rt/0009-tracing-Make-traceprobe-parsing-code-reusable.patch +++ b/debian/patches/features/all/rt/0006-tracing-Make-traceprobe-parsing-code-reusable.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:10 -0500 -Subject: [PATCH 09/32] tracing: Make traceprobe parsing code reusable -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:58:20 -0500 +Subject: [PATCH 06/42] tracing: Make traceprobe parsing code reusable +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz traceprobe_probes_write() and traceprobe_command() actually contain nothing that ties them to kprobes - the code is generically useful for @@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -7907,6 +7907,92 @@ void ftrace_dump(enum ftrace_dump_mode o +@@ -8281,6 +8281,92 @@ void ftrace_dump(enum ftrace_dump_mode o } EXPORT_SYMBOL_GPL(ftrace_dump); @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior int ring_buf_size; --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -1650,6 +1650,13 @@ void trace_printk_start_comm(void); +@@ -1755,6 +1755,13 @@ void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); @@ -136,7 +136,7 @@ Signed-off-by: Sebastian Andrzej Siewior * to do the manipulation, as well as saves the print formats --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c -@@ -878,8 +878,8 @@ static int probes_open(struct inode *ino +@@ -907,8 +907,8 @@ static int probes_open(struct inode *ino static ssize_t probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { @@ -147,7 +147,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static const struct file_operations kprobe_events_ops = { -@@ -1404,9 +1404,9 @@ static __init int kprobe_trace_self_test +@@ -1433,9 +1433,9 @@ static __init int kprobe_trace_self_test pr_info("Testing kprobe tracing: "); @@ -160,7 +160,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function entry.\n"); warn++; -@@ -1426,8 +1426,8 @@ static __init int kprobe_trace_self_test +@@ -1455,8 +1455,8 @@ static __init int kprobe_trace_self_test } } @@ -171,7 +171,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (WARN_ON_ONCE(ret)) { pr_warn("error on probing function return.\n"); warn++; -@@ -1497,13 +1497,13 @@ static __init int kprobe_trace_self_test +@@ -1526,13 +1526,13 @@ static __init int kprobe_trace_self_test disable_trace_kprobe(tk, file); } diff --git a/debian/patches/features/all/rt/0007-ACPI-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0007-ACPI-Adjust-system_state-check.patch deleted file mode 100644 index f7a9cf940..000000000 --- a/debian/patches/features/all/rt/0007-ACPI-Adjust-system_state-check.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:38 +0200 -Subject: [PATCH 07/17] ACPI: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Make the decision whether a pci root is hotplugged depend on SYSTEM_RUNNING -instead of !SYSTEM_BOOTING. It makes no sense to cover states greater than -SYSTEM_RUNNING as there are not hotplug events on reboot and poweroff. - -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Steven Rostedt (VMware) -Cc: Greg Kroah-Hartman -Cc: Len Brown -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Rafael J. 
Wysocki -Link: http://lkml.kernel.org/r/20170516184735.446455652@linutronix.de -Signed-off-by: Ingo Molnar ---- - drivers/acpi/pci_root.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/acpi/pci_root.c -+++ b/drivers/acpi/pci_root.c -@@ -523,7 +523,7 @@ static int acpi_pci_root_add(struct acpi - struct acpi_pci_root *root; - acpi_handle handle = device->handle; - int no_aspm = 0; -- bool hotadd = system_state != SYSTEM_BOOTING; -+ bool hotadd = system_state == SYSTEM_RUNNING; - - root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); - if (!root) diff --git a/debian/patches/features/all/rt/0007-ACPI-processor-Fix-error-handling-in-__acpi_processo.patch b/debian/patches/features/all/rt/0007-ACPI-processor-Fix-error-handling-in-__acpi_processo.patch deleted file mode 100644 index cb48597d9..000000000 --- a/debian/patches/features/all/rt/0007-ACPI-processor-Fix-error-handling-in-__acpi_processo.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:33 +0200 -Subject: [PATCH 07/13] ACPI/processor: Fix error handling in - __acpi_processor_start() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -When acpi_install_notify_handler() fails the cooling device stays -registered and the sysfs files created via acpi_pss_perf_init() are -leaked and the function returns success. - -Undo acpi_pss_perf_init() and return a proper error code. - -Signed-off-by: Thomas Gleixner -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: linux-acpi@vger.kernel.org -Cc: Viresh Kumar -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201042.695499645@linutronix.de -Signed-off-by: Thomas Gleixner ---- - drivers/acpi/processor_driver.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/drivers/acpi/processor_driver.c -+++ b/drivers/acpi/processor_driver.c -@@ -251,6 +251,9 @@ static int __acpi_processor_start(struct - if (ACPI_SUCCESS(status)) - return 0; - -+ result = -ENODEV; -+ acpi_pss_perf_exit(pr, device); -+ - err_power_exit: - acpi_processor_power_exit(pr); - return result; diff --git a/debian/patches/features/all/rt/0007-futex-Rework-inconsistent-rt_mutex-futex_q-state.patch b/debian/patches/features/all/rt/0007-futex-Rework-inconsistent-rt_mutex-futex_q-state.patch deleted file mode 100644 index 0b945ce0e..000000000 --- a/debian/patches/features/all/rt/0007-futex-Rework-inconsistent-rt_mutex-futex_q-state.patch +++ /dev/null @@ -1,140 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:54 +0100 -Subject: [PATCH] futex: Rework inconsistent rt_mutex/futex_q state -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 73d786bd043ebc855f349c81ea805f6b11cbf2aa - -There is a weird state in the futex_unlock_pi() path when it interleaves -with a concurrent futex_lock_pi() at the point where it drops hb->lock. - -In this case, it can happen that the rt_mutex wait_list and the futex_q -disagree on pending waiters, in particular rt_mutex will find no pending -waiters where futex_q thinks there are. In this case the rt_mutex unlock -code cannot assign an owner. - -The futex side fixup code has to cleanup the inconsistencies with quite a -bunch of interesting corner cases. - -Simplify all this by changing wake_futex_pi() to return -EAGAIN when this -situation occurs. 
This then gives the futex_lock_pi() code the opportunity -to continue and the retried futex_unlock_pi() will now observe a coherent -state. - -The only problem is that this breaks RT timeliness guarantees. That -is, consider the following scenario: - - T1 and T2 are both pinned to CPU0. prio(T2) > prio(T1) - - CPU0 - - T1 - lock_pi() - queue_me() <- Waiter is visible - - preemption - - T2 - unlock_pi() - loops with -EAGAIN forever - -Which is undesirable for PI primitives. Future patches will rectify -this. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.850383690@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 50 ++++++++++++++------------------------------------ - 1 file changed, 14 insertions(+), 36 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1404,12 +1404,19 @@ static int wake_futex_pi(u32 __user *uad - new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); - - /* -- * It is possible that the next waiter (the one that brought -- * top_waiter owner to the kernel) timed out and is no longer -- * waiting on the lock. -+ * When we interleave with futex_lock_pi() where it does -+ * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter, -+ * but the rt_mutex's wait_list can be empty (either still, or again, -+ * depending on which side we land). -+ * -+ * When this happens, give up our locks and try again, giving the -+ * futex_lock_pi() instance time to complete, either by waiting on the -+ * rtmutex or removing itself from the futex queue. - */ -- if (!new_owner) -- new_owner = top_waiter->task; -+ if (!new_owner) { -+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -+ return -EAGAIN; -+ } - - /* - * We pass it to the next owner. The WAITERS bit is always -@@ -2332,7 +2339,6 @@ static long futex_wait_restart(struct re - */ - static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) - { -- struct task_struct *owner; - int ret = 0; - - if (locked) { -@@ -2346,43 +2352,15 @@ static int fixup_owner(u32 __user *uaddr - } - - /* -- * Catch the rare case, where the lock was released when we were on the -- * way back before we locked the hash bucket. -- */ -- if (q->pi_state->owner == current) { -- /* -- * Try to get the rt_mutex now. This might fail as some other -- * task acquired the rt_mutex after we removed ourself from the -- * rt_mutex waiters list. -- */ -- if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) { -- locked = 1; -- goto out; -- } -- -- /* -- * pi_state is incorrect, some other task did a lock steal and -- * we returned due to timeout or signal without taking the -- * rt_mutex. Too late. -- */ -- raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); -- owner = rt_mutex_owner(&q->pi_state->pi_mutex); -- if (!owner) -- owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); -- raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); -- ret = fixup_pi_state_owner(uaddr, q, owner); -- goto out; -- } -- -- /* - * Paranoia check. If we did not take the lock, then we should not be - * the owner of the rt_mutex. 
- */ -- if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) -+ if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { - printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " - "pi-state %p\n", ret, - q->pi_state->pi_mutex.owner, - q->pi_state->owner); -+ } - - out: - return ret ? ret : locked; diff --git a/debian/patches/features/all/rt/0007-hrtimer-Cleanup-hrtimer_mode-enum.patch b/debian/patches/features/all/rt/0007-hrtimer-Cleanup-hrtimer_mode-enum.patch new file mode 100644 index 000000000..4182a36bb --- /dev/null +++ b/debian/patches/features/all/rt/0007-hrtimer-Cleanup-hrtimer_mode-enum.patch @@ -0,0 +1,46 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:44 +0200 +Subject: [PATCH 07/36] hrtimer: Cleanup hrtimer_mode enum +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +It's not obvious that the HRTIMER_MODE variants are bit combinations +because all modes are hard coded constants. + +Change it so the bit meanings are clear and use the symbols for creating +modes which combine bits. + +While at it get rid of the ugly tail comments. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -28,13 +28,19 @@ struct hrtimer_cpu_base; + + /* + * Mode arguments of xxx_hrtimer functions: ++ * ++ * HRTIMER_MODE_ABS - Time value is absolute ++ * HRTIMER_MODE_REL - Time value is relative to now ++ * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered ++ * when starting the timer) + */ + enum hrtimer_mode { +- HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */ +- HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */ +- HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */ +- HRTIMER_MODE_ABS_PINNED = 0x02, +- HRTIMER_MODE_REL_PINNED = 0x03, ++ HRTIMER_MODE_ABS = 0x00, ++ HRTIMER_MODE_REL = 0x01, ++ HRTIMER_MODE_PINNED = 0x02, ++ ++ HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, ++ HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, + }; + + /* diff --git a/debian/patches/features/all/rt/0007-rtmutex-Fix-PI-chain-order-integrity.patch b/debian/patches/features/all/rt/0007-rtmutex-Fix-PI-chain-order-integrity.patch deleted file mode 100644 index 3e26fb373..000000000 --- a/debian/patches/features/all/rt/0007-rtmutex-Fix-PI-chain-order-integrity.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Peter Zijlstra -Date: Thu, 23 Mar 2017 15:56:13 +0100 -Subject: [PATCH 7/9] rtmutex: Fix PI chain order integrity -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -rt_mutex_waiter::prio is a copy of task_struct::prio which is updated -during the PI chain walk, such that the PI chain order isn't messed up -by (asynchronous) task state updates. - -Currently rt_mutex_waiter_less() uses task state for deadline tasks; -this is broken, since the task state can, as said above, change -asynchronously, causing the RB tree order to change without actual -tree update -> FAIL. - -Fix this by also copying the deadline into the rt_mutex_waiter state -and updating it along with its prio field. - -Ideally we would also force PI chain updates whenever DL tasks update -their deadline parameter, but for first approximation this is less -broken than it was. 
- -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.403992539@infradead.org -Signed-off-by: Thomas Gleixner ---- - kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++++-- - kernel/locking/rtmutex_common.h | 1 + - 2 files changed, 28 insertions(+), 2 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -238,8 +238,7 @@ rt_mutex_waiter_less(struct rt_mutex_wai - * then right waiter has a dl_prio() too. - */ - if (dl_prio(left->prio)) -- return dl_time_before(left->task->dl.deadline, -- right->task->dl.deadline); -+ return dl_time_before(left->deadline, right->deadline); - - return 0; - } -@@ -650,7 +649,26 @@ static int rt_mutex_adjust_prio_chain(st - - /* [7] Requeue the waiter in the lock waiter tree. */ - rt_mutex_dequeue(lock, waiter); -+ -+ /* -+ * Update the waiter prio fields now that we're dequeued. -+ * -+ * These values can have changed through either: -+ * -+ * sys_sched_set_scheduler() / sys_sched_setattr() -+ * -+ * or -+ * -+ * DL CBS enforcement advancing the effective deadline. -+ * -+ * Even though pi_waiters also uses these fields, and that tree is only -+ * updated in [11], we can do this here, since we hold [L], which -+ * serializes all pi_waiters access and rb_erase() does not care about -+ * the values of the node being removed. -+ */ - waiter->prio = task->prio; -+ waiter->deadline = task->dl.deadline; -+ - rt_mutex_enqueue(lock, waiter); - - /* [8] Release the task */ -@@ -777,6 +795,8 @@ static int rt_mutex_adjust_prio_chain(st - static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, - struct rt_mutex_waiter *waiter) - { -+ lockdep_assert_held(&lock->wait_lock); -+ - /* - * Before testing whether we can acquire @lock, we set the - * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all -@@ -902,6 +922,8 @@ static int task_blocks_on_rt_mutex(struc - struct rt_mutex *next_lock; - int chain_walk = 0, res; - -+ lockdep_assert_held(&lock->wait_lock); -+ - /* - * Early deadlock detection. We really don't want the task to - * enqueue on itself just to untangle the mess later. 
It's not -@@ -919,6 +941,7 @@ static int task_blocks_on_rt_mutex(struc - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -+ waiter->deadline = task->dl.deadline; - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) -@@ -1036,6 +1059,8 @@ static void remove_waiter(struct rt_mute - struct task_struct *owner = rt_mutex_owner(lock); - struct rt_mutex *next_lock; - -+ lockdep_assert_held(&lock->wait_lock); -+ - raw_spin_lock(¤t->pi_lock); - rt_mutex_dequeue(lock, waiter); - current->pi_blocked_on = NULL; ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -34,6 +34,7 @@ struct rt_mutex_waiter { - struct rt_mutex *deadlock_lock; - #endif - int prio; -+ u64 deadline; - }; - - /* diff --git a/debian/patches/features/all/rt/0007-tracing-Clean-up-hist_field_flags-enum.patch b/debian/patches/features/all/rt/0007-tracing-Clean-up-hist_field_flags-enum.patch new file mode 100644 index 000000000..8c78a6867 --- /dev/null +++ b/debian/patches/features/all/rt/0007-tracing-Clean-up-hist_field_flags-enum.patch @@ -0,0 +1,44 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 14:58:21 -0500 +Subject: [PATCH 07/42] tracing: Clean up hist_field_flags enum +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +As we add more flags, specifying explicit integers for the flag values +becomes more unwieldy and error-prone - switch them over to left-shift +values. + +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -110,16 +110,16 @@ DEFINE_HIST_FIELD_FN(u8); + #define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE) + + enum hist_field_flags { +- HIST_FIELD_FL_HITCOUNT = 1, +- HIST_FIELD_FL_KEY = 2, +- HIST_FIELD_FL_STRING = 4, +- HIST_FIELD_FL_HEX = 8, +- HIST_FIELD_FL_SYM = 16, +- HIST_FIELD_FL_SYM_OFFSET = 32, +- HIST_FIELD_FL_EXECNAME = 64, +- HIST_FIELD_FL_SYSCALL = 128, +- HIST_FIELD_FL_STACKTRACE = 256, +- HIST_FIELD_FL_LOG2 = 512, ++ HIST_FIELD_FL_HITCOUNT = 1 << 0, ++ HIST_FIELD_FL_KEY = 1 << 1, ++ HIST_FIELD_FL_STRING = 1 << 2, ++ HIST_FIELD_FL_HEX = 1 << 3, ++ HIST_FIELD_FL_SYM = 1 << 4, ++ HIST_FIELD_FL_SYM_OFFSET = 1 << 5, ++ HIST_FIELD_FL_EXECNAME = 1 << 6, ++ HIST_FIELD_FL_SYSCALL = 1 << 7, ++ HIST_FIELD_FL_STACKTRACE = 1 << 8, ++ HIST_FIELD_FL_LOG2 = 1 << 9, + }; + + struct hist_trigger_attrs { diff --git a/debian/patches/features/all/rt/0008-ACPI-processor-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0008-ACPI-processor-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index 30c1a96b0..000000000 --- a/debian/patches/features/all/rt/0008-ACPI-processor-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,193 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:34 +0200 -Subject: [PATCH 08/13] ACPI/processor: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -acpi_processor_get_throttling() requires to invoke the getter function on -the target CPU. This is achieved by temporarily setting the affinity of the -calling user space thread to the requested CPU and reset it to the original -affinity afterwards. - -That's racy vs. 
CPU hotplug and concurrent affinity settings for that -thread resulting in code executing on the wrong CPU and overwriting the -new affinity setting. - -acpi_processor_get_throttling() is invoked in two ways: - -1) The CPU online callback, which is already running on the target CPU and - obviously protected against hotplug and not affected by affinity - settings. - -2) The ACPI driver probe function, which is not protected against hotplug - during modprobe. - -Switch it over to work_on_cpu() and protect the probe function against CPU -hotplug. - -Signed-off-by: Thomas Gleixner -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: linux-acpi@vger.kernel.org -Cc: Viresh Kumar -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201042.785920903@linutronix.de -Signed-off-by: Thomas Gleixner ---- - drivers/acpi/processor_driver.c | 7 +++- - drivers/acpi/processor_throttling.c | 62 ++++++++++++++++++++---------------- - 2 files changed, 42 insertions(+), 27 deletions(-) - ---- a/drivers/acpi/processor_driver.c -+++ b/drivers/acpi/processor_driver.c -@@ -262,11 +262,16 @@ static int __acpi_processor_start(struct - static int acpi_processor_start(struct device *dev) - { - struct acpi_device *device = ACPI_COMPANION(dev); -+ int ret; - - if (!device) - return -ENODEV; - -- return __acpi_processor_start(device); -+ /* Protect against concurrent CPU hotplug operations */ -+ get_online_cpus(); -+ ret = __acpi_processor_start(device); -+ put_online_cpus(); -+ return ret; - } - - static int acpi_processor_stop(struct device *dev) ---- a/drivers/acpi/processor_throttling.c -+++ b/drivers/acpi/processor_throttling.c -@@ -62,8 +62,8 @@ struct acpi_processor_throttling_arg { - #define THROTTLING_POSTCHANGE (2) - - static int acpi_processor_get_throttling(struct acpi_processor *pr); --int acpi_processor_set_throttling(struct acpi_processor *pr, -- int state, bool force); -+static int __acpi_processor_set_throttling(struct acpi_processor *pr, -+ int state, bool force, bool direct); - - static int acpi_processor_update_tsd_coord(void) - { -@@ -891,7 +891,8 @@ static int acpi_processor_get_throttling - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Invalid throttling state, reset\n")); - state = 0; -- ret = acpi_processor_set_throttling(pr, state, true); -+ ret = __acpi_processor_set_throttling(pr, state, true, -+ true); - if (ret) - return ret; - } -@@ -901,36 +902,31 @@ static int acpi_processor_get_throttling - return 0; - } - --static int acpi_processor_get_throttling(struct acpi_processor *pr) -+static long __acpi_processor_get_throttling(void *data) - { -- cpumask_var_t saved_mask; -- int ret; -+ struct acpi_processor *pr = data; -+ -+ return pr->throttling.acpi_processor_get_throttling(pr); -+} - -+static int acpi_processor_get_throttling(struct acpi_processor *pr) -+{ - if (!pr) - return -EINVAL; - - if (!pr->flags.throttling) - return -ENODEV; - -- if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL)) -- return -ENOMEM; -- - /* -- * Migrate task to the cpu pointed by pr. -+ * This is either called from the CPU hotplug callback of -+ * processor_driver or via the ACPI probe function. In the latter -+ * case the CPU is not guaranteed to be online. Both call sites are -+ * protected against CPU hotplug. 
- */ -- cpumask_copy(saved_mask, ¤t->cpus_allowed); -- /* FIXME: use work_on_cpu() */ -- if (set_cpus_allowed_ptr(current, cpumask_of(pr->id))) { -- /* Can't migrate to the target pr->id CPU. Exit */ -- free_cpumask_var(saved_mask); -+ if (!cpu_online(pr->id)) - return -ENODEV; -- } -- ret = pr->throttling.acpi_processor_get_throttling(pr); -- /* restore the previous state */ -- set_cpus_allowed_ptr(current, saved_mask); -- free_cpumask_var(saved_mask); - -- return ret; -+ return work_on_cpu(pr->id, __acpi_processor_get_throttling, pr); - } - - static int acpi_processor_get_fadt_info(struct acpi_processor *pr) -@@ -1080,8 +1076,15 @@ static long acpi_processor_throttling_fn - arg->target_state, arg->force); - } - --int acpi_processor_set_throttling(struct acpi_processor *pr, -- int state, bool force) -+static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct) -+{ -+ if (direct) -+ return fn(arg); -+ return work_on_cpu(cpu, fn, arg); -+} -+ -+static int __acpi_processor_set_throttling(struct acpi_processor *pr, -+ int state, bool force, bool direct) - { - int ret = 0; - unsigned int i; -@@ -1130,7 +1133,8 @@ int acpi_processor_set_throttling(struct - arg.pr = pr; - arg.target_state = state; - arg.force = force; -- ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, &arg); -+ ret = call_on_cpu(pr->id, acpi_processor_throttling_fn, &arg, -+ direct); - } else { - /* - * When the T-state coordination is SW_ALL or HW_ALL, -@@ -1163,8 +1167,8 @@ int acpi_processor_set_throttling(struct - arg.pr = match_pr; - arg.target_state = state; - arg.force = force; -- ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, -- &arg); -+ ret = call_on_cpu(pr->id, acpi_processor_throttling_fn, -+ &arg, direct); - } - } - /* -@@ -1182,6 +1186,12 @@ int acpi_processor_set_throttling(struct - return ret; - } - -+int acpi_processor_set_throttling(struct acpi_processor *pr, int state, -+ bool force) -+{ -+ return __acpi_processor_set_throttling(pr, state, force, false); -+} -+ - int acpi_processor_get_throttling_info(struct acpi_processor *pr) - { - int result = 0; diff --git a/debian/patches/features/all/rt/0008-futex-Pull-rt_mutex_futex_unlock-out-from-under-hb-l.patch b/debian/patches/features/all/rt/0008-futex-Pull-rt_mutex_futex_unlock-out-from-under-hb-l.patch deleted file mode 100644 index 1a808f971..000000000 --- a/debian/patches/features/all/rt/0008-futex-Pull-rt_mutex_futex_unlock-out-from-under-hb-l.patch +++ /dev/null @@ -1,358 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:55 +0100 -Subject: [PATCH] futex: Pull rt_mutex_futex_unlock() out from under hb->lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 16ffa12d742534d4ff73e8b3a4e81c1de39196f0 - -There's a number of 'interesting' problems, all caused by holding -hb->lock while doing the rt_mutex_unlock() equivalient. - -Notably: - - - a PI inversion on hb->lock; and, - - - a SCHED_DEADLINE crash because of pointer instability. - -The previous changes: - - - changed the locking rules to cover {uval,pi_state} with wait_lock. - - - allow to do rt_mutex_futex_unlock() without dropping wait_lock; which in - turn allows to rely on wait_lock atomicity completely. - - - simplified the waiter conundrum. - -It's now sufficient to hold rtmutex::wait_lock and a reference on the -pi_state to protect the state consistency, so hb->lock can be dropped -before calling rt_mutex_futex_unlock(). 
- -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.900002056@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 154 +++++++++++++++++++++++++++++++++++++-------------------- - 1 file changed, 100 insertions(+), 54 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -921,10 +921,12 @@ void exit_pi_state_list(struct task_stru - pi_state->owner = NULL; - raw_spin_unlock_irq(&curr->pi_lock); - -- rt_mutex_futex_unlock(&pi_state->pi_mutex); -- -+ get_pi_state(pi_state); - spin_unlock(&hb->lock); - -+ rt_mutex_futex_unlock(&pi_state->pi_mutex); -+ put_pi_state(pi_state); -+ - raw_spin_lock_irq(&curr->pi_lock); - } - raw_spin_unlock_irq(&curr->pi_lock); -@@ -1037,6 +1039,11 @@ static int attach_to_pi_state(u32 __user - * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), - * which in turn means that futex_lock_pi() still has a reference on - * our pi_state. -+ * -+ * The waiter holding a reference on @pi_state also protects against -+ * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() -+ * and futex_wait_requeue_pi() as it cannot go to 0 and consequently -+ * free pi_state before we can take a reference ourselves. - */ - WARN_ON(!atomic_read(&pi_state->refcount)); - -@@ -1380,48 +1387,40 @@ static void mark_wake_futex(struct wake_ - smp_store_release(&q->lock_ptr, NULL); - } - --static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter, -- struct futex_hash_bucket *hb) -+/* -+ * Caller must hold a reference on @pi_state. -+ */ -+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) - { -- struct task_struct *new_owner; -- struct futex_pi_state *pi_state = top_waiter->pi_state; - u32 uninitialized_var(curval), newval; -+ struct task_struct *new_owner; -+ bool deboost = false; - DEFINE_WAKE_Q(wake_q); -- bool deboost; - int ret = 0; - -- if (!pi_state) -- return -EINVAL; -- -- /* -- * If current does not own the pi_state then the futex is -- * inconsistent and user space fiddled with the futex value. -- */ -- if (pi_state->owner != current) -- return -EINVAL; -- - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); - new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -- -- /* -- * When we interleave with futex_lock_pi() where it does -- * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter, -- * but the rt_mutex's wait_list can be empty (either still, or again, -- * depending on which side we land). -- * -- * When this happens, give up our locks and try again, giving the -- * futex_lock_pi() instance time to complete, either by waiting on the -- * rtmutex or removing itself from the futex queue. -- */ - if (!new_owner) { -- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -- return -EAGAIN; -+ /* -+ * Since we held neither hb->lock nor wait_lock when coming -+ * into this function, we could have raced with futex_lock_pi() -+ * such that we might observe @this futex_q waiter, but the -+ * rt_mutex's wait_list can be empty (either still, or again, -+ * depending on which side we land). 
-+ * -+ * When this happens, give up our locks and try again, giving -+ * the futex_lock_pi() instance time to complete, either by -+ * waiting on the rtmutex or removing itself from the futex -+ * queue. -+ */ -+ ret = -EAGAIN; -+ goto out_unlock; - } - - /* -- * We pass it to the next owner. The WAITERS bit is always -- * kept enabled while there is PI state around. We cleanup the -- * owner died bit, because we are the owner. -+ * We pass it to the next owner. The WAITERS bit is always kept -+ * enabled while there is PI state around. We cleanup the owner -+ * died bit, because we are the owner. - */ - newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - -@@ -1444,10 +1443,8 @@ static int wake_futex_pi(u32 __user *uad - ret = -EINVAL; - } - -- if (ret) { -- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -- return ret; -- } -+ if (ret) -+ goto out_unlock; - - raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); -@@ -1465,15 +1462,15 @@ static int wake_futex_pi(u32 __user *uad - */ - deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); - -+out_unlock: - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); -- spin_unlock(&hb->lock); - - if (deboost) { - wake_up_q(&wake_q); - rt_mutex_adjust_prio(current); - } - -- return 0; -+ return ret; - } - - /* -@@ -2232,7 +2229,8 @@ static int fixup_pi_state_owner(u32 __us - /* - * We are here either because we stole the rtmutex from the - * previous highest priority waiter or we are the highest priority -- * waiter but failed to get the rtmutex the first time. -+ * waiter but have failed to get the rtmutex the first time. -+ * - * We have to replace the newowner TID in the user space variable. - * This must be atomic as we have to preserve the owner died bit here. - * -@@ -2249,7 +2247,7 @@ static int fixup_pi_state_owner(u32 __us - if (get_futex_value_locked(&uval, uaddr)) - goto handle_fault; - -- while (1) { -+ for (;;) { - newval = (uval & FUTEX_OWNER_DIED) | newtid; - - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) -@@ -2345,6 +2343,10 @@ static int fixup_owner(u32 __user *uaddr - /* - * Got the lock. We might not be the anticipated owner if we - * did a lock-steal - fix up the PI-state in that case: -+ * -+ * We can safely read pi_state->owner without holding wait_lock -+ * because we now own the rt_mutex, only the owner will attempt -+ * to change it. - */ - if (q->pi_state->owner != current) - ret = fixup_pi_state_owner(uaddr, q, current); -@@ -2584,6 +2586,7 @@ static int futex_lock_pi(u32 __user *uad - ktime_t *time, int trylock) - { - struct hrtimer_sleeper timeout, *to = NULL; -+ struct futex_pi_state *pi_state = NULL; - struct futex_hash_bucket *hb; - struct futex_q q = futex_q_init; - int res, ret; -@@ -2670,12 +2673,19 @@ static int futex_lock_pi(u32 __user *uad - * If fixup_owner() faulted and was unable to handle the fault, unlock - * it and return the fault to userspace. 
- */ -- if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) -- rt_mutex_futex_unlock(&q.pi_state->pi_mutex); -+ if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { -+ pi_state = q.pi_state; -+ get_pi_state(pi_state); -+ } - - /* Unqueue and drop the lock */ - unqueue_me_pi(&q); - -+ if (pi_state) { -+ rt_mutex_futex_unlock(&pi_state->pi_mutex); -+ put_pi_state(pi_state); -+ } -+ - goto out_put_key; - - out_unlock_put_key: -@@ -2738,10 +2748,36 @@ static int futex_unlock_pi(u32 __user *u - */ - top_waiter = futex_top_waiter(hb, &key); - if (top_waiter) { -- ret = wake_futex_pi(uaddr, uval, top_waiter, hb); -+ struct futex_pi_state *pi_state = top_waiter->pi_state; -+ -+ ret = -EINVAL; -+ if (!pi_state) -+ goto out_unlock; -+ -+ /* -+ * If current does not own the pi_state then the futex is -+ * inconsistent and user space fiddled with the futex value. -+ */ -+ if (pi_state->owner != current) -+ goto out_unlock; -+ -+ /* -+ * Grab a reference on the pi_state and drop hb->lock. -+ * -+ * The reference ensures pi_state lives, dropping the hb->lock -+ * is tricky.. wake_futex_pi() will take rt_mutex::wait_lock to -+ * close the races against futex_lock_pi(), but in case of -+ * _any_ fail we'll abort and retry the whole deal. -+ */ -+ get_pi_state(pi_state); -+ spin_unlock(&hb->lock); -+ -+ ret = wake_futex_pi(uaddr, uval, pi_state); -+ -+ put_pi_state(pi_state); -+ - /* -- * In case of success wake_futex_pi dropped the hash -- * bucket lock. -+ * Success, we're done! No tricky corner cases. - */ - if (!ret) - goto out_putkey; -@@ -2756,7 +2792,6 @@ static int futex_unlock_pi(u32 __user *u - * setting the FUTEX_WAITERS bit. Try again. - */ - if (ret == -EAGAIN) { -- spin_unlock(&hb->lock); - put_futex_key(&key); - goto retry; - } -@@ -2764,7 +2799,7 @@ static int futex_unlock_pi(u32 __user *u - * wake_futex_pi has detected invalid state. Tell user - * space. - */ -- goto out_unlock; -+ goto out_putkey; - } - - /* -@@ -2774,8 +2809,10 @@ static int futex_unlock_pi(u32 __user *u - * preserve the WAITERS bit not the OWNER_DIED one. We are the - * owner. - */ -- if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) -+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { -+ spin_unlock(&hb->lock); - goto pi_faulted; -+ } - - /* - * If uval has changed, let user space handle it. -@@ -2789,7 +2826,6 @@ static int futex_unlock_pi(u32 __user *u - return ret; - - pi_faulted: -- spin_unlock(&hb->lock); - put_futex_key(&key); - - ret = fault_in_user_writeable(uaddr); -@@ -2893,6 +2929,7 @@ static int futex_wait_requeue_pi(u32 __u - u32 __user *uaddr2) - { - struct hrtimer_sleeper timeout, *to = NULL; -+ struct futex_pi_state *pi_state = NULL; - struct rt_mutex_waiter rt_waiter; - struct futex_hash_bucket *hb; - union futex_key key2 = FUTEX_KEY_INIT; -@@ -2977,8 +3014,10 @@ static int futex_wait_requeue_pi(u32 __u - if (q.pi_state && (q.pi_state->owner != current)) { - spin_lock(q.lock_ptr); - ret = fixup_pi_state_owner(uaddr2, &q, current); -- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) -- rt_mutex_futex_unlock(&q.pi_state->pi_mutex); -+ if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { -+ pi_state = q.pi_state; -+ get_pi_state(pi_state); -+ } - /* - * Drop the reference to the pi state which - * the requeue_pi() code acquired for us. -@@ -3017,13 +3056,20 @@ static int futex_wait_requeue_pi(u32 __u - * the fault, unlock the rt_mutex and return the fault to - * userspace. 
- */ -- if (ret && rt_mutex_owner(pi_mutex) == current) -- rt_mutex_futex_unlock(pi_mutex); -+ if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { -+ pi_state = q.pi_state; -+ get_pi_state(pi_state); -+ } - - /* Unqueue and drop the lock. */ - unqueue_me_pi(&q); - } - -+ if (pi_state) { -+ rt_mutex_futex_unlock(&pi_state->pi_mutex); -+ put_pi_state(pi_state); -+ } -+ - if (ret == -EINTR) { - /* - * We've already been requeued, but cannot restart by calling diff --git a/debian/patches/features/all/rt/0008-mm-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0008-mm-Adjust-system_state-check.patch deleted file mode 100644 index 8a3b6cf0b..000000000 --- a/debian/patches/features/all/rt/0008-mm-Adjust-system_state-check.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:39 +0200 -Subject: [PATCH 08/17] mm: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -get_nid_for_pfn() checks for system_state == BOOTING to decide whether to -use early_pfn_to_nid() when CONFIG_DEFERRED_STRUCT_PAGE_INIT=y. - -That check is dubious, because the switch to state RUNNING happes way after -page_alloc_init_late() has been invoked. - -Change the check to less than RUNNING state so it covers the new -intermediate states as well. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Mel Gorman -Cc: Peter Zijlstra -Cc: Steven Rostedt -Link: http://lkml.kernel.org/r/20170516184735.528279534@linutronix.de -Signed-off-by: Ingo Molnar ---- - drivers/base/node.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/base/node.c -+++ b/drivers/base/node.c -@@ -377,7 +377,7 @@ static int __ref get_nid_for_pfn(unsigne - if (!pfn_valid_within(pfn)) - return -1; - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -- if (system_state == SYSTEM_BOOTING) -+ if (system_state < SYSTEM_RUNNING) - return early_pfn_to_nid(pfn); - #endif - page = pfn_to_page(pfn); diff --git a/debian/patches/features/all/rt/0008-rtmutex-Fix-more-prio-comparisons.patch b/debian/patches/features/all/rt/0008-rtmutex-Fix-more-prio-comparisons.patch deleted file mode 100644 index fe044ed5c..000000000 --- a/debian/patches/features/all/rt/0008-rtmutex-Fix-more-prio-comparisons.patch +++ /dev/null @@ -1,100 +0,0 @@ -From: Peter Zijlstra -Date: Thu, 23 Mar 2017 15:56:14 +0100 -Subject: [PATCH 8/9] rtmutex: Fix more prio comparisons -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -There was a pure ->prio comparison left in try_to_wake_rt_mutex(), -convert it to use rt_mutex_waiter_less(), noting that greater-or-equal -is not-less (both in kernel priority view). - -This necessitated the introduction of cmp_task() which creates a -pointer to an unnamed stack variable of struct rt_mutex_waiter type to -compare against tasks. - -With this, we can now also create and employ rt_mutex_waiter_equal(). 
- -Reviewed-and-tested-by: Juri Lelli -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Thomas Gleixner -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170323150216.455584638@infradead.org -Signed-off-by: Thomas Gleixner ---- - kernel/locking/rtmutex.c | 32 +++++++++++++++++++++++++++++--- - 1 file changed, 29 insertions(+), 3 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -224,6 +224,12 @@ static inline bool unlock_rt_mutex_safe( - } - #endif - -+/* -+ * Only use with rt_mutex_waiter_{less,equal}() -+ */ -+#define task_to_waiter(p) \ -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) -@@ -243,6 +249,25 @@ rt_mutex_waiter_less(struct rt_mutex_wai - return 0; - } - -+static inline int -+rt_mutex_waiter_equal(struct rt_mutex_waiter *left, -+ struct rt_mutex_waiter *right) -+{ -+ if (left->prio != right->prio) -+ return 0; -+ -+ /* -+ * If both waiters have dl_prio(), we check the deadlines of the -+ * associated tasks. -+ * If left waiter has a dl_prio(), and we didn't return 0 above, -+ * then right waiter has a dl_prio() too. -+ */ -+ if (dl_prio(left->prio)) -+ return left->deadline == right->deadline; -+ -+ return 1; -+} -+ - static void - rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) - { -@@ -553,7 +578,7 @@ static int rt_mutex_adjust_prio_chain(st - * enabled we continue, but stop the requeueing in the chain - * walk. - */ -- if (waiter->prio == task->prio && !dl_task(task)) { -+ if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { - if (!detect_deadlock) - goto out_unlock_pi; - else -@@ -856,7 +881,8 @@ static int try_to_take_rt_mutex(struct r - * the top waiter priority (kernel view), - * @task lost. 
- */ -- if (task->prio >= rt_mutex_top_waiter(lock)->prio) -+ if (!rt_mutex_waiter_less(task_to_waiter(task), -+ rt_mutex_top_waiter(lock))) - return 0; - - /* -@@ -1119,7 +1145,7 @@ void rt_mutex_adjust_pi(struct task_stru - raw_spin_lock_irqsave(&task->pi_lock, flags); - - waiter = task->pi_blocked_on; -- if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) { -+ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return; - } diff --git a/debian/patches/features/all/rt/0001-tracing-Add-hist_field_name-accessor.patch b/debian/patches/features/all/rt/0008-tracing-Add-hist_field_name-accessor.patch similarity index 86% rename from debian/patches/features/all/rt/0001-tracing-Add-hist_field_name-accessor.patch rename to debian/patches/features/all/rt/0008-tracing-Add-hist_field_name-accessor.patch index 02fd44864..3c370ac51 100644 --- a/debian/patches/features/all/rt/0001-tracing-Add-hist_field_name-accessor.patch +++ b/debian/patches/features/all/rt/0008-tracing-Add-hist_field_name-accessor.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:02 -0500 -Subject: [PATCH 01/32] tracing: Add hist_field_name() accessor -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:58:22 -0500 +Subject: [PATCH 08/42] tracing: Add hist_field_name() accessor +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz In preparation for hist_fields that won't be strictly based on trace_event_fields, add a new hist_field_name() accessor to allow that @@ -10,8 +10,8 @@ flexibility and update associated users. Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 68 ++++++++++++++++++++++++++------------- - 1 file changed, 46 insertions(+), 22 deletions(-) + kernel/trace/trace_events_hist.c | 67 ++++++++++++++++++++++++++------------- + 1 file changed, 45 insertions(+), 22 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior sort_key = &hist_data->sort_keys[i]; -@@ -703,8 +721,11 @@ static int create_sort_keys(struct hist_ +@@ -703,8 +721,10 @@ static int create_sort_keys(struct hist_ } for (j = 1; j < hist_data->n_fields; j++) { @@ -65,13 +65,12 @@ Signed-off-by: Sebastian Andrzej Siewior - if (field && (strcmp(field_name, field->name) == 0)) { + hist_field = hist_data->fields[j]; + test_name = hist_field_name(hist_field, 0); -+ if (test_name == NULL) -+ continue; ++ + if (strcmp(field_name, test_name) == 0) { sort_key->field_idx = j; descending = is_descending(field_str); if (descending < 0) { -@@ -952,6 +973,7 @@ hist_trigger_entry_print(struct seq_file +@@ -952,6 +972,7 @@ hist_trigger_entry_print(struct seq_file struct hist_field *key_field; char str[KSYM_SYMBOL_LEN]; bool multiline = false; @@ -79,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned int i; u64 uval; -@@ -963,26 +985,27 @@ hist_trigger_entry_print(struct seq_file +@@ -963,26 +984,27 @@ hist_trigger_entry_print(struct seq_file if (i > hist_data->n_vals) seq_puts(m, ", "); @@ -115,7 +114,7 @@ Signed-off-by: Sebastian Andrzej Siewior } else if (key_field->flags & HIST_FIELD_FL_SYSCALL) { const char *syscall_name; -@@ -991,8 +1014,8 @@ hist_trigger_entry_print(struct seq_file +@@ -991,8 +1013,8 @@ hist_trigger_entry_print(struct seq_file if (!syscall_name) syscall_name = 
"unknown_syscall"; @@ -126,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior } else if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { seq_puts(m, "stacktrace:\n"); hist_trigger_stacktrace_print(m, -@@ -1000,15 +1023,14 @@ hist_trigger_entry_print(struct seq_file +@@ -1000,15 +1022,14 @@ hist_trigger_entry_print(struct seq_file HIST_STACKTRACE_DEPTH); multiline = true; } else if (key_field->flags & HIST_FIELD_FL_LOG2) { @@ -145,7 +144,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -1021,13 +1043,13 @@ hist_trigger_entry_print(struct seq_file +@@ -1021,13 +1042,13 @@ hist_trigger_entry_print(struct seq_file tracing_map_read_sum(elt, HITCOUNT_IDX)); for (i = 1; i < hist_data->n_vals; i++) { @@ -163,7 +162,7 @@ Signed-off-by: Sebastian Andrzej Siewior tracing_map_read_sum(elt, i)); } } -@@ -1142,7 +1164,9 @@ static const char *get_hist_field_flags( +@@ -1142,7 +1163,9 @@ static const char *get_hist_field_flags( static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) { diff --git a/debian/patches/features/all/rt/0008-tracing-hrtimer-Take-all-clock-bases-and-modes-into-.patch b/debian/patches/features/all/rt/0008-tracing-hrtimer-Take-all-clock-bases-and-modes-into-.patch new file mode 100644 index 000000000..7d30bd54e --- /dev/null +++ b/debian/patches/features/all/rt/0008-tracing-hrtimer-Take-all-clock-bases-and-modes-into-.patch @@ -0,0 +1,56 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:45 +0200 +Subject: [PATCH 08/36] tracing: hrtimer: Take all clock bases and modes into + account +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +So far only CLOCK_MONOTONIC and CLOCK_REALTIME were taken into account as +well as HRTIMER_MODE_ABS/REL in hrtimer_init tracepoint. The query for +detecting timer mode ABS or REL is not valid, since the introduction of +HRTIMER_MODE_PINNED. + +HRTIMER_MODE_PINNED is not evaluated in hrtimer_init() call. But for the +sake of completeness print all given modes. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/trace/events/timer.h | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +--- a/include/trace/events/timer.h ++++ b/include/trace/events/timer.h +@@ -136,6 +136,20 @@ DEFINE_EVENT(timer_class, timer_cancel, + TP_ARGS(timer) + ); + ++#define decode_clockid(type) \ ++ __print_symbolic(type, \ ++ { CLOCK_REALTIME, "CLOCK_REALTIME" }, \ ++ { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" }, \ ++ { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" }, \ ++ { CLOCK_TAI, "CLOCK_TAI" }) ++ ++#define decode_hrtimer_mode(mode) \ ++ __print_symbolic(mode, \ ++ { HRTIMER_MODE_ABS, "ABS" }, \ ++ { HRTIMER_MODE_REL, "REL" }, \ ++ { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ ++ { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) ++ + /** + * hrtimer_init - called when the hrtimer is initialized + * @hrtimer: pointer to struct hrtimer +@@ -162,10 +176,8 @@ TRACE_EVENT(hrtimer_init, + ), + + TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, +- __entry->clockid == CLOCK_REALTIME ? +- "CLOCK_REALTIME" : "CLOCK_MONOTONIC", +- __entry->mode == HRTIMER_MODE_ABS ? 
+- "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") ++ decode_clockid(__entry->clockid), ++ decode_hrtimer_mode(__entry->mode)) + ); + + /** diff --git a/debian/patches/features/all/rt/0009-cpufreq-ia64-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0009-cpufreq-ia64-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index 664eaf97f..000000000 --- a/debian/patches/features/all/rt/0009-cpufreq-ia64-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,209 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:55:03 +0200 -Subject: [PATCH 09/13] cpufreq/ia64: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The get() and target() callbacks must run on the affected cpu. This is -achieved by temporarily setting the affinity of the calling thread to the -requested CPU and reset it to the original affinity afterwards. - -That's racy vs. concurrent affinity settings for that thread resulting in -code executing on the wrong CPU and overwriting the new affinity setting. - -Replace it by work_on_cpu(). All call pathes which invoke the callbacks are -already protected against CPU hotplug. - -Signed-off-by: Thomas Gleixner -Acked-by: Viresh Kumar -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: linux-pm@vger.kernel.org -Cc: Lai Jiangshan -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704122231100.2548@nanos -Signed-off-by: Thomas Gleixner ---- - drivers/cpufreq/ia64-acpi-cpufreq.c | 92 +++++++++++++++--------------------- - 1 file changed, 39 insertions(+), 53 deletions(-) - ---- a/drivers/cpufreq/ia64-acpi-cpufreq.c -+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c -@@ -34,6 +34,11 @@ struct cpufreq_acpi_io { - unsigned int resume; - }; - -+struct cpufreq_acpi_req { -+ unsigned int cpu; -+ unsigned int state; -+}; -+ - static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; - - static struct cpufreq_driver acpi_cpufreq_driver; -@@ -83,8 +88,7 @@ processor_get_pstate ( - static unsigned - extract_clock ( - struct cpufreq_acpi_io *data, -- unsigned value, -- unsigned int cpu) -+ unsigned value) - { - unsigned long i; - -@@ -98,60 +102,43 @@ extract_clock ( - } - - --static unsigned int -+static long - processor_get_freq ( -- struct cpufreq_acpi_io *data, -- unsigned int cpu) -+ void *arg) - { -- int ret = 0; -- u32 value = 0; -- cpumask_t saved_mask; -- unsigned long clock_freq; -+ struct cpufreq_acpi_req *req = arg; -+ unsigned int cpu = req->cpu; -+ struct cpufreq_acpi_io *data = acpi_io_data[cpu]; -+ u32 value; -+ int ret; - - pr_debug("processor_get_freq\n"); -- -- saved_mask = current->cpus_allowed; -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); - if (smp_processor_id() != cpu) -- goto migrate_end; -+ return -EAGAIN; - - /* processor_get_pstate gets the instantaneous frequency */ - ret = processor_get_pstate(&value); -- - if (ret) { -- set_cpus_allowed_ptr(current, &saved_mask); - pr_warn("get performance failed with error %d\n", ret); -- ret = 0; -- goto migrate_end; -+ return ret; - } -- clock_freq = extract_clock(data, value, cpu); -- ret = (clock_freq*1000); -- --migrate_end: -- set_cpus_allowed_ptr(current, &saved_mask); -- return ret; -+ return 1000 * extract_clock(data, value); - } - - --static int -+static long - processor_set_freq ( -- struct cpufreq_acpi_io *data, -- struct cpufreq_policy *policy, 
-- int state) -+ void *arg) - { -- int ret = 0; -- u32 value = 0; -- cpumask_t saved_mask; -- int retval; -+ struct cpufreq_acpi_req *req = arg; -+ unsigned int cpu = req->cpu; -+ struct cpufreq_acpi_io *data = acpi_io_data[cpu]; -+ int ret, state = req->state; -+ u32 value; - - pr_debug("processor_set_freq\n"); -- -- saved_mask = current->cpus_allowed; -- set_cpus_allowed_ptr(current, cpumask_of(policy->cpu)); -- if (smp_processor_id() != policy->cpu) { -- retval = -EAGAIN; -- goto migrate_end; -- } -+ if (smp_processor_id() != cpu) -+ return -EAGAIN; - - if (state == data->acpi_data.state) { - if (unlikely(data->resume)) { -@@ -159,8 +146,7 @@ processor_set_freq ( - data->resume = 0; - } else { - pr_debug("Already at target state (P%d)\n", state); -- retval = 0; -- goto migrate_end; -+ return 0; - } - } - -@@ -171,7 +157,6 @@ processor_set_freq ( - * First we write the target state's 'control' value to the - * control_register. - */ -- - value = (u32) data->acpi_data.states[state].control; - - pr_debug("Transitioning to state: 0x%08x\n", value); -@@ -179,17 +164,11 @@ processor_set_freq ( - ret = processor_set_pstate(value); - if (ret) { - pr_warn("Transition failed with error %d\n", ret); -- retval = -ENODEV; -- goto migrate_end; -+ return -ENODEV; - } - - data->acpi_data.state = state; -- -- retval = 0; -- --migrate_end: -- set_cpus_allowed_ptr(current, &saved_mask); -- return (retval); -+ return 0; - } - - -@@ -197,11 +176,13 @@ static unsigned int - acpi_cpufreq_get ( - unsigned int cpu) - { -- struct cpufreq_acpi_io *data = acpi_io_data[cpu]; -+ struct cpufreq_acpi_req req; -+ long ret; - -- pr_debug("acpi_cpufreq_get\n"); -+ req.cpu = cpu; -+ ret = work_on_cpu(cpu, processor_get_freq, &req); - -- return processor_get_freq(data, cpu); -+ return ret > 0 ? (unsigned int) ret : 0; - } - - -@@ -210,7 +191,12 @@ acpi_cpufreq_target ( - struct cpufreq_policy *policy, - unsigned int index) - { -- return processor_set_freq(acpi_io_data[policy->cpu], policy, index); -+ struct cpufreq_acpi_req req; -+ -+ req.cpu = policy->cpu; -+ req.state = index; -+ -+ return work_on_cpu(req.cpu, processor_set_freq, &req); - } - - static int diff --git a/debian/patches/features/all/rt/0009-cpufreq-pasemi-Adjust-system_state-check.patch b/debian/patches/features/all/rt/0009-cpufreq-pasemi-Adjust-system_state-check.patch deleted file mode 100644 index 62c1a2d99..000000000 --- a/debian/patches/features/all/rt/0009-cpufreq-pasemi-Adjust-system_state-check.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:40 +0200 -Subject: [PATCH 09/17] cpufreq/pasemi: Adjust system_state check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in pas_cpufreq_cpu_exit() to handle the extra -states. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Viresh Kumar -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Peter Zijlstra -Cc: Rafael J. 
Wysocki -Cc: Steven Rostedt -Cc: linuxppc-dev@lists.ozlabs.org -Link: http://lkml.kernel.org/r/20170516184735.620023128@linutronix.de -Signed-off-by: Ingo Molnar ---- - drivers/cpufreq/pasemi-cpufreq.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/cpufreq/pasemi-cpufreq.c -+++ b/drivers/cpufreq/pasemi-cpufreq.c -@@ -226,7 +226,7 @@ static int pas_cpufreq_cpu_exit(struct c - * We don't support CPU hotplug. Don't unmap after the system - * has already made it to a running state. - */ -- if (system_state != SYSTEM_BOOTING) -+ if (system_state >= SYSTEM_RUNNING) - return 0; - - if (sdcasr_mapbase) diff --git a/debian/patches/features/all/rt/0009-futex-rt_mutex-Introduce-rt_mutex_init_waiter.patch b/debian/patches/features/all/rt/0009-futex-rt_mutex-Introduce-rt_mutex_init_waiter.patch deleted file mode 100644 index 31f153a04..000000000 --- a/debian/patches/features/all/rt/0009-futex-rt_mutex-Introduce-rt_mutex_init_waiter.patch +++ /dev/null @@ -1,80 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:56 +0100 -Subject: [PATCH] futex,rt_mutex: Introduce rt_mutex_init_waiter() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 50809358dd7199aa7ce232f6877dd09ec30ef374 - -Since there's already two copies of this code, introduce a helper now -before adding a third one. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104151.950039479@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 5 +---- - kernel/locking/rtmutex.c | 12 +++++++++--- - kernel/locking/rtmutex_common.h | 1 + - 3 files changed, 11 insertions(+), 7 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2956,10 +2956,7 @@ static int futex_wait_requeue_pi(u32 __u - * The waiter is allocated on our stack, manipulated by the requeue - * code while we sleep on uaddr. 
- */ -- debug_rt_mutex_init_waiter(&rt_waiter); -- RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); -- RB_CLEAR_NODE(&rt_waiter.tree_entry); -- rt_waiter.task = NULL; -+ rt_mutex_init_waiter(&rt_waiter); - - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); - if (unlikely(ret != 0)) ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1153,6 +1153,14 @@ void rt_mutex_adjust_pi(struct task_stru - next_lock, NULL, task); - } - -+void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) -+{ -+ debug_rt_mutex_init_waiter(waiter); -+ RB_CLEAR_NODE(&waiter->pi_tree_entry); -+ RB_CLEAR_NODE(&waiter->tree_entry); -+ waiter->task = NULL; -+} -+ - /** - * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop - * @lock: the rt_mutex to take -@@ -1235,9 +1243,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, - unsigned long flags; - int ret = 0; - -- debug_rt_mutex_init_waiter(&waiter); -- RB_CLEAR_NODE(&waiter.pi_tree_entry); -- RB_CLEAR_NODE(&waiter.tree_entry); -+ rt_mutex_init_waiter(&waiter); - - /* - * Technically we could use raw_spin_[un]lock_irq() here, but this can ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -103,6 +103,7 @@ extern void rt_mutex_init_proxy_locked(s - struct task_struct *proxy_owner); - extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); -+extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); - extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); diff --git a/debian/patches/features/all/rt/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch b/debian/patches/features/all/rt/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch deleted file mode 100644 index b8c4efaa2..000000000 --- a/debian/patches/features/all/rt/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch +++ /dev/null @@ -1,41 +0,0 @@ -From: Mike Galbraith -Date: Wed, 5 Apr 2017 10:08:27 +0200 -Subject: [PATCH 9/9] rtmutex: Plug preempt count leak in - rt_mutex_futex_unlock() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -mark_wakeup_next_waiter() already disables preemption, doing so again -leaves us with an unpaired preempt_disable(). - -Fixes: 2a1c60299406 ("rtmutex: Deboost before waking up the top waiter") -Signed-off-by: Mike Galbraith -Cc: Peter Zijlstra -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Link: http://lkml.kernel.org/r/1491379707.6538.2.camel@gmx.de -Signed-off-by: Thomas Gleixner ---- - kernel/locking/rtmutex.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1581,13 +1581,13 @@ bool __sched __rt_mutex_futex_unlock(str - return false; /* done */ - } - -- mark_wakeup_next_waiter(wake_q, lock); - /* -- * We've already deboosted, retain preempt_disabled when dropping -- * the wait_lock to avoid inversion until the wakeup. Matched -- * by rt_mutex_postunlock(); -+ * We've already deboosted, mark_wakeup_next_waiter() will -+ * retain preempt_disabled when we drop the wait_lock, to -+ * avoid inversion prior to the wakeup. preempt_disable() -+ * therein pairs with rt_mutex_postunlock(). 
- */ -- preempt_disable(); -+ mark_wakeup_next_waiter(wake_q, lock); - - return true; /* call postunlock() */ - } diff --git a/debian/patches/features/all/rt/0002-tracing-Reimplement-log2.patch b/debian/patches/features/all/rt/0009-tracing-Reimplement-log2.patch similarity index 93% rename from debian/patches/features/all/rt/0002-tracing-Reimplement-log2.patch rename to debian/patches/features/all/rt/0009-tracing-Reimplement-log2.patch index c61f1b28a..abceebb6a 100644 --- a/debian/patches/features/all/rt/0002-tracing-Reimplement-log2.patch +++ b/debian/patches/features/all/rt/0009-tracing-Reimplement-log2.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:03 -0500 -Subject: [PATCH 02/32] tracing: Reimplement log2 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:58:23 -0500 +Subject: [PATCH 09/42] tracing: Reimplement log2 +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz log2 as currently implemented applies only to u64 trace_event_field derived fields, and assumes that anything it's applied to is a u64 @@ -79,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior + return; + + for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) -+ destroy_hist_field(hist_field->operands[i], ++level); ++ destroy_hist_field(hist_field->operands[i], level + 1); + kfree(hist_field); } diff --git a/debian/patches/features/all/rt/0009-tracing-hrtimer-Print-hrtimer-mode-in-hrtimer_start-.patch b/debian/patches/features/all/rt/0009-tracing-hrtimer-Print-hrtimer-mode-in-hrtimer_start-.patch new file mode 100644 index 000000000..1a5d61f94 --- /dev/null +++ b/debian/patches/features/all/rt/0009-tracing-hrtimer-Print-hrtimer-mode-in-hrtimer_start-.patch @@ -0,0 +1,114 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:46 +0200 +Subject: [PATCH 09/36] tracing: hrtimer: Print hrtimer mode in hrtimer_start + tracepoint +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The hrtimer_start tracepoint lacks the mode information. The mode is +important because consecutive starts can switch from ABS to REL or from +PINNED to non PINNED. + +Add the mode information. 
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/trace/events/timer.h | 13 ++++++++----- + kernel/time/hrtimer.c | 16 +++++++++------- + 2 files changed, 17 insertions(+), 12 deletions(-) + +--- a/include/trace/events/timer.h ++++ b/include/trace/events/timer.h +@@ -186,15 +186,16 @@ TRACE_EVENT(hrtimer_init, + */ + TRACE_EVENT(hrtimer_start, + +- TP_PROTO(struct hrtimer *hrtimer), ++ TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode), + +- TP_ARGS(hrtimer), ++ TP_ARGS(hrtimer, mode), + + TP_STRUCT__entry( + __field( void *, hrtimer ) + __field( void *, function ) + __field( s64, expires ) + __field( s64, softexpires ) ++ __field( enum hrtimer_mode, mode ) + ), + + TP_fast_assign( +@@ -202,12 +203,14 @@ TRACE_EVENT(hrtimer_start, + __entry->function = hrtimer->function; + __entry->expires = hrtimer_get_expires(hrtimer); + __entry->softexpires = hrtimer_get_softexpires(hrtimer); ++ __entry->mode = mode; + ), + +- TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", +- __entry->hrtimer, __entry->function, ++ TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu " ++ "mode=%s", __entry->hrtimer, __entry->function, + (unsigned long long) __entry->expires, +- (unsigned long long) __entry->softexpires) ++ (unsigned long long) __entry->softexpires, ++ decode_hrtimer_mode(__entry->mode)) + ); + + /** +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -435,10 +435,11 @@ debug_init(struct hrtimer *timer, clocki + trace_hrtimer_init(timer, clockid, mode); + } + +-static inline void debug_activate(struct hrtimer *timer) ++static inline void debug_activate(struct hrtimer *timer, ++ enum hrtimer_mode mode) + { + debug_hrtimer_activate(timer); +- trace_hrtimer_start(timer); ++ trace_hrtimer_start(timer, mode); + } + + static inline void debug_deactivate(struct hrtimer *timer) +@@ -830,9 +831,10 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); + * Returns 1 when the new timer is the leftmost timer in the tree. + */ + static int enqueue_hrtimer(struct hrtimer *timer, +- struct hrtimer_clock_base *base) ++ struct hrtimer_clock_base *base, ++ enum hrtimer_mode mode) + { +- debug_activate(timer); ++ debug_activate(timer, mode); + + base->cpu_base->active_bases |= 1 << base->index; + +@@ -955,7 +957,7 @@ void hrtimer_start_range_ns(struct hrtim + /* Switch the timer base, if necessary: */ + new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); + +- leftmost = enqueue_hrtimer(timer, new_base); ++ leftmost = enqueue_hrtimer(timer, new_base, mode); + if (!leftmost) + goto unlock; + +@@ -1224,7 +1226,7 @@ static void __run_hrtimer(struct hrtimer + */ + if (restart != HRTIMER_NORESTART && + !(timer->state & HRTIMER_STATE_ENQUEUED)) +- enqueue_hrtimer(timer, base); ++ enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); + + /* + * Separate the ->running assignment from the ->state assignment. +@@ -1623,7 +1625,7 @@ static void migrate_hrtimer_list(struct + * sort out already expired timers and reprogram the + * event device. 
+ */ +- enqueue_hrtimer(timer, new_base); ++ enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); + } + } + diff --git a/debian/patches/features/all/rt/0010-cpufreq-sh-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0010-cpufreq-sh-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index 19126ab67..000000000 --- a/debian/patches/features/all/rt/0010-cpufreq-sh-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:36 +0200 -Subject: [PATCH 10/13] cpufreq/sh: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The target() callback must run on the affected cpu. This is achieved by -temporarily setting the affinity of the calling thread to the requested CPU -and reset it to the original affinity afterwards. - -That's racy vs. concurrent affinity settings for that thread resulting in -code executing on the wrong CPU. - -Replace it by work_on_cpu(). All call pathes which invoke the callbacks are -already protected against CPU hotplug. - -Signed-off-by: Thomas Gleixner -Acked-by: Viresh Kumar -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: linux-pm@vger.kernel.org -Cc: Lai Jiangshan -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201042.958216363@linutronix.de -Signed-off-by: Thomas Gleixner ---- - drivers/cpufreq/sh-cpufreq.c | 45 +++++++++++++++++++++++++------------------ - 1 file changed, 27 insertions(+), 18 deletions(-) - ---- a/drivers/cpufreq/sh-cpufreq.c -+++ b/drivers/cpufreq/sh-cpufreq.c -@@ -30,54 +30,63 @@ - - static DEFINE_PER_CPU(struct clk, sh_cpuclk); - -+struct cpufreq_target { -+ struct cpufreq_policy *policy; -+ unsigned int freq; -+}; -+ - static unsigned int sh_cpufreq_get(unsigned int cpu) - { - return (clk_get_rate(&per_cpu(sh_cpuclk, cpu)) + 500) / 1000; - } - --/* -- * Here we notify other drivers of the proposed change and the final change. 
-- */ --static int sh_cpufreq_target(struct cpufreq_policy *policy, -- unsigned int target_freq, -- unsigned int relation) -+static long __sh_cpufreq_target(void *arg) - { -- unsigned int cpu = policy->cpu; -+ struct cpufreq_target *target = arg; -+ struct cpufreq_policy *policy = target->policy; -+ int cpu = policy->cpu; - struct clk *cpuclk = &per_cpu(sh_cpuclk, cpu); -- cpumask_t cpus_allowed; - struct cpufreq_freqs freqs; - struct device *dev; - long freq; - -- cpus_allowed = current->cpus_allowed; -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); -- -- BUG_ON(smp_processor_id() != cpu); -+ if (smp_processor_id() != cpu) -+ return -ENODEV; - - dev = get_cpu_device(cpu); - - /* Convert target_freq from kHz to Hz */ -- freq = clk_round_rate(cpuclk, target_freq * 1000); -+ freq = clk_round_rate(cpuclk, target->freq * 1000); - - if (freq < (policy->min * 1000) || freq > (policy->max * 1000)) - return -EINVAL; - -- dev_dbg(dev, "requested frequency %u Hz\n", target_freq * 1000); -+ dev_dbg(dev, "requested frequency %u Hz\n", target->freq * 1000); - - freqs.old = sh_cpufreq_get(cpu); - freqs.new = (freq + 500) / 1000; - freqs.flags = 0; - -- cpufreq_freq_transition_begin(policy, &freqs); -- set_cpus_allowed_ptr(current, &cpus_allowed); -+ cpufreq_freq_transition_begin(target->policy, &freqs); - clk_set_rate(cpuclk, freq); -- cpufreq_freq_transition_end(policy, &freqs, 0); -+ cpufreq_freq_transition_end(target->policy, &freqs, 0); - - dev_dbg(dev, "set frequency %lu Hz\n", freq); -- - return 0; - } - -+/* -+ * Here we notify other drivers of the proposed change and the final change. -+ */ -+static int sh_cpufreq_target(struct cpufreq_policy *policy, -+ unsigned int target_freq, -+ unsigned int relation) -+{ -+ struct cpufreq_target data = { .policy = policy, .freq = target_freq }; -+ -+ return work_on_cpu(policy->cpu, __sh_cpufreq_target, &data); -+} -+ - static int sh_cpufreq_verify(struct cpufreq_policy *policy) - { - struct clk *cpuclk = &per_cpu(sh_cpuclk, policy->cpu); diff --git a/debian/patches/features/all/rt/0010-futex-rt_mutex-Restructure-rt_mutex_finish_proxy_loc.patch b/debian/patches/features/all/rt/0010-futex-rt_mutex-Restructure-rt_mutex_finish_proxy_loc.patch deleted file mode 100644 index 7bcbad22f..000000000 --- a/debian/patches/features/all/rt/0010-futex-rt_mutex-Restructure-rt_mutex_finish_proxy_loc.patch +++ /dev/null @@ -1,159 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:57 +0100 -Subject: [PATCH] futex,rt_mutex: Restructure rt_mutex_finish_proxy_lock() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 38d589f2fd08f1296aea3ce62bebd185125c6d81 - -With the ultimate goal of keeping rt_mutex wait_list and futex_q waiters -consistent it's necessary to split 'rt_mutex_futex_lock()' into finer -parts, such that only the actual blocking can be done without hb->lock -held. - -Split split_mutex_finish_proxy_lock() into two parts, one that does the -blocking and one that does remove_waiter() when the lock acquire failed. - -When the rtmutex was acquired successfully the waiter can be removed in the -acquisiton path safely, since there is no concurrency on the lock owner. - -This means that, except for futex_lock_pi(), all wait_list modifications -are done with both hb->lock and wait_lock held. 
- -[bigeasy@linutronix.de: fix for futex_requeue_pi_signal_restart] - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104152.001659630@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 7 +++-- - kernel/locking/rtmutex.c | 52 ++++++++++++++++++++++++++++++++++------ - kernel/locking/rtmutex_common.h | 8 +++--- - 3 files changed, 55 insertions(+), 12 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3032,10 +3032,13 @@ static int futex_wait_requeue_pi(u32 __u - */ - WARN_ON(!q.pi_state); - pi_mutex = &q.pi_state->pi_mutex; -- ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); -- debug_rt_mutex_free_waiter(&rt_waiter); -+ ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); - - spin_lock(q.lock_ptr); -+ if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) -+ ret = 0; -+ -+ debug_rt_mutex_free_waiter(&rt_waiter); - /* - * Fixup the pi_state owner and possibly acquire the lock if we - * haven't already. ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1753,21 +1753,23 @@ struct task_struct *rt_mutex_next_owner( - } - - /** -- * rt_mutex_finish_proxy_lock() - Complete lock acquisition -+ * rt_mutex_wait_proxy_lock() - Wait for lock acquisition - * @lock: the rt_mutex we were woken on - * @to: the timeout, null if none. hrtimer should already have - * been started. - * @waiter: the pre-initialized rt_mutex_waiter - * -- * Complete the lock acquisition started our behalf by another thread. -+ * Wait for the the lock acquisition started on our behalf by -+ * rt_mutex_start_proxy_lock(). Upon failure, the caller must call -+ * rt_mutex_cleanup_proxy_lock(). - * - * Returns: - * 0 - success - * <0 - error, one of -EINTR, -ETIMEDOUT - * -- * Special API call for PI-futex requeue support -+ * Special API call for PI-futex support - */ --int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, -+int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter) - { -@@ -1780,9 +1782,6 @@ int rt_mutex_finish_proxy_lock(struct rt - /* sleep on the mutex */ - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - -- if (unlikely(ret)) -- remove_waiter(lock, waiter); -- - /* - * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might - * have to fix that up. -@@ -1793,3 +1792,42 @@ int rt_mutex_finish_proxy_lock(struct rt - - return ret; - } -+ -+/** -+ * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition -+ * @lock: the rt_mutex we were woken on -+ * @waiter: the pre-initialized rt_mutex_waiter -+ * -+ * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). -+ * -+ * Unless we acquired the lock; we're still enqueued on the wait-list and can -+ * in fact still be granted ownership until we're removed. Therefore we can -+ * find we are in fact the owner and must disregard the -+ * rt_mutex_wait_proxy_lock() failure. -+ * -+ * Returns: -+ * true - did the cleanup, we done. -+ * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, -+ * caller should disregards its return value. 
-+ * -+ * Special API call for PI-futex support -+ */ -+bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter) -+{ -+ bool cleanup = false; -+ -+ raw_spin_lock_irq(&lock->wait_lock); -+ /* -+ * Unless we're the owner; we're still enqueued on the wait_list. -+ * So check if we became owner, if not, take us off the wait_list. -+ */ -+ if (rt_mutex_owner(lock) != current) { -+ remove_waiter(lock, waiter); -+ fixup_rt_mutex_waiters(lock); -+ cleanup = true; -+ } -+ raw_spin_unlock_irq(&lock->wait_lock); -+ -+ return cleanup; -+} ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -107,9 +107,11 @@ extern void rt_mutex_init_waiter(struct - extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); --extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, -- struct hrtimer_sleeper *to, -- struct rt_mutex_waiter *waiter); -+extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, -+ struct hrtimer_sleeper *to, -+ struct rt_mutex_waiter *waiter); -+extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter); - - extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); - extern int rt_mutex_futex_trylock(struct rt_mutex *l); diff --git a/debian/patches/features/all/rt/0010-hrtimer-Switch-for-loop-to-_ffs-evaluation.patch b/debian/patches/features/all/rt/0010-hrtimer-Switch-for-loop-to-_ffs-evaluation.patch new file mode 100644 index 000000000..289027990 --- /dev/null +++ b/debian/patches/features/all/rt/0010-hrtimer-Switch-for-loop-to-_ffs-evaluation.patch @@ -0,0 +1,80 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:47 +0200 +Subject: [PATCH 10/36] hrtimer: Switch for loop to _ffs() evaluation +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Looping over all clock bases to find active bits is suboptimal if not all +bases are active. + +Avoid this by converting it to a __ffs() evaluation. The functionallity is +outsourced into an own function and is called via a macro as suggested by +Peter Zijlstra. 
+ +Suggested-by: Peter Zijlstra +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 31 +++++++++++++++++++++---------- + 1 file changed, 21 insertions(+), 10 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -457,20 +457,34 @@ static inline void hrtimer_update_next_t + #endif + } + ++static struct hrtimer_clock_base * ++__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) ++{ ++ struct hrtimer_clock_base *base = NULL; ++ ++ if (*active) { ++ unsigned int idx = __ffs(*active); ++ *active &= ~(1U << idx); ++ base = &cpu_base->clock_base[idx]; ++ } ++ ++ return base; ++} ++ ++#define for_each_active_base(base, cpu_base, active) \ ++ while ((base = __next_base((cpu_base), &(active)))) ++ + static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) + { +- struct hrtimer_clock_base *base = cpu_base->clock_base; ++ struct hrtimer_clock_base *base; + unsigned int active = cpu_base->active_bases; + ktime_t expires, expires_next = KTIME_MAX; + + hrtimer_update_next_timer(cpu_base, NULL); +- for (; active; base++, active >>= 1) { ++ for_each_active_base(base, cpu_base, active) { + struct timerqueue_node *next; + struct hrtimer *timer; + +- if (!(active & 0x01)) +- continue; +- + next = timerqueue_getnext(&base->active); + timer = container_of(next, struct hrtimer, node); + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); +@@ -1243,16 +1257,13 @@ static void __run_hrtimer(struct hrtimer + + static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) + { +- struct hrtimer_clock_base *base = cpu_base->clock_base; ++ struct hrtimer_clock_base *base; + unsigned int active = cpu_base->active_bases; + +- for (; active; base++, active >>= 1) { ++ for_each_active_base(base, cpu_base, active) { + struct timerqueue_node *node; + ktime_t basenow; + +- if (!(active & 0x01)) +- continue; +- + basenow = ktime_add(now, base->offset); + + while ((node = timerqueue_getnext(&base->active))) { diff --git a/debian/patches/features/all/rt/0010-iommu-vt-d-Adjust-system_state-checks.patch b/debian/patches/features/all/rt/0010-iommu-vt-d-Adjust-system_state-checks.patch deleted file mode 100644 index 42ef6f122..000000000 --- a/debian/patches/features/all/rt/0010-iommu-vt-d-Adjust-system_state-checks.patch +++ /dev/null @@ -1,47 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:41 +0200 -Subject: [PATCH 10/17] iommu/vt-d: Adjust system_state checks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state checks in dmar_parse_one_atsr() and -dmar_iommu_notify_scope_dev() to handle the extra states. 
- -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Joerg Roedel -Cc: David Woodhouse -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Mark Rutland -Cc: Peter Zijlstra -Cc: Steven Rostedt -Cc: iommu@lists.linux-foundation.org -Link: http://lkml.kernel.org/r/20170516184735.712365947@linutronix.de -Signed-off-by: Ingo Molnar ---- - drivers/iommu/intel-iommu.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/iommu/intel-iommu.c -+++ b/drivers/iommu/intel-iommu.c -@@ -4310,7 +4310,7 @@ int dmar_parse_one_atsr(struct acpi_dmar - struct acpi_dmar_atsr *atsr; - struct dmar_atsr_unit *atsru; - -- if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled) -+ if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled) - return 0; - - atsr = container_of(hdr, struct acpi_dmar_atsr, header); -@@ -4560,7 +4560,7 @@ int dmar_iommu_notify_scope_dev(struct d - struct acpi_dmar_atsr *atsr; - struct acpi_dmar_reserved_memory *rmrr; - -- if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) -+ if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING) - return 0; - - list_for_each_entry(rmrru, &dmar_rmrr_units, list) { diff --git a/debian/patches/features/all/rt/0010-tracing-Add-NO_DISCARD-event-file-flag.patch b/debian/patches/features/all/rt/0010-tracing-Add-NO_DISCARD-event-file-flag.patch deleted file mode 100644 index 55aeedc84..000000000 --- a/debian/patches/features/all/rt/0010-tracing-Add-NO_DISCARD-event-file-flag.patch +++ /dev/null @@ -1,106 +0,0 @@ -From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:11 -0500 -Subject: [PATCH 10/32] tracing: Add NO_DISCARD event file flag -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Whenever an event_command has a post-trigger that needs access to the -event record, the event record can't be discarded, or the post-trigger -will eventually see bogus data. - -In order to allow the discard check to treat this case separately, add -an EVENT_FILE_FL_NO_DISCARD flag to the event file flags, along with -code in the discard check that checks the flag and avoids the discard -when the flag is set. 
- -Signed-off-by: Tom Zanussi -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/trace_events.h | 3 +++ - kernel/trace/trace.h | 13 ++++++++++--- - kernel/trace/trace_events_trigger.c | 16 +++++++++++++--- - 3 files changed, 26 insertions(+), 6 deletions(-) - ---- a/include/linux/trace_events.h -+++ b/include/linux/trace_events.h -@@ -306,6 +306,7 @@ enum { - EVENT_FILE_FL_TRIGGER_MODE_BIT, - EVENT_FILE_FL_TRIGGER_COND_BIT, - EVENT_FILE_FL_PID_FILTER_BIT, -+ EVENT_FILE_FL_NO_DISCARD_BIT, - }; - - /* -@@ -320,6 +321,7 @@ enum { - * TRIGGER_MODE - When set, invoke the triggers associated with the event - * TRIGGER_COND - When set, one or more triggers has an associated filter - * PID_FILTER - When set, the event is filtered based on pid -+ * NO_DISCARD - When set, do not discard events, something needs them later - */ - enum { - EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), -@@ -331,6 +333,7 @@ enum { - EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), - EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), - EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), -+ EVENT_FILE_FL_NO_DISCARD = (1 << EVENT_FILE_FL_NO_DISCARD_BIT), - }; - - struct trace_event_file { ---- a/kernel/trace/trace.h -+++ b/kernel/trace/trace.h -@@ -1191,9 +1191,16 @@ static inline bool - if (eflags & EVENT_FILE_FL_TRIGGER_COND) - *tt = event_triggers_call(file, entry, event); - -- if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || -- (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && -- !filter_match_preds(file->filter, entry))) { -+ if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && -+ !filter_match_preds(file->filter, entry)) { -+ __trace_event_discard_commit(buffer, event); -+ return true; -+ } -+ -+ if (test_bit(EVENT_FILE_FL_NO_DISCARD_BIT, &file->flags)) -+ return false; -+ -+ if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags)) { - __trace_event_discard_commit(buffer, event); - return true; - } ---- a/kernel/trace/trace_events_trigger.c -+++ b/kernel/trace/trace_events_trigger.c -@@ -505,20 +505,30 @@ clear_event_triggers(struct trace_array - void update_cond_flag(struct trace_event_file *file) - { - struct event_trigger_data *data; -- bool set_cond = false; -+ bool set_cond = false, set_no_discard = false; - - list_for_each_entry_rcu(data, &file->triggers, list) { - if (data->filter || event_command_post_trigger(data->cmd_ops) || -- event_command_needs_rec(data->cmd_ops)) { -+ event_command_needs_rec(data->cmd_ops)) - set_cond = true; -+ -+ if (event_command_post_trigger(data->cmd_ops) && -+ event_command_needs_rec(data->cmd_ops)) -+ set_no_discard = true; -+ -+ if (set_cond && set_no_discard) - break; -- } - } - - if (set_cond) - set_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags); - else - clear_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags); -+ -+ if (set_no_discard) -+ set_bit(EVENT_FILE_FL_NO_DISCARD_BIT, &file->flags); -+ else -+ clear_bit(EVENT_FILE_FL_NO_DISCARD_BIT, &file->flags); - } - - /** diff --git a/debian/patches/features/all/rt/0010-tracing-Add-support-to-detect-and-avoid-duplicates.patch b/debian/patches/features/all/rt/0010-tracing-Add-support-to-detect-and-avoid-duplicates.patch new file mode 100644 index 000000000..35c142e5a --- /dev/null +++ b/debian/patches/features/all/rt/0010-tracing-Add-support-to-detect-and-avoid-duplicates.patch @@ -0,0 +1,114 @@ +From: Vedang Patel +Date: Fri, 22 Sep 2017 14:59:41 -0500 +Subject: [PATCH 10/42] tracing: Add support to detect and avoid duplicates +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +A duplicate in the tracing_map hash table is when 2 different entries +have the same key and, as a result, the key_hash. This is possible due +to a race condition in the algorithm. This race condition is inherent to +the algorithm and not a bug. This was fine because, until now, we were +only interested in the sum of all the values related to a particular +key (the duplicates are dealt with in tracing_map_sort_entries()). But, +with the inclusion of variables[1], we are interested in individual +values. So, it will not be clear what value to choose when +there are duplicates. So, the duplicates need to be removed. + +The duplicates can occur in the code in the following scenarios: + +- A thread is in the process of adding a new element. It has +successfully executed cmpxchg() and inserted the key. But, it is still +not done acquiring the trace_map_elt struct, populating it and storing +the pointer to the struct in the value field of tracing_map hash table. +If another thread comes in at this time and wants to add an element with +the same key, it will not see the current element and add a new one. + +- There are multiple threads trying to execute cmpxchg at the same time, +one of the threads will succeed and the others will fail. The ones which +fail will go ahead increment 'idx' and add a new element there creating +a duplicate. + +This patch detects and avoids the first condition by asking the thread +which detects the duplicate to loop one more time. There is also a +possibility of infinite loop if the thread which is trying to insert +goes to sleep indefinitely and the one which is trying to insert a new +element detects a duplicate. Which is why, the thread loops for +map_size iterations before returning NULL. + +The second scenario is avoided by preventing the threads which failed +cmpxchg() from incrementing idx. This way, they will loop +around and check if the thread which succeeded in executing cmpxchg() +had the same key. + +[1] http://lkml.kernel.org/r/cover.1498510759.git.tom.zanussi@linux.intel.com + +Signed-off-by: Vedang Patel +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/tracing_map.c | 41 ++++++++++++++++++++++++++++++++++++----- + 1 file changed, 36 insertions(+), 5 deletions(-) + +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -414,7 +414,9 @@ static inline struct tracing_map_elt * + __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) + { + u32 idx, key_hash, test_key; ++ int dup_try = 0; + struct tracing_map_entry *entry; ++ struct tracing_map_elt *val; + + key_hash = jhash(key, map->key_size, 0); + if (key_hash == 0) +@@ -426,11 +428,33 @@ static inline struct tracing_map_elt * + entry = TRACING_MAP_ENTRY(map->map, idx); + test_key = entry->key; + +- if (test_key && test_key == key_hash && entry->val && +- keys_match(key, entry->val->key, map->key_size)) { +- if (!lookup_only) +- atomic64_inc(&map->hits); +- return entry->val; ++ if (test_key && test_key == key_hash) { ++ val = READ_ONCE(entry->val); ++ if (val && ++ keys_match(key, val->key, map->key_size)) { ++ if (!lookup_only) ++ atomic64_inc(&map->hits); ++ return val; ++ } else if (unlikely(!val)) { ++ /* ++ * The key is present. But, val (pointer to elt ++ * struct) is still NULL. which means some other ++ * thread is in the process of inserting an ++ * element. ++ * ++ * On top of that, it's key_hash is same as the ++ * one being inserted right now. 
So, it's ++ * possible that the element has the same ++ * key as well. ++ */ ++ ++ dup_try++; ++ if (dup_try > map->map_size) { ++ atomic64_inc(&map->drops); ++ break; ++ } ++ continue; ++ } + } + + if (!test_key) { +@@ -452,6 +476,13 @@ static inline struct tracing_map_elt * + atomic64_inc(&map->hits); + + return entry->val; ++ } else { ++ /* ++ * cmpxchg() failed. Loop around once ++ * more to check what key was inserted. ++ */ ++ dup_try++; ++ continue; + } + } + diff --git a/debian/patches/features/all/rt/0011-cpufreq-sparc-us3-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0011-cpufreq-sparc-us3-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index b6bc11325..000000000 --- a/debian/patches/features/all/rt/0011-cpufreq-sparc-us3-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 12 Apr 2017 22:07:37 +0200 -Subject: [PATCH 11/13] cpufreq/sparc-us3: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The access to the safari config register in the CPU frequency functions -must be executed on the target CPU. This is achieved by temporarily setting -the affinity of the calling user space thread to the requested CPU and -reset it to the original affinity afterwards. - -That's racy vs. CPU hotplug and concurrent affinity settings for that -thread resulting in code executing on the wrong CPU and overwriting the -new affinity setting. - -Replace it by a straight forward smp function call. - -Signed-off-by: Thomas Gleixner -Acked-by: Viresh Kumar -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: linux-pm@vger.kernel.org -Cc: Lai Jiangshan -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. 
Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/20170412201043.047558840@linutronix.de -Signed-off-by: Thomas Gleixner ---- - drivers/cpufreq/sparc-us3-cpufreq.c | 46 ++++++++++++------------------------ - 1 file changed, 16 insertions(+), 30 deletions(-) - ---- a/drivers/cpufreq/sparc-us3-cpufreq.c -+++ b/drivers/cpufreq/sparc-us3-cpufreq.c -@@ -35,22 +35,28 @@ static struct us3_freq_percpu_info *us3_ - #define SAFARI_CFG_DIV_32 0x0000000080000000UL - #define SAFARI_CFG_DIV_MASK 0x00000000C0000000UL - --static unsigned long read_safari_cfg(void) -+static void read_safari_cfg(void *arg) - { -- unsigned long ret; -+ unsigned long ret, *val = arg; - - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=&r" (ret) - : "i" (ASI_SAFARI_CONFIG)); -- return ret; -+ *val = ret; - } - --static void write_safari_cfg(unsigned long val) -+static void update_safari_cfg(void *arg) - { -+ unsigned long reg, *new_bits = arg; -+ -+ read_safari_cfg(®); -+ reg &= ~SAFARI_CFG_DIV_MASK; -+ reg |= *new_bits; -+ - __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" - "membar #Sync" - : /* no outputs */ -- : "r" (val), "i" (ASI_SAFARI_CONFIG) -+ : "r" (reg), "i" (ASI_SAFARI_CONFIG) - : "memory"); - } - -@@ -78,29 +84,17 @@ static unsigned long get_current_freq(un - - static unsigned int us3_freq_get(unsigned int cpu) - { -- cpumask_t cpus_allowed; - unsigned long reg; -- unsigned int ret; -- -- cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); -- -- reg = read_safari_cfg(); -- ret = get_current_freq(cpu, reg); - -- set_cpus_allowed_ptr(current, &cpus_allowed); -- -- return ret; -+ if (smp_call_function_single(cpu, read_safari_cfg, ®, 1)) -+ return 0; -+ return get_current_freq(cpu, reg); - } - - static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index) - { - unsigned int cpu = policy->cpu; -- unsigned long new_bits, new_freq, reg; -- cpumask_t cpus_allowed; -- -- cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); -+ unsigned long new_bits, new_freq; - - new_freq = sparc64_get_clock_tick(cpu) / 1000; - switch (index) { -@@ -121,15 +115,7 @@ static int us3_freq_target(struct cpufre - BUG(); - } - -- reg = read_safari_cfg(); -- -- reg &= ~SAFARI_CFG_DIV_MASK; -- reg |= new_bits; -- write_safari_cfg(reg); -- -- set_cpus_allowed_ptr(current, &cpus_allowed); -- -- return 0; -+ return smp_call_function_single(cpu, update_safari_cfg, &new_bits, 1); - } - - static int __init us3_freq_cpu_init(struct cpufreq_policy *policy) diff --git a/debian/patches/features/all/rt/0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch b/debian/patches/features/all/rt/0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch deleted file mode 100644 index dedca6b4a..000000000 --- a/debian/patches/features/all/rt/0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch +++ /dev/null @@ -1,267 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:58 +0100 -Subject: [PATCH] futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit cfafcd117da0216520568c195cb2f6cd1980c4bb - -By changing futex_lock_pi() to use rt_mutex_*_proxy_lock() all wait_list -modifications are done under both hb->lock and wait_lock. - -This closes the obvious interleave pattern between futex_lock_pi() and -futex_unlock_pi(), but not entirely so. 
See below: - -Before: - -futex_lock_pi() futex_unlock_pi() - unlock hb->lock - - lock hb->lock - unlock hb->lock - - lock rt_mutex->wait_lock - unlock rt_mutex_wait_lock - -EAGAIN - - lock rt_mutex->wait_lock - list_add - unlock rt_mutex->wait_lock - - schedule() - - lock rt_mutex->wait_lock - list_del - unlock rt_mutex->wait_lock - - - -EAGAIN - - lock hb->lock - - -After: - -futex_lock_pi() futex_unlock_pi() - - lock hb->lock - lock rt_mutex->wait_lock - list_add - unlock rt_mutex->wait_lock - unlock hb->lock - - schedule() - lock hb->lock - unlock hb->lock - lock hb->lock - lock rt_mutex->wait_lock - list_del - unlock rt_mutex->wait_lock - - lock rt_mutex->wait_lock - unlock rt_mutex_wait_lock - -EAGAIN - - unlock hb->lock - - -It does however solve the earlier starvation/live-lock scenario which got -introduced with the -EAGAIN since unlike the before scenario; where the --EAGAIN happens while futex_unlock_pi() doesn't hold any locks; in the -after scenario it happens while futex_unlock_pi() actually holds a lock, -and then it is serialized on that lock. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104152.062785528@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 77 ++++++++++++++++++++++++++++------------ - kernel/locking/rtmutex.c | 26 +++---------- - kernel/locking/rtmutex_common.h | 1 - 3 files changed, 62 insertions(+), 42 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2099,20 +2099,7 @@ queue_unlock(struct futex_hash_bucket *h - hb_waiters_dec(hb); - } - --/** -- * queue_me() - Enqueue the futex_q on the futex_hash_bucket -- * @q: The futex_q to enqueue -- * @hb: The destination hash bucket -- * -- * The hb->lock must be held by the caller, and is released here. A call to -- * queue_me() is typically paired with exactly one call to unqueue_me(). The -- * exceptions involve the PI related operations, which may use unqueue_me_pi() -- * or nothing if the unqueue is done as part of the wake process and the unqueue -- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for -- * an example). -- */ --static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) -- __releases(&hb->lock) -+static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) - { - int prio; - -@@ -2129,6 +2116,24 @@ static inline void queue_me(struct futex - plist_node_init(&q->list, prio); - plist_add(&q->list, &hb->chain); - q->task = current; -+} -+ -+/** -+ * queue_me() - Enqueue the futex_q on the futex_hash_bucket -+ * @q: The futex_q to enqueue -+ * @hb: The destination hash bucket -+ * -+ * The hb->lock must be held by the caller, and is released here. A call to -+ * queue_me() is typically paired with exactly one call to unqueue_me(). The -+ * exceptions involve the PI related operations, which may use unqueue_me_pi() -+ * or nothing if the unqueue is done as part of the wake process and the unqueue -+ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for -+ * an example). 
-+ */ -+static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) -+ __releases(&hb->lock) -+{ -+ __queue_me(q, hb); - spin_unlock(&hb->lock); - } - -@@ -2587,6 +2592,7 @@ static int futex_lock_pi(u32 __user *uad - { - struct hrtimer_sleeper timeout, *to = NULL; - struct futex_pi_state *pi_state = NULL; -+ struct rt_mutex_waiter rt_waiter; - struct futex_hash_bucket *hb; - struct futex_q q = futex_q_init; - int res, ret; -@@ -2639,25 +2645,52 @@ static int futex_lock_pi(u32 __user *uad - } - } - -+ WARN_ON(!q.pi_state); -+ - /* - * Only actually queue now that the atomic ops are done: - */ -- queue_me(&q, hb); -+ __queue_me(&q, hb); - -- WARN_ON(!q.pi_state); -- /* -- * Block on the PI mutex: -- */ -- if (!trylock) { -- ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); -- } else { -+ if (trylock) { - ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); - /* Fixup the trylock return value: */ - ret = ret ? 0 : -EWOULDBLOCK; -+ goto no_block; - } - -+ /* -+ * We must add ourselves to the rt_mutex waitlist while holding hb->lock -+ * such that the hb and rt_mutex wait lists match. -+ */ -+ rt_mutex_init_waiter(&rt_waiter); -+ ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); -+ if (ret) { -+ if (ret == 1) -+ ret = 0; -+ -+ goto no_block; -+ } -+ -+ spin_unlock(q.lock_ptr); -+ -+ if (unlikely(to)) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); -+ - spin_lock(q.lock_ptr); - /* -+ * If we failed to acquire the lock (signal/timeout), we must -+ * first acquire the hb->lock before removing the lock from the -+ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex -+ * wait lists consistent. -+ */ -+ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) -+ ret = 0; -+ -+no_block: -+ /* - * Fixup the pi_state owner and possibly acquire the lock if we - * haven't already. - */ ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1493,19 +1493,6 @@ int __sched rt_mutex_lock_interruptible( - EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); - - /* -- * Futex variant with full deadlock detection. -- * Futex variants must not use the fast-path, see __rt_mutex_futex_unlock(). -- */ --int __sched rt_mutex_timed_futex_lock(struct rt_mutex *lock, -- struct hrtimer_sleeper *timeout) --{ -- might_sleep(); -- -- return rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, -- timeout, RT_MUTEX_FULL_CHAINWALK); --} -- --/* - * Futex variant, must not use fastpath. - */ - int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) -@@ -1782,12 +1769,6 @@ int rt_mutex_wait_proxy_lock(struct rt_m - /* sleep on the mutex */ - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); - -- /* -- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might -- * have to fix that up. -- */ -- fixup_rt_mutex_waiters(lock); -- - raw_spin_unlock_irq(&lock->wait_lock); - - return ret; -@@ -1827,6 +1808,13 @@ bool rt_mutex_cleanup_proxy_lock(struct - fixup_rt_mutex_waiters(lock); - cleanup = true; - } -+ -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might -+ * have to fix that up. 
-+ */ -+ fixup_rt_mutex_waiters(lock); -+ - raw_spin_unlock_irq(&lock->wait_lock); - - return cleanup; ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -113,7 +113,6 @@ extern int rt_mutex_wait_proxy_lock(stru - extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter); - --extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); - extern int rt_mutex_futex_trylock(struct rt_mutex *l); - - extern void rt_mutex_futex_unlock(struct rt_mutex *lock); diff --git a/debian/patches/features/all/rt/0011-hrtimer-Store-running-timer-in-hrtimer_clock_base.patch b/debian/patches/features/all/rt/0011-hrtimer-Store-running-timer-in-hrtimer_clock_base.patch new file mode 100644 index 000000000..077c4568b --- /dev/null +++ b/debian/patches/features/all/rt/0011-hrtimer-Store-running-timer-in-hrtimer_clock_base.patch @@ -0,0 +1,192 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:48 +0200 +Subject: [PATCH 11/36] hrtimer: Store running timer in hrtimer_clock_base +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The pointer to the currently running timer is stored in hrtimer_cpu_base +before the base lock is dropped and the callback is invoked. + +This results in two levels of indirections and the upcoming support for +softirq based hrtimer requires splitting the "running" storage into soft +and hard irq context expiry. + +Storing both in the cpu base would require conditionals in all code paths +accessing that information. + +It's possible to have a per clock base sequence count and running pointer +without changing the semantics of the related mechanisms because the timer +base pointer cannot be changed while a timer is running the callback. + +Unfortunately this makes cpu_clock base larger than 32 bytes on 32bit +kernels. Instead of having huge gaps due to alignment, remove the alignment +and let the compiler pack cpu base for 32bit. The resulting cache access +patterns are fortunately not really different from the current +behaviour. On 64bit kernels the 64byte alignment stays and the behaviour is +unchanged. This was determined by analyzing the resulting layout and +looking at the number of cache lines involved for the frequently used +clocks. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 20 +++++++++----------- + kernel/time/hrtimer.c | 28 +++++++++++++--------------- + 2 files changed, 22 insertions(+), 26 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -118,9 +118,9 @@ struct hrtimer_sleeper { + }; + + #ifdef CONFIG_64BIT +-# define HRTIMER_CLOCK_BASE_ALIGN 64 ++# define __hrtimer_clock_base_align ____cacheline_aligned + #else +-# define HRTIMER_CLOCK_BASE_ALIGN 32 ++# define __hrtimer_clock_base_align + #endif + + /** +@@ -129,18 +129,22 @@ struct hrtimer_sleeper { + * @index: clock type index for per_cpu support when moving a + * timer to a base on another cpu. 
+ * @clockid: clock id for per_cpu support ++ * @seq: seqcount around __run_hrtimer ++ * @running: pointer to the currently running hrtimer + * @active: red black tree root node for the active timers + * @get_time: function to retrieve the current time of the clock + * @offset: offset of this clock to the monotonic base + */ + struct hrtimer_clock_base { + struct hrtimer_cpu_base *cpu_base; +- int index; ++ unsigned int index; + clockid_t clockid; ++ seqcount_t seq; ++ struct hrtimer *running; + struct timerqueue_head active; + ktime_t (*get_time)(void); + ktime_t offset; +-} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); ++} __hrtimer_clock_base_align; + + enum hrtimer_base_type { + HRTIMER_BASE_MONOTONIC, +@@ -154,8 +158,6 @@ enum hrtimer_base_type { + * struct hrtimer_cpu_base - the per cpu clock bases + * @lock: lock protecting the base and associated clock bases + * and timers +- * @seq: seqcount around __run_hrtimer +- * @running: pointer to the currently running hrtimer + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events +@@ -177,8 +179,6 @@ enum hrtimer_base_type { + */ + struct hrtimer_cpu_base { + raw_spinlock_t lock; +- seqcount_t seq; +- struct hrtimer *running; + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; +@@ -198,8 +198,6 @@ struct hrtimer_cpu_base { + + static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) + { +- BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); +- + timer->node.expires = time; + timer->_softexpires = time; + } +@@ -424,7 +422,7 @@ static inline int hrtimer_is_queued(stru + */ + static inline int hrtimer_callback_running(struct hrtimer *timer) + { +- return timer->base->cpu_base->running == timer; ++ return timer->base->running == timer; + } + + /* Forward a hrtimer so it expires after now: */ +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -70,7 +70,6 @@ + DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = + { + .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), +- .seq = SEQCNT_ZERO(hrtimer_bases.seq), + .clock_base = + { + { +@@ -118,7 +117,6 @@ static const int hrtimer_clock_to_base_t + * timer->base->cpu_base + */ + static struct hrtimer_cpu_base migration_cpu_base = { +- .seq = SEQCNT_ZERO(migration_cpu_base), + .clock_base = { { .cpu_base = &migration_cpu_base, }, }, + }; + +@@ -1150,19 +1148,19 @@ EXPORT_SYMBOL_GPL(hrtimer_init); + */ + bool hrtimer_active(const struct hrtimer *timer) + { +- struct hrtimer_cpu_base *cpu_base; ++ struct hrtimer_clock_base *base; + unsigned int seq; + + do { +- cpu_base = READ_ONCE(timer->base->cpu_base); +- seq = raw_read_seqcount_begin(&cpu_base->seq); ++ base = READ_ONCE(timer->base); ++ seq = raw_read_seqcount_begin(&base->seq); + + if (timer->state != HRTIMER_STATE_INACTIVE || +- cpu_base->running == timer) ++ base->running == timer) + return true; + +- } while (read_seqcount_retry(&cpu_base->seq, seq) || +- cpu_base != READ_ONCE(timer->base->cpu_base)); ++ } while (read_seqcount_retry(&base->seq, seq) || ++ base != READ_ONCE(timer->base)); + + return false; + } +@@ -1196,16 +1194,16 @@ static void __run_hrtimer(struct hrtimer + lockdep_assert_held(&cpu_base->lock); + + debug_deactivate(timer); +- cpu_base->running = timer; ++ base->running = timer; + + /* + * Separate the ->running assignment from the ->state assignment. 
+ * + * As with a regular write barrier, this ensures the read side in +- * hrtimer_active() cannot observe cpu_base->running == NULL && ++ * hrtimer_active() cannot observe base->running == NULL && + * timer->state == INACTIVE. + */ +- raw_write_seqcount_barrier(&cpu_base->seq); ++ raw_write_seqcount_barrier(&base->seq); + + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); + fn = timer->function; +@@ -1246,13 +1244,13 @@ static void __run_hrtimer(struct hrtimer + * Separate the ->running assignment from the ->state assignment. + * + * As with a regular write barrier, this ensures the read side in +- * hrtimer_active() cannot observe cpu_base->running == NULL && ++ * hrtimer_active() cannot observe base->running.timer == NULL && + * timer->state == INACTIVE. + */ +- raw_write_seqcount_barrier(&cpu_base->seq); ++ raw_write_seqcount_barrier(&base->seq); + +- WARN_ON_ONCE(cpu_base->running != timer); +- cpu_base->running = NULL; ++ WARN_ON_ONCE(base->running != timer); ++ base->running = NULL; + } + + static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) diff --git a/debian/patches/features/all/rt/0011-tracing-Add-post-trigger-flag-to-hist-trigger-comman.patch b/debian/patches/features/all/rt/0011-tracing-Add-post-trigger-flag-to-hist-trigger-comman.patch deleted file mode 100644 index bdcc739ca..000000000 --- a/debian/patches/features/all/rt/0011-tracing-Add-post-trigger-flag-to-hist-trigger-comman.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:12 -0500 -Subject: [PATCH 11/32] tracing: Add post-trigger flag to hist trigger command -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Add EVENT_CMD_FL_POST_TRIGGER to the hist trigger cmd - it doesn't -affect the hist trigger results, and allows further events such as -synthetic events to be generated from a hist trigger. - -Without this change, generating an event from a hist trigger will -cause the generated event to fail a ring buffer trace_recursive_lock() -check and return without actually logging the event. - -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/trace/trace_events_hist.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/trace/trace_events_hist.c -+++ b/kernel/trace/trace_events_hist.c -@@ -1676,7 +1676,7 @@ static int event_hist_trigger_func(struc - static struct event_command trigger_hist_cmd = { - .name = "hist", - .trigger_type = ETT_EVENT_HIST, -- .flags = EVENT_CMD_FL_NEEDS_REC, -+ .flags = EVENT_CMD_FL_NEEDS_REC | EVENT_CMD_FL_POST_TRIGGER, - .func = event_hist_trigger_func, - .reg = hist_register_trigger, - .unreg = hist_unregister_trigger, diff --git a/debian/patches/features/all/rt/0011-tracing-Remove-code-which-merges-duplicates.patch b/debian/patches/features/all/rt/0011-tracing-Remove-code-which-merges-duplicates.patch new file mode 100644 index 000000000..48f677f79 --- /dev/null +++ b/debian/patches/features/all/rt/0011-tracing-Remove-code-which-merges-duplicates.patch @@ -0,0 +1,188 @@ +From: Vedang Patel +Date: Fri, 22 Sep 2017 14:59:42 -0500 +Subject: [PATCH 11/42] tracing: Remove code which merges duplicates +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +We now have the logic to detect and remove duplicates in the +tracing_map hash table. The code which merges duplicates in the +histogram is redundant now. So, modify this code just to detect +duplicates. 
The duplication detection code is still kept to ensure +that any rare race condition which might cause duplicates does not go +unnoticed. + +Signed-off-by: Vedang Patel +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 11 ----- + kernel/trace/tracing_map.c | 83 ++------------------------------------- + kernel/trace/tracing_map.h | 7 --- + 3 files changed, 6 insertions(+), 95 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -340,16 +340,6 @@ static int hist_trigger_elt_comm_alloc(s + return 0; + } + +-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to, +- struct tracing_map_elt *from) +-{ +- char *comm_from = from->private_data; +- char *comm_to = to->private_data; +- +- if (comm_from) +- memcpy(comm_to, comm_from, TASK_COMM_LEN + 1); +-} +- + static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt) + { + char *comm = elt->private_data; +@@ -360,7 +350,6 @@ static void hist_trigger_elt_comm_init(s + + static const struct tracing_map_ops hist_trigger_elt_comm_ops = { + .elt_alloc = hist_trigger_elt_comm_alloc, +- .elt_copy = hist_trigger_elt_comm_copy, + .elt_free = hist_trigger_elt_comm_free, + .elt_init = hist_trigger_elt_comm_init, + }; +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -847,67 +847,15 @@ create_sort_entry(void *key, struct trac + return sort_entry; + } + +-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt) +-{ +- struct tracing_map_elt *dup_elt; +- unsigned int i; +- +- dup_elt = tracing_map_elt_alloc(elt->map); +- if (IS_ERR(dup_elt)) +- return NULL; +- +- if (elt->map->ops && elt->map->ops->elt_copy) +- elt->map->ops->elt_copy(dup_elt, elt); +- +- dup_elt->private_data = elt->private_data; +- memcpy(dup_elt->key, elt->key, elt->map->key_size); +- +- for (i = 0; i < elt->map->n_fields; i++) { +- atomic64_set(&dup_elt->fields[i].sum, +- atomic64_read(&elt->fields[i].sum)); +- dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn; +- } +- +- return dup_elt; +-} +- +-static int merge_dup(struct tracing_map_sort_entry **sort_entries, +- unsigned int target, unsigned int dup) +-{ +- struct tracing_map_elt *target_elt, *elt; +- bool first_dup = (target - dup) == 1; +- int i; +- +- if (first_dup) { +- elt = sort_entries[target]->elt; +- target_elt = copy_elt(elt); +- if (!target_elt) +- return -ENOMEM; +- sort_entries[target]->elt = target_elt; +- sort_entries[target]->elt_copied = true; +- } else +- target_elt = sort_entries[target]->elt; +- +- elt = sort_entries[dup]->elt; +- +- for (i = 0; i < elt->map->n_fields; i++) +- atomic64_add(atomic64_read(&elt->fields[i].sum), +- &target_elt->fields[i].sum); +- +- sort_entries[dup]->dup = true; +- +- return 0; +-} +- +-static int merge_dups(struct tracing_map_sort_entry **sort_entries, ++static void detect_dups(struct tracing_map_sort_entry **sort_entries, + int n_entries, unsigned int key_size) + { + unsigned int dups = 0, total_dups = 0; +- int err, i, j; ++ int i; + void *key; + + if (n_entries < 2) +- return total_dups; ++ return; + + sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), + (int (*)(const void *, const void *))cmp_entries_dup, NULL); +@@ -916,30 +864,14 @@ static int merge_dups(struct tracing_map + for (i = 1; i < n_entries; i++) { + if (!memcmp(sort_entries[i]->key, key, key_size)) { + dups++; total_dups++; +- err = merge_dup(sort_entries, i - dups, i); +- if (err) +- return err; + continue; + } + key = sort_entries[i]->key; + dups = 0; + } + +- if 
(!total_dups) +- return total_dups; +- +- for (i = 0, j = 0; i < n_entries; i++) { +- if (!sort_entries[i]->dup) { +- sort_entries[j] = sort_entries[i]; +- if (j++ != i) +- sort_entries[i] = NULL; +- } else { +- destroy_sort_entry(sort_entries[i]); +- sort_entries[i] = NULL; +- } +- } +- +- return total_dups; ++ WARN_ONCE(total_dups > 0, ++ "Duplicates detected: %d\n", total_dups); + } + + static bool is_key(struct tracing_map *map, unsigned int field_idx) +@@ -1065,10 +997,7 @@ int tracing_map_sort_entries(struct trac + return 1; + } + +- ret = merge_dups(entries, n_entries, map->key_size); +- if (ret < 0) +- goto free; +- n_entries -= ret; ++ detect_dups(entries, n_entries, map->key_size); + + if (is_key(map, sort_keys[0].field_idx)) + cmp_entries_fn = cmp_entries_key; +--- a/kernel/trace/tracing_map.h ++++ b/kernel/trace/tracing_map.h +@@ -215,11 +215,6 @@ struct tracing_map { + * Element allocation occurs before tracing begins, when the + * tracing_map_init() call is made by client code. + * +- * @elt_copy: At certain points in the lifetime of an element, it may +- * need to be copied. The copy should include a copy of the +- * client-allocated data, which can be copied into the 'to' +- * element from the 'from' element. +- * + * @elt_free: When a tracing_map_elt is freed, this function is called + * and allows client-allocated per-element data to be freed. + * +@@ -233,8 +228,6 @@ struct tracing_map { + */ + struct tracing_map_ops { + int (*elt_alloc)(struct tracing_map_elt *elt); +- void (*elt_copy)(struct tracing_map_elt *to, +- struct tracing_map_elt *from); + void (*elt_free)(struct tracing_map_elt *elt); + void (*elt_clear)(struct tracing_map_elt *elt); + void (*elt_init)(struct tracing_map_elt *elt); diff --git a/debian/patches/features/all/rt/0012-async-Adjust-system_state-checks.patch b/debian/patches/features/all/rt/0012-async-Adjust-system_state-checks.patch deleted file mode 100644 index 9e74147e8..000000000 --- a/debian/patches/features/all/rt/0012-async-Adjust-system_state-checks.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:43 +0200 -Subject: [PATCH 12/17] async: Adjust system_state checks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in async_run_entry_fn() and -async_synchronize_cookie_domain() to handle the extra states. 
- -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Arjan van de Ven -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Steven Rostedt -Link: http://lkml.kernel.org/r/20170516184735.865155020@linutronix.de -Signed-off-by: Ingo Molnar ---- - kernel/async.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - ---- a/kernel/async.c -+++ b/kernel/async.c -@@ -114,14 +114,14 @@ static void async_run_entry_fn(struct wo - ktime_t uninitialized_var(calltime), delta, rettime; - - /* 1) run (and print duration) */ -- if (initcall_debug && system_state == SYSTEM_BOOTING) { -+ if (initcall_debug && system_state < SYSTEM_RUNNING) { - pr_debug("calling %lli_%pF @ %i\n", - (long long)entry->cookie, - entry->func, task_pid_nr(current)); - calltime = ktime_get(); - } - entry->func(entry->data, entry->cookie); -- if (initcall_debug && system_state == SYSTEM_BOOTING) { -+ if (initcall_debug && system_state < SYSTEM_RUNNING) { - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - pr_debug("initcall %lli_%pF returned 0 after %lld usecs\n", -@@ -284,14 +284,14 @@ void async_synchronize_cookie_domain(asy - { - ktime_t uninitialized_var(starttime), delta, endtime; - -- if (initcall_debug && system_state == SYSTEM_BOOTING) { -+ if (initcall_debug && system_state < SYSTEM_RUNNING) { - pr_debug("async_waiting @ %i\n", task_pid_nr(current)); - starttime = ktime_get(); - } - - wait_event(async_done, lowest_in_progress(domain) >= cookie); - -- if (initcall_debug && system_state == SYSTEM_BOOTING) { -+ if (initcall_debug && system_state < SYSTEM_RUNNING) { - endtime = ktime_get(); - delta = ktime_sub(endtime, starttime); - diff --git a/debian/patches/features/all/rt/0012-cpufreq-sparc-us2e-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0012-cpufreq-sparc-us2e-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index a813914f8..000000000 --- a/debian/patches/features/all/rt/0012-cpufreq-sparc-us2e-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,129 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 13 Apr 2017 10:22:43 +0200 -Subject: [PATCH 12/13] cpufreq/sparc-us2e: Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The access to the HBIRD_ESTAR_MODE register in the cpu frequency control -functions must happen on the target CPU. This is achieved by temporarily -setting the affinity of the calling user space thread to the requested CPU -and reset it to the original affinity afterwards. - -That's racy vs. CPU hotplug and concurrent affinity settings for that -thread resulting in code executing on the wrong CPU and overwriting the -new affinity setting. - -Replace it by a straight forward smp function call. - -Signed-off-by: Thomas Gleixner -Acked-by: Viresh Kumar -Cc: Fenghua Yu -Cc: Tony Luck -Cc: Herbert Xu -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: linux-pm@vger.kernel.org -Cc: Lai Jiangshan -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: "David S. 
Miller" -Cc: Len Brown -Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704131020280.2408@nanos -Signed-off-by: Thomas Gleixner ---- - drivers/cpufreq/sparc-us2e-cpufreq.c | 45 ++++++++++++++++------------------- - 1 file changed, 21 insertions(+), 24 deletions(-) - ---- a/drivers/cpufreq/sparc-us2e-cpufreq.c -+++ b/drivers/cpufreq/sparc-us2e-cpufreq.c -@@ -118,10 +118,6 @@ static void us2e_transition(unsigned lon - unsigned long clock_tick, - unsigned long old_divisor, unsigned long divisor) - { -- unsigned long flags; -- -- local_irq_save(flags); -- - estar &= ~ESTAR_MODE_DIV_MASK; - - /* This is based upon the state transition diagram in the IIe manual. */ -@@ -152,8 +148,6 @@ static void us2e_transition(unsigned lon - } else { - BUG(); - } -- -- local_irq_restore(flags); - } - - static unsigned long index_to_estar_mode(unsigned int index) -@@ -229,48 +223,51 @@ static unsigned long estar_to_divisor(un - return ret; - } - -+static void __us2e_freq_get(void *arg) -+{ -+ unsigned long *estar = arg; -+ -+ *estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); -+} -+ - static unsigned int us2e_freq_get(unsigned int cpu) - { -- cpumask_t cpus_allowed; - unsigned long clock_tick, estar; - -- cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); -- - clock_tick = sparc64_get_clock_tick(cpu) / 1000; -- estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); -- -- set_cpus_allowed_ptr(current, &cpus_allowed); -+ if (smp_call_function_single(cpu, __us2e_freq_get, &estar, 1)) -+ return 0; - - return clock_tick / estar_to_divisor(estar); - } - --static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index) -+static void __us2e_freq_target(void *arg) - { -- unsigned int cpu = policy->cpu; -+ unsigned int cpu = smp_processor_id(); -+ unsigned int *index = arg; - unsigned long new_bits, new_freq; - unsigned long clock_tick, divisor, old_divisor, estar; -- cpumask_t cpus_allowed; -- -- cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); - - new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; -- new_bits = index_to_estar_mode(index); -- divisor = index_to_divisor(index); -+ new_bits = index_to_estar_mode(*index); -+ divisor = index_to_divisor(*index); - new_freq /= divisor; - - estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); - - old_divisor = estar_to_divisor(estar); - -- if (old_divisor != divisor) -+ if (old_divisor != divisor) { - us2e_transition(estar, new_bits, clock_tick * 1000, - old_divisor, divisor); -+ } -+} - -- set_cpus_allowed_ptr(current, &cpus_allowed); -+static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index) -+{ -+ unsigned int cpu = policy->cpu; - -- return 0; -+ return smp_call_function_single(cpu, __us2e_freq_target, &index, 1); - } - - static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy) diff --git a/debian/patches/features/all/rt/0012-futex-Futex_unlock_pi-determinism.patch b/debian/patches/features/all/rt/0012-futex-Futex_unlock_pi-determinism.patch deleted file mode 100644 index fcd0c0dae..000000000 --- a/debian/patches/features/all/rt/0012-futex-Futex_unlock_pi-determinism.patch +++ /dev/null @@ -1,81 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:35:59 +0100 -Subject: [PATCH] futex: Futex_unlock_pi() determinism -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit bebe5b514345f09be2c15e414d076b02ecb9cce8 - -The problem with returning -EAGAIN when the waiter state mismatches is that 
-it becomes very hard to proof a bounded execution time on the -operation. And seeing that this is a RT operation, this is somewhat -important. - -While in practise; given the previous patch; it will be very unlikely to -ever really take more than one or two rounds, proving so becomes rather -hard. - -However, now that modifying wait_list is done while holding both hb->lock -and wait_lock, the scenario can be avoided entirely by acquiring wait_lock -while still holding hb-lock. Doing a hand-over, without leaving a hole. - -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: bigeasy@linutronix.de -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104152.112378812@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 24 +++++++++++------------- - 1 file changed, 11 insertions(+), 13 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1398,15 +1398,10 @@ static int wake_futex_pi(u32 __user *uad - DEFINE_WAKE_Q(wake_q); - int ret = 0; - -- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); - new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -- if (!new_owner) { -+ if (WARN_ON_ONCE(!new_owner)) { - /* -- * Since we held neither hb->lock nor wait_lock when coming -- * into this function, we could have raced with futex_lock_pi() -- * such that we might observe @this futex_q waiter, but the -- * rt_mutex's wait_list can be empty (either still, or again, -- * depending on which side we land). -+ * As per the comment in futex_unlock_pi() this should not happen. - * - * When this happens, give up our locks and try again, giving - * the futex_lock_pi() instance time to complete, either by -@@ -2794,15 +2789,18 @@ static int futex_unlock_pi(u32 __user *u - if (pi_state->owner != current) - goto out_unlock; - -+ get_pi_state(pi_state); - /* -- * Grab a reference on the pi_state and drop hb->lock. -+ * Since modifying the wait_list is done while holding both -+ * hb->lock and wait_lock, holding either is sufficient to -+ * observe it. - * -- * The reference ensures pi_state lives, dropping the hb->lock -- * is tricky.. wake_futex_pi() will take rt_mutex::wait_lock to -- * close the races against futex_lock_pi(), but in case of -- * _any_ fail we'll abort and retry the whole deal. -+ * By taking wait_lock while still holding hb->lock, we ensure -+ * there is no point where we hold neither; and therefore -+ * wake_futex_pi() must observe a state consistent with what we -+ * observed. 
- */ -- get_pi_state(pi_state); -+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); - spin_unlock(&hb->lock); - - ret = wake_futex_pi(uaddr, uval, pi_state); diff --git a/debian/patches/features/all/rt/0012-hrtimer-Make-room-in-struct-hrtimer_cpu_base.patch b/debian/patches/features/all/rt/0012-hrtimer-Make-room-in-struct-hrtimer_cpu_base.patch new file mode 100644 index 000000000..ce05e2676 --- /dev/null +++ b/debian/patches/features/all/rt/0012-hrtimer-Make-room-in-struct-hrtimer_cpu_base.patch @@ -0,0 +1,34 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:50 +0200 +Subject: [PATCH 12/36] hrtimer: Make room in struct hrtimer_cpu_base +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The upcoming softirq based hrtimers support requires an additional field in +the hrtimer_cpu_base struct, which would grow the struct size beyond a +cache line. + +The struct members nr_retries and nr_hangs of hrtimer_cpu_base are solely +used for diagnostic output and have no requirement to be unsigned int. + +Make them unsigned short to create room for the new struct member. No +functional change. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -189,8 +189,8 @@ struct hrtimer_cpu_base { + ktime_t expires_next; + struct hrtimer *next_timer; + unsigned int nr_events; +- unsigned int nr_retries; +- unsigned int nr_hangs; ++ unsigned short nr_retries; ++ unsigned short nr_hangs; + unsigned int max_hang_time; + #endif + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; diff --git a/debian/patches/features/all/rt/0003-ring-buffer-Add-interface-for-setting-absolute-time-.patch b/debian/patches/features/all/rt/0012-ring-buffer-Add-interface-for-setting-absolute-time-.patch similarity index 71% rename from debian/patches/features/all/rt/0003-ring-buffer-Add-interface-for-setting-absolute-time-.patch rename to debian/patches/features/all/rt/0012-ring-buffer-Add-interface-for-setting-absolute-time-.patch index 41259f0e7..cd94a6d1d 100644 --- a/debian/patches/features/all/rt/0003-ring-buffer-Add-interface-for-setting-absolute-time-.patch +++ b/debian/patches/features/all/rt/0012-ring-buffer-Add-interface-for-setting-absolute-time-.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:04 -0500 -Subject: [PATCH 03/32] ring-buffer: Add interface for setting absolute time +Date: Fri, 22 Sep 2017 14:59:43 -0500 +Subject: [PATCH 12/42] ring-buffer: Add interface for setting absolute time stamps -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Define a new function, tracing_set_time_stamp_abs(), which can be used to enable or disable the use of absolute timestamps rather than time @@ -15,17 +15,18 @@ Only the interface is added here; a subsequent patch will add the underlying implementation. 
Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu Signed-off-by: Sebastian Andrzej Siewior --- include/linux/ring_buffer.h | 2 ++ kernel/trace/ring_buffer.c | 11 +++++++++++ - kernel/trace/trace.c | 25 ++++++++++++++++++++++++- - kernel/trace/trace.h | 2 ++ - 4 files changed, 39 insertions(+), 1 deletion(-) + kernel/trace/trace.c | 40 +++++++++++++++++++++++++++++++++++++++- + kernel/trace/trace.h | 3 +++ + 4 files changed, 55 insertions(+), 1 deletion(-) --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h -@@ -180,6 +180,8 @@ void ring_buffer_normalize_time_stamp(st +@@ -181,6 +181,8 @@ void ring_buffer_normalize_time_stamp(st int cpu, u64 *ts); void ring_buffer_set_clock(struct ring_buffer *buffer, u64 (*clock)(void)); @@ -36,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c -@@ -484,6 +484,7 @@ struct ring_buffer { +@@ -485,6 +485,7 @@ struct ring_buffer { u64 (*clock)(void); struct rb_irq_work irq_work; @@ -44,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; struct ring_buffer_iter { -@@ -1378,6 +1379,16 @@ void ring_buffer_set_clock(struct ring_b +@@ -1379,6 +1380,16 @@ void ring_buffer_set_clock(struct ring_b buffer->clock = clock; } @@ -63,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline unsigned long rb_page_entries(struct buffer_page *bpage) --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2082,7 +2082,7 @@ trace_event_buffer_lock_reserve(struct r +@@ -2269,7 +2269,7 @@ trace_event_buffer_lock_reserve(struct r *current_rb = trace_file->tr->trace_buffer.buffer; @@ -72,14 +73,30 @@ Signed-off-by: Sebastian Andrzej Siewior (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && (entry = this_cpu_read(trace_buffered_event))) { /* Try to use the per cpu buffer first */ -@@ -5959,6 +5959,29 @@ static int tracing_clock_open(struct ino +@@ -6297,6 +6297,44 @@ static int tracing_clock_open(struct ino + return ret; } - ++ +int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) +{ ++ int ret = 0; ++ + mutex_lock(&trace_types_lock); + ++ if (abs && tr->time_stamp_abs_ref++) ++ goto out; ++ ++ if (!abs) { ++ if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (--tr->time_stamp_abs_ref) ++ goto out; ++ } ++ + ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs); + + /* @@ -89,22 +106,29 @@ Signed-off-by: Sebastian Andrzej Siewior + tracing_reset_online_cpus(&tr->trace_buffer); + +#ifdef CONFIG_TRACER_MAX_TRACE -+ if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) ++ if (tr->max_buffer.buffer) + ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs); + tracing_reset_online_cpus(&tr->max_buffer); +#endif -+ ++ out: + mutex_unlock(&trace_types_lock); + -+ return 0; ++ return ret; +} -+ + struct ftrace_buffer_info { struct trace_iterator iter; - void *spare; --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -278,6 +278,8 @@ extern struct mutex trace_types_lock; +@@ -273,6 +273,7 @@ struct trace_array { + /* function tracing enabled */ + int function_enabled; + #endif ++ int time_stamp_abs_ref; + }; + + enum { +@@ -286,6 +287,8 @@ extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); extern void trace_array_put(struct trace_array *tr); diff --git a/debian/patches/features/all/rt/0013-crypto-N2-Replace-racy-task-affinity-logic.patch b/debian/patches/features/all/rt/0013-crypto-N2-Replace-racy-task-affinity-logic.patch deleted file mode 100644 index 
af01415de..000000000 --- a/debian/patches/features/all/rt/0013-crypto-N2-Replace-racy-task-affinity-logic.patch +++ /dev/null @@ -1,95 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 13 Apr 2017 10:20:23 +0200 -Subject: [PATCH 13/13] crypto: N2 - Replace racy task affinity logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -spu_queue_register() needs to invoke setup functions on a particular -CPU. This is achieved by temporarily setting the affinity of the -calling user space thread to the requested CPU and reset it to the original -affinity afterwards. - -That's racy vs. CPU hotplug and concurrent affinity settings for that -thread resulting in code executing on the wrong CPU and overwriting the -new affinity setting. - -Replace it by using work_on_cpu_safe() which guarantees to run the code on -the requested CPU or to fail in case the CPU is offline. - -Signed-off-by: Thomas Gleixner -Acked-by: Herbert Xu -Acked-by: "David S. Miller" -Cc: Fenghua Yu -Cc: Tony Luck -Cc: "Rafael J. Wysocki" -Cc: Peter Zijlstra -Cc: Benjamin Herrenschmidt -Cc: Sebastian Siewior -Cc: Lai Jiangshan -Cc: Viresh Kumar -Cc: linux-crypto@vger.kernel.org -Cc: Michael Ellerman -Cc: Tejun Heo -Cc: Len Brown -Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704131019420.2408@nanos -Signed-off-by: Thomas Gleixner ---- - drivers/crypto/n2_core.c | 31 ++++++++++++++++--------------- - 1 file changed, 16 insertions(+), 15 deletions(-) - ---- a/drivers/crypto/n2_core.c -+++ b/drivers/crypto/n2_core.c -@@ -65,6 +65,11 @@ struct spu_queue { - struct list_head list; - }; - -+struct spu_qreg { -+ struct spu_queue *queue; -+ unsigned long type; -+}; -+ - static struct spu_queue **cpu_to_cwq; - static struct spu_queue **cpu_to_mau; - -@@ -1631,31 +1636,27 @@ static void queue_cache_destroy(void) - kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); - } - --static int spu_queue_register(struct spu_queue *p, unsigned long q_type) -+static long spu_queue_register_workfn(void *arg) - { -- cpumask_var_t old_allowed; -+ struct spu_qreg *qr = arg; -+ struct spu_queue *p = qr->queue; -+ unsigned long q_type = qr->type; - unsigned long hv_ret; - -- if (cpumask_empty(&p->sharing)) -- return -EINVAL; -- -- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL)) -- return -ENOMEM; -- -- cpumask_copy(old_allowed, ¤t->cpus_allowed); -- -- set_cpus_allowed_ptr(current, &p->sharing); -- - hv_ret = sun4v_ncs_qconf(q_type, __pa(p->q), - CWQ_NUM_ENTRIES, &p->qhandle); - if (!hv_ret) - sun4v_ncs_sethead_marker(p->qhandle, 0); - -- set_cpus_allowed_ptr(current, old_allowed); -+ return hv_ret ? -EINVAL : 0; -+} - -- free_cpumask_var(old_allowed); -+static int spu_queue_register(struct spu_queue *p, unsigned long q_type) -+{ -+ int cpu = cpumask_any_and(&p->sharing, cpu_online_mask); -+ struct spu_qreg qr = { .queue = p, .type = q_type }; - -- return (hv_ret ? 
-EINVAL : 0); -+ return work_on_cpu_safe(cpu, spu_queue_register_workfn, &qr); - } - - static int spu_queue_setup(struct spu_queue *p) diff --git a/debian/patches/features/all/rt/0013-extable-Adjust-system_state-checks.patch b/debian/patches/features/all/rt/0013-extable-Adjust-system_state-checks.patch deleted file mode 100644 index 5e2e70988..000000000 --- a/debian/patches/features/all/rt/0013-extable-Adjust-system_state-checks.patch +++ /dev/null @@ -1,36 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:44 +0200 -Subject: [PATCH 13/17] extable: Adjust system_state checks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in core_kernel_text() to handle the extra -states, i.e. to cover init text up to the point where the system switches -to state RUNNING. - -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Steven Rostedt (VMware) -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Peter Zijlstra -Link: http://lkml.kernel.org/r/20170516184735.949992741@linutronix.de -Signed-off-by: Ingo Molnar ---- - kernel/extable.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/extable.c -+++ b/kernel/extable.c -@@ -75,7 +75,7 @@ int core_kernel_text(unsigned long addr) - addr < (unsigned long)_etext) - return 1; - -- if (system_state == SYSTEM_BOOTING && -+ if (system_state < SYSTEM_RUNNING && - init_kernel_text(addr)) - return 1; - return 0; diff --git a/debian/patches/features/all/rt/0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch b/debian/patches/features/all/rt/0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch deleted file mode 100644 index 10c6c666d..000000000 --- a/debian/patches/features/all/rt/0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch +++ /dev/null @@ -1,204 +0,0 @@ -From: Peter Zijlstra -Date: Wed, 22 Mar 2017 11:36:00 +0100 -Subject: [PATCH] futex: Drop hb->lock before enqueueing on the rtmutex -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 56222b212e8edb1cf51f5dd73ff645809b082b40 - -When PREEMPT_RT_FULL does the spinlock -> rt_mutex substitution the PI -chain code will (falsely) report a deadlock and BUG. - -The problem is that it hold hb->lock (now an rt_mutex) while doing -task_blocks_on_rt_mutex on the futex's pi_state::rtmutex. This, when -interleaved just right with futex_unlock_pi() leads it to believe to see an -AB-BA deadlock. - - Task1 (holds rt_mutex, Task2 (does FUTEX_LOCK_PI) - does FUTEX_UNLOCK_PI) - - lock hb->lock - lock rt_mutex (as per start_proxy) - lock hb->lock - -Which is a trivial AB-BA. - -It is not an actual deadlock, because it won't be holding hb->lock by the -time it actually blocks on the rt_mutex, but the chainwalk code doesn't -know that and it would be a nightmare to handle this gracefully. - -To avoid this problem, do the same as in futex_unlock_pi() and drop -hb->lock after acquiring wait_lock. This still fully serializes against -futex_unlock_pi(), since adding to the wait_list does the very same lock -dance, and removing it holds both locks. - -Aside of solving the RT problem this makes the lock and unlock mechanism -symetric and reduces the hb->lock held time. 
- -Reported-and-tested-by: Sebastian Andrzej Siewior -Suggested-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Cc: juri.lelli@arm.com -Cc: xlpang@redhat.com -Cc: rostedt@goodmis.org -Cc: mathieu.desnoyers@efficios.com -Cc: jdesfossez@efficios.com -Cc: dvhart@infradead.org -Cc: bristot@redhat.com -Link: http://lkml.kernel.org/r/20170322104152.161341537@infradead.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/futex.c | 30 +++++++++++++++++------- - kernel/locking/rtmutex.c | 49 ++++++++++++++++++++++------------------ - kernel/locking/rtmutex_common.h | 3 ++ - 3 files changed, 52 insertions(+), 30 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2654,20 +2654,33 @@ static int futex_lock_pi(u32 __user *uad - goto no_block; - } - -+ rt_mutex_init_waiter(&rt_waiter); -+ - /* -- * We must add ourselves to the rt_mutex waitlist while holding hb->lock -- * such that the hb and rt_mutex wait lists match. -+ * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -+ * hold it while doing rt_mutex_start_proxy(), because then it will -+ * include hb->lock in the blocking chain, even through we'll not in -+ * fact hold it while blocking. This will lead it to report -EDEADLK -+ * and BUG when futex_unlock_pi() interleaves with this. -+ * -+ * Therefore acquire wait_lock while holding hb->lock, but drop the -+ * latter before calling rt_mutex_start_proxy_lock(). This still fully -+ * serializes against futex_unlock_pi() as that does the exact same -+ * lock handoff sequence. - */ -- rt_mutex_init_waiter(&rt_waiter); -- ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); -+ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); -+ spin_unlock(q.lock_ptr); -+ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); -+ raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); -+ - if (ret) { - if (ret == 1) - ret = 0; - -+ spin_lock(q.lock_ptr); - goto no_block; - } - -- spin_unlock(q.lock_ptr); - - if (unlikely(to)) - hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -@@ -2680,6 +2693,9 @@ static int futex_lock_pi(u32 __user *uad - * first acquire the hb->lock before removing the lock from the - * rt_mutex waitqueue, such that we can keep the hb and rt_mutex - * wait lists consistent. -+ * -+ * In particular; it is important that futex_unlock_pi() can not -+ * observe this inconsistency. - */ - if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) - ret = 0; -@@ -2791,10 +2807,6 @@ static int futex_unlock_pi(u32 __user *u - - get_pi_state(pi_state); - /* -- * Since modifying the wait_list is done while holding both -- * hb->lock and wait_lock, holding either is sufficient to -- * observe it. -- * - * By taking wait_lock while still holding hb->lock, we ensure - * there is no point where we hold neither; and therefore - * wake_futex_pi() must observe a state consistent with what we ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1669,31 +1669,14 @@ void rt_mutex_proxy_unlock(struct rt_mut - rt_mutex_set_owner(lock, NULL); - } - --/** -- * rt_mutex_start_proxy_lock() - Start lock acquisition for another task -- * @lock: the rt_mutex to take -- * @waiter: the pre-initialized rt_mutex_waiter -- * @task: the task to prepare -- * -- * Returns: -- * 0 - task blocked on lock -- * 1 - acquired the lock for task, caller should wake it up -- * <0 - error -- * -- * Special API call for FUTEX_REQUEUE_PI support. 
-- */ --int rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task) - { - int ret; - -- raw_spin_lock_irq(&lock->wait_lock); -- -- if (try_to_take_rt_mutex(lock, task, NULL)) { -- raw_spin_unlock_irq(&lock->wait_lock); -+ if (try_to_take_rt_mutex(lock, task, NULL)) - return 1; -- } - - /* We enforce deadlock detection for futexes */ - ret = task_blocks_on_rt_mutex(lock, waiter, task, -@@ -1712,12 +1695,36 @@ int rt_mutex_start_proxy_lock(struct rt_ - if (unlikely(ret)) - remove_waiter(lock, waiter); - -- raw_spin_unlock_irq(&lock->wait_lock); -- - debug_rt_mutex_print_deadlock(waiter); - - return ret; - } -+ -+/** -+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task -+ * @lock: the rt_mutex to take -+ * @waiter: the pre-initialized rt_mutex_waiter -+ * @task: the task to prepare -+ * -+ * Returns: -+ * 0 - task blocked on lock -+ * 1 - acquired the lock for task, caller should wake it up -+ * <0 - error -+ * -+ * Special API call for FUTEX_REQUEUE_PI support. -+ */ -+int rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task) -+{ -+ int ret; -+ -+ raw_spin_lock_irq(&lock->wait_lock); -+ ret = __rt_mutex_start_proxy_lock(lock, waiter, task); -+ raw_spin_unlock_irq(&lock->wait_lock); -+ -+ return ret; -+} - - /** - * rt_mutex_next_owner - return the next owner of the lock ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -104,6 +104,9 @@ extern void rt_mutex_init_proxy_locked(s - extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); - extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); -+extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task); - extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); diff --git a/debian/patches/features/all/rt/0013-hrtimer-Reduce-conditional-code-hres_active.patch b/debian/patches/features/all/rt/0013-hrtimer-Reduce-conditional-code-hres_active.patch new file mode 100644 index 000000000..6c0456cdd --- /dev/null +++ b/debian/patches/features/all/rt/0013-hrtimer-Reduce-conditional-code-hres_active.patch @@ -0,0 +1,150 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:51 +0200 +Subject: [PATCH 13/36] hrtimer: Reduce conditional code (hres_active) +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The hrtimer_cpu_base struct has the CONFIG_HIGH_RES_TIMERS conditional +struct member hres_active. All related functions to this member are +conditional as well. + +There is no functional change, when the hres_active member is +unconditional with all related functions and is set to zero during +initialization. + +The conditional code sections can be avoided by adding IS_ENABLED(HIGHRES) +conditionals into common functions, which ensures dead code elimination. 
+ +Suggested-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 20 ++++++++------------ + kernel/time/hrtimer.c | 31 +++++++++++++++---------------- + 2 files changed, 23 insertions(+), 28 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -161,8 +161,8 @@ enum hrtimer_base_type { + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events +- * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hres_active: State of high resolution mode ++ * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue +@@ -182,9 +182,9 @@ struct hrtimer_cpu_base { + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; ++ unsigned int hres_active : 1; + #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int in_hrtirq : 1, +- hres_active : 1, + hang_detected : 1; + ktime_t expires_next; + struct hrtimer *next_timer; +@@ -266,16 +266,17 @@ static inline ktime_t hrtimer_cb_get_tim + return timer->base->get_time(); + } + ++static inline int hrtimer_is_hres_active(struct hrtimer *timer) ++{ ++ return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? ++ timer->base->cpu_base->hres_active : 0; ++} ++ + #ifdef CONFIG_HIGH_RES_TIMERS + struct clock_event_device; + + extern void hrtimer_interrupt(struct clock_event_device *dev); + +-static inline int hrtimer_is_hres_active(struct hrtimer *timer) +-{ +- return timer->base->cpu_base->hres_active; +-} +- + /* + * The resolution of the clocks. The resolution value is returned in + * the clock_getres() system call to give application programmers an +@@ -298,11 +299,6 @@ extern unsigned int hrtimer_resolution; + + #define hrtimer_resolution (unsigned int)LOW_RES_NSEC + +-static inline int hrtimer_is_hres_active(struct hrtimer *timer) +-{ +- return 0; +-} +- + static inline void clock_was_set_delayed(void) { } + + #endif +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -512,6 +512,20 @@ static inline ktime_t hrtimer_update_bas + offs_real, offs_boot, offs_tai); + } + ++/* ++ * Is the high resolution mode active ? ++ */ ++static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) ++{ ++ return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? ++ cpu_base->hres_active : 0; ++} ++ ++static inline int hrtimer_hres_active(void) ++{ ++ return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); ++} ++ + /* High resolution timer related functions */ + #ifdef CONFIG_HIGH_RES_TIMERS + +@@ -541,19 +555,6 @@ static inline int hrtimer_is_hres_enable + } + + /* +- * Is the high resolution mode active ? 
+- */ +-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) +-{ +- return cpu_base->hres_active; +-} +- +-static inline int hrtimer_hres_active(void) +-{ +- return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); +-} +- +-/* + * Reprogram the event source with checking both queues for the + * next event + * Called with interrupts disabled and base->lock held +@@ -661,7 +662,6 @@ static void hrtimer_reprogram(struct hrt + static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) + { + base->expires_next = KTIME_MAX; +- base->hres_active = 0; + } + + /* +@@ -720,8 +720,6 @@ void clock_was_set_delayed(void) + + #else + +-static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; } +-static inline int hrtimer_hres_active(void) { return 0; } + static inline int hrtimer_is_hres_enabled(void) { return 0; } + static inline void hrtimer_switch_to_hres(void) { } + static inline void +@@ -1602,6 +1600,7 @@ int hrtimers_prepare_cpu(unsigned int cp + } + + cpu_base->cpu = cpu; ++ cpu_base->hres_active = 0; + hrtimer_init_hres(cpu_base); + return 0; + } diff --git a/debian/patches/features/all/rt/0004-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch b/debian/patches/features/all/rt/0013-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch similarity index 82% rename from debian/patches/features/all/rt/0004-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch rename to debian/patches/features/all/rt/0013-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch index 2b6fd9a37..e71f87c02 100644 --- a/debian/patches/features/all/rt/0004-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch +++ b/debian/patches/features/all/rt/0013-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:05 -0500 -Subject: [PATCH 04/32] ring-buffer: Redefine the unimplemented +Date: Fri, 22 Sep 2017 14:59:44 -0500 +Subject: [PATCH 13/42] ring-buffer: Redefine the unimplemented RINGBUF_TIME_TIME_STAMP -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RINGBUF_TYPE_TIME_STAMP is defined but not used, and from what I can gather was reserved for something like an absolute timestamp feature @@ -24,13 +24,13 @@ previous interface patch. Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/ring_buffer.h | 12 ++-- - kernel/trace/ring_buffer.c | 107 +++++++++++++++++++++++++++++++------------- - 2 files changed, 83 insertions(+), 36 deletions(-) + include/linux/ring_buffer.h | 12 ++--- + kernel/trace/ring_buffer.c | 105 +++++++++++++++++++++++++++++++------------- + 2 files changed, 83 insertions(+), 34 deletions(-) --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h -@@ -36,10 +36,12 @@ struct ring_buffer_event { +@@ -37,10 +37,12 @@ struct ring_buffer_event { * array[0] = time delta (28 .. 
59) * size = 8 bytes * @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior * * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: * Data record -@@ -56,12 +58,12 @@ enum ring_buffer_type { +@@ -57,12 +59,12 @@ enum ring_buffer_type { RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, RINGBUF_TYPE_PADDING, RINGBUF_TYPE_TIME_EXTEND, @@ -72,7 +72,14 @@ Signed-off-by: Sebastian Andrzej Siewior trace_seq_printf(s, "\tdata max type_len == %d\n", RINGBUF_TYPE_DATA_TYPE_LEN_MAX); -@@ -147,6 +149,9 @@ enum { +@@ -141,12 +143,15 @@ int ring_buffer_print_entry_header(struc + + enum { + RB_LEN_TIME_EXTEND = 8, +- RB_LEN_TIME_STAMP = 16, ++ RB_LEN_TIME_STAMP = 8, + }; + #define skip_time_extend(event) \ ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) @@ -82,19 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline int rb_null_event(struct ring_buffer_event *event) { return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; -@@ -187,10 +192,8 @@ rb_event_length(struct ring_buffer_event - return event->array[0] + RB_EVNT_HDR_SIZE; - - case RINGBUF_TYPE_TIME_EXTEND: -- return RB_LEN_TIME_EXTEND; -- - case RINGBUF_TYPE_TIME_STAMP: -- return RB_LEN_TIME_STAMP; -+ return RB_LEN_TIME_EXTEND; - - case RINGBUF_TYPE_DATA: - return rb_event_data_length(event); -@@ -210,7 +213,7 @@ rb_event_ts_length(struct ring_buffer_ev +@@ -210,7 +215,7 @@ rb_event_ts_length(struct ring_buffer_ev { unsigned len = 0; @@ -103,7 +98,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* time extends include the data event after it */ len = RB_LEN_TIME_EXTEND; event = skip_time_extend(event); -@@ -232,7 +235,7 @@ unsigned ring_buffer_event_length(struct +@@ -232,7 +237,7 @@ unsigned ring_buffer_event_length(struct { unsigned length; @@ -112,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior event = skip_time_extend(event); length = rb_event_length(event); -@@ -249,7 +252,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_leng +@@ -249,7 +254,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_leng static __always_inline void * rb_event_data(struct ring_buffer_event *event) { @@ -121,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior event = skip_time_extend(event); BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ -@@ -276,6 +279,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data +@@ -276,6 +281,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) @@ -149,7 +144,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Flag when events were overwritten */ #define RB_MISSED_EVENTS (1 << 31) /* Missed count stored at end */ -@@ -2219,13 +2243,16 @@ rb_move_tail(struct ring_buffer_per_cpu +@@ -2220,13 +2246,16 @@ rb_move_tail(struct ring_buffer_per_cpu } /* Slow path, do not inline */ @@ -171,7 +166,7 @@ Signed-off-by: Sebastian Andrzej Siewior event->time_delta = delta & TS_MASK; event->array[0] = delta >> TS_SHIFT; } else { -@@ -2268,7 +2295,9 @@ rb_update_event(struct ring_buffer_per_c +@@ -2269,7 +2298,9 @@ rb_update_event(struct ring_buffer_per_c * add it to the start of the resevered space. 
*/ if (unlikely(info->add_timestamp)) { @@ -182,7 +177,7 @@ Signed-off-by: Sebastian Andrzej Siewior length -= RB_LEN_TIME_EXTEND; delta = 0; } -@@ -2456,7 +2485,7 @@ static __always_inline void rb_end_commi +@@ -2457,7 +2488,7 @@ static __always_inline void rb_end_commi static inline void rb_event_discard(struct ring_buffer_event *event) { @@ -191,7 +186,7 @@ Signed-off-by: Sebastian Andrzej Siewior event = skip_time_extend(event); /* array[0] holds the actual length for the discarded event */ -@@ -2487,6 +2516,10 @@ rb_update_write_stamp(struct ring_buffer +@@ -2488,6 +2519,10 @@ rb_update_write_stamp(struct ring_buffer { u64 delta; @@ -202,7 +197,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * The event first in the commit queue updates the * time stamp. -@@ -2500,9 +2533,7 @@ rb_update_write_stamp(struct ring_buffer +@@ -2501,9 +2536,7 @@ rb_update_write_stamp(struct ring_buffer cpu_buffer->write_stamp = cpu_buffer->commit_page->page->time_stamp; else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { @@ -213,7 +208,7 @@ Signed-off-by: Sebastian Andrzej Siewior cpu_buffer->write_stamp += delta; } else cpu_buffer->write_stamp += event->time_delta; -@@ -2686,7 +2717,7 @@ static struct ring_buffer_event * +@@ -2657,7 +2690,7 @@ static struct ring_buffer_event * * If this is the first commit on the page, then it has the same * timestamp as the page itself. */ @@ -222,7 +217,7 @@ Signed-off-by: Sebastian Andrzej Siewior info->delta = 0; /* See if we shot pass the end of this buffer page */ -@@ -2764,8 +2795,11 @@ rb_reserve_next_event(struct ring_buffer +@@ -2735,8 +2768,11 @@ rb_reserve_next_event(struct ring_buffer /* make sure this diff is calculated here */ barrier(); @@ -236,7 +231,7 @@ Signed-off-by: Sebastian Andrzej Siewior info.delta = diff; if (unlikely(test_time_stamp(info.delta))) rb_handle_timestamp(cpu_buffer, &info); -@@ -3447,14 +3481,12 @@ rb_update_read_stamp(struct ring_buffer_ +@@ -3418,14 +3454,12 @@ rb_update_read_stamp(struct ring_buffer_ return; case RINGBUF_TYPE_TIME_EXTEND: @@ -253,7 +248,7 @@ Signed-off-by: Sebastian Andrzej Siewior return; case RINGBUF_TYPE_DATA: -@@ -3478,14 +3510,12 @@ rb_update_iter_read_stamp(struct ring_bu +@@ -3449,14 +3483,12 @@ rb_update_iter_read_stamp(struct ring_bu return; case RINGBUF_TYPE_TIME_EXTEND: @@ -270,7 +265,7 @@ Signed-off-by: Sebastian Andrzej Siewior return; case RINGBUF_TYPE_DATA: -@@ -3709,6 +3739,8 @@ rb_buffer_peek(struct ring_buffer_per_cp +@@ -3680,6 +3712,8 @@ rb_buffer_peek(struct ring_buffer_per_cp struct buffer_page *reader; int nr_loops = 0; @@ -279,7 +274,7 @@ Signed-off-by: Sebastian Andrzej Siewior again: /* * We repeat when a time extend is encountered. 
-@@ -3745,12 +3777,17 @@ rb_buffer_peek(struct ring_buffer_per_cp +@@ -3716,12 +3750,17 @@ rb_buffer_peek(struct ring_buffer_per_cp goto again; case RINGBUF_TYPE_TIME_STAMP: @@ -299,7 +294,7 @@ Signed-off-by: Sebastian Andrzej Siewior *ts = cpu_buffer->read_stamp + event->time_delta; ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); -@@ -3775,6 +3812,9 @@ rb_iter_peek(struct ring_buffer_iter *it +@@ -3746,6 +3785,9 @@ rb_iter_peek(struct ring_buffer_iter *it struct ring_buffer_event *event; int nr_loops = 0; @@ -309,7 +304,7 @@ Signed-off-by: Sebastian Andrzej Siewior cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; -@@ -3827,12 +3867,17 @@ rb_iter_peek(struct ring_buffer_iter *it +@@ -3798,12 +3840,17 @@ rb_iter_peek(struct ring_buffer_iter *it goto again; case RINGBUF_TYPE_TIME_STAMP: diff --git a/debian/patches/features/all/rt/0014-hrtimer-Use-accesor-functions-instead-of-direct-acce.patch b/debian/patches/features/all/rt/0014-hrtimer-Use-accesor-functions-instead-of-direct-acce.patch new file mode 100644 index 000000000..ac8a11829 --- /dev/null +++ b/debian/patches/features/all/rt/0014-hrtimer-Use-accesor-functions-instead-of-direct-acce.patch @@ -0,0 +1,36 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:52 +0200 +Subject: [PATCH 14/36] hrtimer: Use accesor functions instead of direct access +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +__hrtimer_hres_active() is now available unconditionally. Replace the +direct access to hrtimer_cpu_base.hres_active. + +No functional change. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -564,7 +564,7 @@ hrtimer_force_reprogram(struct hrtimer_c + { + ktime_t expires_next; + +- if (!cpu_base->hres_active) ++ if (!__hrtimer_hres_active(cpu_base)) + return; + + expires_next = __hrtimer_get_next_event(cpu_base); +@@ -673,7 +673,7 @@ static void retrigger_next_event(void *a + { + struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); + +- if (!base->hres_active) ++ if (!__hrtimer_hres_active(base)) + return; + + raw_spin_lock(&base->lock); diff --git a/debian/patches/features/all/rt/0014-printk-Adjust-system_state-checks.patch b/debian/patches/features/all/rt/0014-printk-Adjust-system_state-checks.patch deleted file mode 100644 index 068be9f91..000000000 --- a/debian/patches/features/all/rt/0014-printk-Adjust-system_state-checks.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:45 +0200 -Subject: [PATCH 14/17] printk: Adjust system_state checks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in boot_delay_msec() to handle the extra -states. 
- -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Steven Rostedt (VMware) -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Peter Zijlstra -Link: http://lkml.kernel.org/r/20170516184736.027534895@linutronix.de -Signed-off-by: Ingo Molnar ---- - kernel/printk/printk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1176,7 +1176,7 @@ static void boot_delay_msec(int level) - unsigned long long k; - unsigned long timeout; - -- if ((boot_delay == 0 || system_state != SYSTEM_BOOTING) -+ if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) - || suppress_message_printing(level)) { - return; - } diff --git a/debian/patches/features/all/rt/0005-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch b/debian/patches/features/all/rt/0014-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch similarity index 91% rename from debian/patches/features/all/rt/0005-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch rename to debian/patches/features/all/rt/0014-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch index 916462ab1..73db74f05 100644 --- a/debian/patches/features/all/rt/0005-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch +++ b/debian/patches/features/all/rt/0014-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:06 -0500 -Subject: [PATCH 05/32] tracing: Give event triggers access to +Date: Fri, 22 Sep 2017 14:59:45 -0500 +Subject: [PATCH 14/42] tracing: Give event triggers access to ring_buffer_event -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The ring_buffer event can provide a timestamp that may be useful to various triggers - pass it into the handlers for that purpose. 
@@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h -@@ -400,11 +400,13 @@ enum event_trigger_type { +@@ -402,11 +402,13 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); -@@ -424,7 +426,7 @@ trace_trigger_soft_disabled(struct trace +@@ -426,7 +428,7 @@ trace_trigger_soft_disabled(struct trace if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { if (eflags & EVENT_FILE_FL_TRIGGER_MODE) @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (eflags & EVENT_FILE_FL_PID_FILTER) --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -1189,7 +1189,7 @@ static inline bool +@@ -1296,7 +1296,7 @@ static inline bool unsigned long eflags = file->flags; if (eflags & EVENT_FILE_FL_TRIGGER_COND) @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && -@@ -1226,7 +1226,7 @@ event_trigger_unlock_commit(struct trace +@@ -1333,7 +1333,7 @@ event_trigger_unlock_commit(struct trace trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -1259,7 +1259,7 @@ event_trigger_unlock_commit_regs(struct +@@ -1366,7 +1366,7 @@ event_trigger_unlock_commit_regs(struct irq_flags, pc, regs); if (tt) @@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior } #define FILTER_PRED_INVALID ((unsigned short)-1) -@@ -1482,7 +1482,8 @@ extern int register_trigger_hist_enable_ +@@ -1591,7 +1591,8 @@ extern int register_trigger_hist_enable_ */ struct event_trigger_ops { void (*func)(struct event_trigger_data *data, @@ -87,7 +87,7 @@ Signed-off-by: Sebastian Andrzej Siewior void (*free)(struct event_trigger_ops *ops, --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -921,7 +921,8 @@ static inline void add_to_key(char *comp +@@ -909,7 +909,8 @@ static inline void add_to_key(char *comp memcpy(compound_key + key_field->offset, key, size); } @@ -97,7 +97,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct hist_trigger_data *hist_data = data->private_data; bool use_compound_key = (hist_data->n_keys > 1); -@@ -1672,7 +1673,8 @@ static struct event_command trigger_hist +@@ -1660,7 +1661,8 @@ static struct event_command trigger_hist } static void @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct enable_trigger_data *enable_data = data->private_data; struct event_trigger_data *test; -@@ -1688,7 +1690,8 @@ hist_enable_trigger(struct event_trigger +@@ -1676,7 +1678,8 @@ hist_enable_trigger(struct event_trigger } static void @@ -117,7 +117,7 @@ Signed-off-by: Sebastian Andrzej Siewior { if (!data->count) return; -@@ -1696,7 +1699,7 @@ hist_enable_count_trigger(struct event_t +@@ -1684,7 +1687,7 @@ hist_enable_count_trigger(struct event_t if (data->count != -1) (data->count)--; diff --git a/debian/patches/features/all/rt/0015-hrtimer-Make-the-remote-enqueue-check-unconditional.patch b/debian/patches/features/all/rt/0015-hrtimer-Make-the-remote-enqueue-check-unconditional.patch new file mode 100644 index 000000000..033656b9b --- /dev/null +++ b/debian/patches/features/all/rt/0015-hrtimer-Make-the-remote-enqueue-check-unconditional.patch @@ -0,0 +1,129 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:53 +0200 +Subject: 
[PATCH 15/36] hrtimer: Make the remote enqueue check unconditional +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer_cpu_base.expires_next is used to cache the next event armed in the +timer hardware. The value is used to check whether an hrtimer can be +enqueued remotely. If the new hrtimer is expiring before expires_next, then +remote enqueue is not possible as the remote hrtimer hardware cannot be +accessed for reprogramming to an earlier expiry time. + +The remote enqueue check is currently conditional on +CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active. There is no +compelling reason to make this conditional. + +Move hrtimer_cpu_base.expires_next out of the CONFIG_HIGH_RES_TIMERS=y +guarded area and remove the conditionals in hrtimer_check_target(). + +The check is currently a NOOP for the CONFIG_HIGH_RES_TIMERS=n and the +!hrtimer_cpu_base.hres_active case because in these cases nothing updates +hrtimer_cpu_base.expires_next yet. This will be changed with later patches +which further reduce the #ifdef zoo in this code. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 6 +++--- + kernel/time/hrtimer.c | 26 ++++++-------------------- + 2 files changed, 9 insertions(+), 23 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -164,13 +164,13 @@ enum hrtimer_base_type { + * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang +- * @expires_next: absolute time of the next event, is required for remote +- * hrtimer enqueue + * @next_timer: Pointer to the first expiring timer + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt ++ * @expires_next: absolute time of the next event, is required for remote ++ * hrtimer enqueue + * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). +@@ -186,13 +186,13 @@ struct hrtimer_cpu_base { + #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int in_hrtirq : 1, + hang_detected : 1; +- ktime_t expires_next; + struct hrtimer *next_timer; + unsigned int nr_events; + unsigned short nr_retries; + unsigned short nr_hangs; + unsigned int max_hang_time; + #endif ++ ktime_t expires_next; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + } ____cacheline_aligned; + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -154,26 +154,21 @@ struct hrtimer_clock_base *lock_hrtimer_ + } + + /* +- * With HIGHRES=y we do not migrate the timer when it is expiring +- * before the next event on the target cpu because we cannot reprogram +- * the target cpu hardware and we would cause it to fire late. ++ * We do not migrate the timer when it is expiring before the next ++ * event on the target cpu. When high resolution is enabled, we cannot ++ * reprogram the target cpu hardware and we would cause it to fire ++ * late. To keep it simple, we handle the high resolution enabled and ++ * disabled case similar. + * + * Called with cpu_base->lock of target cpu held. 
+ */ + static int + hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) + { +-#ifdef CONFIG_HIGH_RES_TIMERS + ktime_t expires; + +- if (!new_base->cpu_base->hres_active) +- return 0; +- + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); + return expires <= new_base->cpu_base->expires_next; +-#else +- return 0; +-#endif + } + + static inline +@@ -657,14 +652,6 @@ static void hrtimer_reprogram(struct hrt + } + + /* +- * Initialize the high resolution related parts of cpu_base +- */ +-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) +-{ +- base->expires_next = KTIME_MAX; +-} +- +-/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() +@@ -729,7 +716,6 @@ static inline int hrtimer_reprogram(stru + { + return 0; + } +-static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } + static inline void retrigger_next_event(void *arg) { } + + #endif /* CONFIG_HIGH_RES_TIMERS */ +@@ -1601,7 +1587,7 @@ int hrtimers_prepare_cpu(unsigned int cp + + cpu_base->cpu = cpu; + cpu_base->hres_active = 0; +- hrtimer_init_hres(cpu_base); ++ cpu_base->expires_next = KTIME_MAX; + return 0; + } + diff --git a/debian/patches/features/all/rt/0015-mm-vmscan-Adjust-system_state-checks.patch b/debian/patches/features/all/rt/0015-mm-vmscan-Adjust-system_state-checks.patch deleted file mode 100644 index 0bab3291f..000000000 --- a/debian/patches/features/all/rt/0015-mm-vmscan-Adjust-system_state-checks.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:46 +0200 -Subject: [PATCH 15/17] mm/vmscan: Adjust system_state checks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -To enable smp_processor_id() and might_sleep() debug checks earlier, it's -required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. - -Adjust the system_state check in kswapd_run() to handle the extra states. 
- -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Steven Rostedt (VMware) -Acked-by: Vlastimil Babka -Cc: Andrew Morton -Cc: Greg Kroah-Hartman -Cc: Johannes Weiner -Cc: Linus Torvalds -Cc: Mel Gorman -Cc: Michal Hocko -Cc: Peter Zijlstra -Link: http://lkml.kernel.org/r/20170516184736.119158930@linutronix.de -Signed-off-by: Ingo Molnar ---- - mm/vmscan.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -3654,7 +3654,7 @@ int kswapd_run(int nid) - pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); - if (IS_ERR(pgdat->kswapd)) { - /* failure at boot is fatal */ -- BUG_ON(system_state == SYSTEM_BOOTING); -+ BUG_ON(system_state < SYSTEM_RUNNING); - pr_err("Failed to start kswapd on node %d\n", nid); - ret = PTR_ERR(pgdat->kswapd); - pgdat->kswapd = NULL; diff --git a/debian/patches/features/all/rt/0006-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch b/debian/patches/features/all/rt/0015-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch similarity index 89% rename from debian/patches/features/all/rt/0006-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch rename to debian/patches/features/all/rt/0015-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch index 5d7f2cc04..2ca111e30 100644 --- a/debian/patches/features/all/rt/0006-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch +++ b/debian/patches/features/all/rt/0015-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:07 -0500 -Subject: [PATCH 06/32] tracing: Add ring buffer event param to hist field +Date: Fri, 22 Sep 2017 14:59:46 -0500 +Subject: [PATCH 15/42] tracing: Add ring buffer event param to hist field functions -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Some events such as timestamps require access to a ring_buffer_event struct; add a param so that hist field functions can access that. 
@@ -91,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior { \ type *addr = (type *)(event + hist_field->field->offset); \ \ -@@ -883,8 +892,8 @@ create_hist_data(unsigned int map_bits, +@@ -871,8 +880,8 @@ create_hist_data(unsigned int map_bits, } static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, @@ -102,7 +102,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct hist_field *hist_field; unsigned int i; -@@ -892,7 +901,7 @@ static void hist_trigger_elt_update(stru +@@ -880,7 +889,7 @@ static void hist_trigger_elt_update(stru for_each_hist_val_field(i, hist_data) { hist_field = hist_data->fields[i]; @@ -111,7 +111,7 @@ Signed-off-by: Sebastian Andrzej Siewior tracing_map_update_sum(elt, i, hist_val); } } -@@ -922,7 +931,7 @@ static inline void add_to_key(char *comp +@@ -910,7 +919,7 @@ static inline void add_to_key(char *comp } static void event_hist_trigger(struct event_trigger_data *data, void *rec, @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct hist_trigger_data *hist_data = data->private_data; bool use_compound_key = (hist_data->n_keys > 1); -@@ -951,7 +960,7 @@ static void event_hist_trigger(struct ev +@@ -939,7 +948,7 @@ static void event_hist_trigger(struct ev key = entries; } else { @@ -129,7 +129,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (key_field->flags & HIST_FIELD_FL_STRING) { key = (void *)(unsigned long)field_contents; use_compound_key = true; -@@ -968,7 +977,7 @@ static void event_hist_trigger(struct ev +@@ -956,7 +965,7 @@ static void event_hist_trigger(struct ev elt = tracing_map_insert(hist_data->map, key); if (elt) diff --git a/debian/patches/features/all/rt/0016-hrtimer-Make-hrtimer_cpu_base.next_timer-handling-un.patch b/debian/patches/features/all/rt/0016-hrtimer-Make-hrtimer_cpu_base.next_timer-handling-un.patch new file mode 100644 index 000000000..082e3845b --- /dev/null +++ b/debian/patches/features/all/rt/0016-hrtimer-Make-hrtimer_cpu_base.next_timer-handling-un.patch @@ -0,0 +1,99 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:54 +0200 +Subject: [PATCH 16/36] hrtimer: Make hrtimer_cpu_base.next_timer handling + unconditional +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer_cpu_base.next_timer stores the pointer to the next expiring timer +in a cpu base. + +This pointer cannot be dereferenced and is solely used to check whether a +hrtimer which is removed is the hrtimer which is the first to expire in the +CPU base. If this is the case, then the timer hardware needs to be +reprogrammed to avoid an extra interrupt for nothing. + +Again, this is conditional functionality, but there is no compelling reason +to make this conditional. As a preparation, hrtimer_cpu_base.next_timer +needs to be available unconditonal. Aside of that the upcoming support for +softirq based hrtimers requires access to this pointer unconditionally. + +Make the update of hrtimer_cpu_base.next_timer unconditional and remove the +ifdef cruft. The impact on CONFIG_HIGH_RES_TIMERS=n && CONFIG_NOHZ=n is +marginal as it's just a store on an already dirtied cacheline. + +No functional change. 
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 4 ++-- + kernel/time/hrtimer.c | 12 ++---------- + 2 files changed, 4 insertions(+), 12 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -164,13 +164,13 @@ enum hrtimer_base_type { + * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang +- * @next_timer: Pointer to the first expiring timer + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue ++ * @next_timer: Pointer to the first expiring timer + * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). +@@ -186,13 +186,13 @@ struct hrtimer_cpu_base { + #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int in_hrtirq : 1, + hang_detected : 1; +- struct hrtimer *next_timer; + unsigned int nr_events; + unsigned short nr_retries; + unsigned short nr_hangs; + unsigned int max_hang_time; + #endif + ktime_t expires_next; ++ struct hrtimer *next_timer; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + } ____cacheline_aligned; + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -442,14 +442,6 @@ static inline void debug_deactivate(stru + } + + #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) +-static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, +- struct hrtimer *timer) +-{ +-#ifdef CONFIG_HIGH_RES_TIMERS +- cpu_base->next_timer = timer; +-#endif +-} +- + static struct hrtimer_clock_base * + __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) + { +@@ -473,7 +465,7 @@ static ktime_t __hrtimer_get_next_event( + unsigned int active = cpu_base->active_bases; + ktime_t expires, expires_next = KTIME_MAX; + +- hrtimer_update_next_timer(cpu_base, NULL); ++ cpu_base->next_timer = NULL; + for_each_active_base(base, cpu_base, active) { + struct timerqueue_node *next; + struct hrtimer *timer; +@@ -483,7 +475,7 @@ static ktime_t __hrtimer_get_next_event( + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + if (expires < expires_next) { + expires_next = expires; +- hrtimer_update_next_timer(cpu_base, timer); ++ cpu_base->next_timer = timer; + } + } + /* diff --git a/debian/patches/features/all/rt/0016-init-Introduce-SYSTEM_SCHEDULING-state.patch b/debian/patches/features/all/rt/0016-init-Introduce-SYSTEM_SCHEDULING-state.patch deleted file mode 100644 index 14830306f..000000000 --- a/debian/patches/features/all/rt/0016-init-Introduce-SYSTEM_SCHEDULING-state.patch +++ /dev/null @@ -1,60 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:47 +0200 -Subject: [PATCH 16/17] init: Introduce SYSTEM_SCHEDULING state -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -might_sleep() debugging and smp_processor_id() debugging should be active -right after the scheduler starts working. The init task can invoke -smp_processor_id() from preemptible context as it is pinned on the boot cpu -until sched_smp_init() removes the pinning and lets it schedule on all non -isolated cpus. 
- -Add a new state which allows to enable those checks earlier and add it to -the xen do_poweroff() function. - -No functional change. - -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Boris Ostrovsky -Acked-by: Mark Rutland -Cc: Greg Kroah-Hartman -Cc: Juergen Gross -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Steven Rostedt -Link: http://lkml.kernel.org/r/20170516184736.196214622@linutronix.de -Signed-off-by: Ingo Molnar ---- - drivers/xen/manage.c | 1 + - include/linux/kernel.h | 6 +++++- - 2 files changed, 6 insertions(+), 1 deletion(-) - ---- a/drivers/xen/manage.c -+++ b/drivers/xen/manage.c -@@ -190,6 +190,7 @@ static void do_poweroff(void) - { - switch (system_state) { - case SYSTEM_BOOTING: -+ case SYSTEM_SCHEDULING: - orderly_poweroff(true); - break; - case SYSTEM_RUNNING: ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -488,9 +488,13 @@ extern int root_mountflags; - - extern bool early_boot_irqs_disabled; - --/* Values used for system_state */ -+/* -+ * Values used for system_state. Ordering of the states must not be changed -+ * as code checks for <, <=, >, >= STATE. -+ */ - extern enum system_states { - SYSTEM_BOOTING, -+ SYSTEM_SCHEDULING, - SYSTEM_RUNNING, - SYSTEM_HALT, - SYSTEM_POWER_OFF, diff --git a/debian/patches/features/all/rt/0008-tracing-Break-out-hist-trigger-assignment-parsing.patch b/debian/patches/features/all/rt/0016-tracing-Break-out-hist-trigger-assignment-parsing.patch similarity index 67% rename from debian/patches/features/all/rt/0008-tracing-Break-out-hist-trigger-assignment-parsing.patch rename to debian/patches/features/all/rt/0016-tracing-Break-out-hist-trigger-assignment-parsing.patch index ad77a62d0..4316cd4ac 100644 --- a/debian/patches/features/all/rt/0008-tracing-Break-out-hist-trigger-assignment-parsing.patch +++ b/debian/patches/features/all/rt/0016-tracing-Break-out-hist-trigger-assignment-parsing.patch @@ -1,20 +1,21 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:09 -0500 -Subject: [PATCH 08/32] tracing: Break out hist trigger assignment parsing -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:59:47 -0500 +Subject: [PATCH 16/42] tracing: Break out hist trigger assignment parsing +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz This will make it easier to add variables, and makes the parsing code cleaner regardless. 
Signed-off-by: Tom Zanussi +Signed-off-by: Rajvi Jingar Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 56 ++++++++++++++++++++++++--------------- - 1 file changed, 35 insertions(+), 21 deletions(-) + kernel/trace/trace_events_hist.c | 72 +++++++++++++++++++++++++++------------ + 1 file changed, 51 insertions(+), 21 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -251,6 +251,35 @@ static void destroy_hist_trigger_attrs(s +@@ -251,6 +251,51 @@ static void destroy_hist_trigger_attrs(s kfree(attrs); } @@ -23,17 +24,33 @@ Signed-off-by: Sebastian Andrzej Siewior + int ret = 0; + + if ((strncmp(str, "key=", strlen("key=")) == 0) || -+ (strncmp(str, "keys=", strlen("keys=")) == 0)) ++ (strncmp(str, "keys=", strlen("keys=")) == 0)) { + attrs->keys_str = kstrdup(str, GFP_KERNEL); -+ else if ((strncmp(str, "val=", strlen("val=")) == 0) || ++ if (!attrs->keys_str) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if ((strncmp(str, "val=", strlen("val=")) == 0) || + (strncmp(str, "vals=", strlen("vals=")) == 0) || -+ (strncmp(str, "values=", strlen("values=")) == 0)) ++ (strncmp(str, "values=", strlen("values=")) == 0)) { + attrs->vals_str = kstrdup(str, GFP_KERNEL); -+ else if (strncmp(str, "sort=", strlen("sort=")) == 0) ++ if (!attrs->vals_str) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if (strncmp(str, "sort=", strlen("sort=")) == 0) { + attrs->sort_key_str = kstrdup(str, GFP_KERNEL); -+ else if (strncmp(str, "name=", strlen("name=")) == 0) ++ if (!attrs->sort_key_str) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if (strncmp(str, "name=", strlen("name=")) == 0) { + attrs->name = kstrdup(str, GFP_KERNEL); -+ else if (strncmp(str, "size=", strlen("size=")) == 0) { ++ if (!attrs->name) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if (strncmp(str, "size=", strlen("size=")) == 0) { + int map_bits = parse_map_size(str); + + if (map_bits < 0) { @@ -50,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) { struct hist_trigger_attrs *attrs; -@@ -263,33 +292,18 @@ static struct hist_trigger_attrs *parse_ +@@ -263,33 +308,18 @@ static struct hist_trigger_attrs *parse_ while (trigger_str) { char *str = strsep(&trigger_str, ":"); diff --git a/debian/patches/features/all/rt/0017-hrtimer-Make-hrtimer_reprogramm-unconditional.patch b/debian/patches/features/all/rt/0017-hrtimer-Make-hrtimer_reprogramm-unconditional.patch new file mode 100644 index 000000000..647c2619e --- /dev/null +++ b/debian/patches/features/all/rt/0017-hrtimer-Make-hrtimer_reprogramm-unconditional.patch @@ -0,0 +1,187 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:55 +0200 +Subject: [PATCH 17/36] hrtimer: Make hrtimer_reprogramm() unconditional +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer_reprogram() needs to be available unconditionally for softirq based +hrtimers. Move the function and all required struct members out of the +CONFIG_HIGH_RES_TIMERS #ifdef. + +There is no functional change because hrtimer_reprogram() is only invoked +when hrtimer_cpu_base.hres_active is true. Making it unconditional +increases the text size for the CONFIG_HIGH_RES_TIMERS=n case, but avoids +replication of that code for the upcoming softirq based hrtimers support. 
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 6 +- + kernel/time/hrtimer.c | 129 +++++++++++++++++++++++------------------------- + 2 files changed, 65 insertions(+), 70 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -182,10 +182,10 @@ struct hrtimer_cpu_base { + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; +- unsigned int hres_active : 1; +-#ifdef CONFIG_HIGH_RES_TIMERS +- unsigned int in_hrtirq : 1, ++ unsigned int hres_active : 1, ++ in_hrtirq : 1, + hang_detected : 1; ++#ifdef CONFIG_HIGH_RES_TIMERS + unsigned int nr_events; + unsigned short nr_retries; + unsigned short nr_hangs; +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -582,68 +582,6 @@ hrtimer_force_reprogram(struct hrtimer_c + } + + /* +- * When a timer is enqueued and expires earlier than the already enqueued +- * timers, we have to check, whether it expires earlier than the timer for +- * which the clock event device was armed. +- * +- * Called with interrupts disabled and base->cpu_base.lock held +- */ +-static void hrtimer_reprogram(struct hrtimer *timer, +- struct hrtimer_clock_base *base) +-{ +- struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); +- ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); +- +- WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); +- +- /* +- * If the timer is not on the current cpu, we cannot reprogram +- * the other cpus clock event device. +- */ +- if (base->cpu_base != cpu_base) +- return; +- +- /* +- * If the hrtimer interrupt is running, then it will +- * reevaluate the clock bases and reprogram the clock event +- * device. The callbacks are always executed in hard interrupt +- * context so we don't need an extra check for a running +- * callback. +- */ +- if (cpu_base->in_hrtirq) +- return; +- +- /* +- * CLOCK_REALTIME timer might be requested with an absolute +- * expiry time which is less than base->offset. Set it to 0. +- */ +- if (expires < 0) +- expires = 0; +- +- if (expires >= cpu_base->expires_next) +- return; +- +- /* Update the pointer to the next expiring timer */ +- cpu_base->next_timer = timer; +- +- /* +- * If a hang was detected in the last timer interrupt then we +- * do not schedule a timer which is earlier than the expiry +- * which we enforced in the hang detection. We want the system +- * to make progress. +- */ +- if (cpu_base->hang_detected) +- return; +- +- /* +- * Program the timer hardware. We enforce the expiry for +- * events which are already in the past. +- */ +- cpu_base->expires_next = expires; +- tick_program_event(expires, 1); +-} +- +-/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() +@@ -703,16 +641,73 @@ static inline int hrtimer_is_hres_enable + static inline void hrtimer_switch_to_hres(void) { } + static inline void + hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } +-static inline int hrtimer_reprogram(struct hrtimer *timer, +- struct hrtimer_clock_base *base) +-{ +- return 0; +-} + static inline void retrigger_next_event(void *arg) { } + + #endif /* CONFIG_HIGH_RES_TIMERS */ + + /* ++ * When a timer is enqueued and expires earlier than the already enqueued ++ * timers, we have to check, whether it expires earlier than the timer for ++ * which the clock event device was armed. 
++ * ++ * Called with interrupts disabled and base->cpu_base.lock held ++ */ ++static void hrtimer_reprogram(struct hrtimer *timer, ++ struct hrtimer_clock_base *base) ++{ ++ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ++ ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); ++ ++ WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); ++ ++ /* ++ * If the timer is not on the current cpu, we cannot reprogram ++ * the other cpus clock event device. ++ */ ++ if (base->cpu_base != cpu_base) ++ return; ++ ++ /* ++ * If the hrtimer interrupt is running, then it will ++ * reevaluate the clock bases and reprogram the clock event ++ * device. The callbacks are always executed in hard interrupt ++ * context so we don't need an extra check for a running ++ * callback. ++ */ ++ if (cpu_base->in_hrtirq) ++ return; ++ ++ /* ++ * CLOCK_REALTIME timer might be requested with an absolute ++ * expiry time which is less than base->offset. Set it to 0. ++ */ ++ if (expires < 0) ++ expires = 0; ++ ++ if (expires >= cpu_base->expires_next) ++ return; ++ ++ /* Update the pointer to the next expiring timer */ ++ cpu_base->next_timer = timer; ++ ++ /* ++ * If a hang was detected in the last timer interrupt then we ++ * do not schedule a timer which is earlier than the expiry ++ * which we enforced in the hang detection. We want the system ++ * to make progress. ++ */ ++ if (cpu_base->hang_detected) ++ return; ++ ++ /* ++ * Program the timer hardware. We enforce the expiry for ++ * events which are already in the past. ++ */ ++ cpu_base->expires_next = expires; ++ tick_program_event(expires, 1); ++} ++ ++/* + * Clock realtime was set + * + * Change the offset of the realtime clock vs. the monotonic diff --git a/debian/patches/features/all/rt/0017-sched-core-Enable-might_sleep-and-smp_processor_id-c.patch b/debian/patches/features/all/rt/0017-sched-core-Enable-might_sleep-and-smp_processor_id-c.patch deleted file mode 100644 index c1aa4258e..000000000 --- a/debian/patches/features/all/rt/0017-sched-core-Enable-might_sleep-and-smp_processor_id-c.patch +++ /dev/null @@ -1,74 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 16 May 2017 20:42:48 +0200 -Subject: [PATCH 17/17] sched/core: Enable might_sleep() and smp_processor_id() - checks early -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -might_sleep() and smp_processor_id() checks are enabled after the boot -process is done. That hides bugs in the SMP bringup and driver -initialization code. - -Enable it right when the scheduler starts working, i.e. when init task and -kthreadd have been created and right before the idle task enables -preemption. - -Tested-by: Mark Rutland -Signed-off-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Acked-by: Mark Rutland -Cc: Greg Kroah-Hartman -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Steven Rostedt -Link: http://lkml.kernel.org/r/20170516184736.272225698@linutronix.de -Signed-off-by: Ingo Molnar ---- - init/main.c | 10 ++++++++++ - kernel/sched/core.c | 4 +++- - lib/smp_processor_id.c | 2 +- - 3 files changed, 14 insertions(+), 2 deletions(-) - ---- a/init/main.c -+++ b/init/main.c -@@ -414,6 +414,16 @@ static noinline void __ref rest_init(voi - rcu_read_lock(); - kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); - rcu_read_unlock(); -+ -+ /* -+ * Enable might_sleep() and smp_processor_id() checks. -+ * They cannot be enabled earlier because with CONFIG_PRREMPT=y -+ * kernel_thread() would trigger might_sleep() splats. 
With -+ * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled -+ * already, but it's stuck on the kthreadd_done completion. -+ */ -+ system_state = SYSTEM_SCHEDULING; -+ - complete(&kthreadd_done); - - /* ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -6223,8 +6223,10 @@ void ___might_sleep(const char *file, in - - if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && - !is_idle_task(current)) || -- system_state != SYSTEM_RUNNING || oops_in_progress) -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) - return; -+ - if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) - return; - prev_jiffy = jiffies; ---- a/lib/smp_processor_id.c -+++ b/lib/smp_processor_id.c -@@ -28,7 +28,7 @@ notrace static unsigned int check_preemp - /* - * It is valid to assume CPU-locality during early bootup: - */ -- if (system_state != SYSTEM_RUNNING) -+ if (system_state < SYSTEM_SCHEDULING) - goto out; - - /* diff --git a/debian/patches/features/all/rt/0012-tracing-Add-hist-trigger-timestamp-support.patch b/debian/patches/features/all/rt/0017-tracing-Add-hist-trigger-timestamp-support.patch similarity index 80% rename from debian/patches/features/all/rt/0012-tracing-Add-hist-trigger-timestamp-support.patch rename to debian/patches/features/all/rt/0017-tracing-Add-hist-trigger-timestamp-support.patch index aeeb8913e..13fda6e79 100644 --- a/debian/patches/features/all/rt/0012-tracing-Add-hist-trigger-timestamp-support.patch +++ b/debian/patches/features/all/rt/0017-tracing-Add-hist-trigger-timestamp-support.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:13 -0500 -Subject: [PATCH 12/32] tracing: Add hist trigger timestamp support -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:59:48 -0500 +Subject: [PATCH 17/42] tracing: Add hist trigger timestamp support +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add support for a timestamp event field. This is actually a 'pseudo-' event field in that it behaves like it's part of the event record, but @@ -22,10 +22,11 @@ event rather than an offset. This mode will be enabled if and only if a histogram makes use of the "$common_timestamp" field. 
Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 90 ++++++++++++++++++++++++++++----------- - 1 file changed, 66 insertions(+), 24 deletions(-) + kernel/trace/trace_events_hist.c | 90 +++++++++++++++++++++++++++++---------- + 1 file changed, 67 insertions(+), 23 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -43,10 +44,10 @@ Signed-off-by: Sebastian Andrzej Siewior static u64 hist_field_##type(struct hist_field *hist_field, \ void *event, \ @@ -135,6 +141,7 @@ enum hist_field_flags { - HIST_FIELD_FL_SYSCALL = 128, - HIST_FIELD_FL_STACKTRACE = 256, - HIST_FIELD_FL_LOG2 = 512, -+ HIST_FIELD_FL_TIMESTAMP = 1024, + HIST_FIELD_FL_SYSCALL = 1 << 7, + HIST_FIELD_FL_STACKTRACE = 1 << 8, + HIST_FIELD_FL_LOG2 = 1 << 9, ++ HIST_FIELD_FL_TIMESTAMP = 1 << 10, }; struct hist_trigger_attrs { @@ -67,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (field_name == NULL) field_name = ""; -@@ -435,6 +445,12 @@ static struct hist_field *create_hist_fi +@@ -440,6 +450,12 @@ static struct hist_field *create_hist_fi goto out; } @@ -80,12 +81,12 @@ Signed-off-by: Sebastian Andrzej Siewior if (WARN_ON_ONCE(!field)) goto out; -@@ -512,10 +528,15 @@ static int create_val_field(struct hist_ +@@ -517,10 +533,15 @@ static int create_val_field(struct hist_ } } - field = trace_find_event_field(file->event_call, field_name); -- if (!field) { +- if (!field || !field->size) { - ret = -EINVAL; - goto out; + if (strcmp(field_name, "$common_timestamp") == 0) { @@ -93,19 +94,19 @@ Signed-off-by: Sebastian Andrzej Siewior + hist_data->enable_timestamps = true; + } else { + field = trace_find_event_field(file->event_call, field_name); -+ if (!field) { ++ if (!field || !field->size) { + ret = -EINVAL; + goto out; + } } hist_data->fields[val_idx] = create_hist_field(field, flags); -@@ -610,16 +631,22 @@ static int create_key_field(struct hist_ +@@ -615,16 +636,22 @@ static int create_key_field(struct hist_ } } - field = trace_find_event_field(file->event_call, field_name); -- if (!field) { +- if (!field || !field->size) { - ret = -EINVAL; - goto out; - } @@ -115,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior + key_size = sizeof(u64); + } else { + field = trace_find_event_field(file->event_call, field_name); -+ if (!field) { ++ if (!field || !field->size) { + ret = -EINVAL; + goto out; + } @@ -132,16 +133,7 @@ Signed-off-by: Sebastian Andrzej Siewior } hist_data->fields[key_idx] = create_hist_field(field, flags); -@@ -756,7 +783,7 @@ static int create_sort_keys(struct hist_ - break; - } - -- if (strcmp(field_name, "hitcount") == 0) { -+ if ((strcmp(field_name, "hitcount") == 0)) { - descending = is_descending(field_str); - if (descending < 0) { - ret = descending; -@@ -816,6 +843,9 @@ static int create_tracing_map_fields(str +@@ -820,6 +847,9 @@ static int create_tracing_map_fields(str if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) cmp_fn = tracing_map_cmp_none; @@ -151,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior else if (is_string_field(field)) cmp_fn = tracing_map_cmp_string; else -@@ -1213,7 +1243,11 @@ static void hist_field_print(struct seq_ +@@ -1217,7 +1247,11 @@ static void hist_field_print(struct seq_ { const char *field_name = hist_field_name(hist_field, 0); @@ -164,7 +156,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (hist_field->flags) { const char *flags_str = get_hist_field_flags(hist_field); -@@ -1264,27 +1298,25 @@ static int event_hist_trigger_print(stru +@@ 
-1268,27 +1302,25 @@ static int event_hist_trigger_print(stru for (i = 0; i < hist_data->n_sort_keys; i++) { struct tracing_map_sort_key *sort_key; @@ -199,7 +191,7 @@ Signed-off-by: Sebastian Andrzej Siewior seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); if (data->filter_str) -@@ -1452,6 +1484,10 @@ static bool hist_trigger_match(struct ev +@@ -1456,6 +1488,10 @@ static bool hist_trigger_match(struct ev return false; if (key_field->offset != key_field_test->offset) return false; @@ -210,7 +202,7 @@ Signed-off-by: Sebastian Andrzej Siewior } for (i = 0; i < hist_data->n_sort_keys; i++) { -@@ -1534,6 +1570,9 @@ static int hist_register_trigger(char *g +@@ -1538,6 +1574,9 @@ static int hist_register_trigger(char *g update_cond_flag(file); @@ -220,13 +212,15 @@ Signed-off-by: Sebastian Andrzej Siewior if (trace_event_trigger_enable_disable(file, 1) < 0) { list_del_rcu(&data->list); update_cond_flag(file); -@@ -1568,6 +1607,9 @@ static void hist_unregister_trigger(char +@@ -1572,6 +1611,11 @@ static void hist_unregister_trigger(char if (unregistered && test->ops->free) test->ops->free(test->ops, test); + -+ if (hist_data->enable_timestamps) -+ tracing_set_time_stamp_abs(file->tr, false); ++ if (hist_data->enable_timestamps) { ++ if (unregistered) ++ tracing_set_time_stamp_abs(file->tr, false); ++ } } static void hist_unreg_all(struct trace_event_file *file) diff --git a/debian/patches/features/all/rt/0018-hrtimer-Reduce-conditional-code-and-make-hrtimer_for.patch b/debian/patches/features/all/rt/0018-hrtimer-Reduce-conditional-code-and-make-hrtimer_for.patch new file mode 100644 index 000000000..4549f5afe --- /dev/null +++ b/debian/patches/features/all/rt/0018-hrtimer-Reduce-conditional-code-and-make-hrtimer_for.patch @@ -0,0 +1,105 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:56 +0200 +Subject: [PATCH 18/36] hrtimer: Reduce conditional code and make + hrtimer_force_reprogramm() unconditional +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer_force_reprogram() needs to be available unconditionally for softirq +based hrtimers. Move the function and all required struct members out of +the CONFIG_HIGH_RES_TIMERS #ifdef. + +There is no functional change because hrtimer_force_reprogram() is +only invoked when hrtimer_cpu_base.hres_active is true and +CONFIG_HIGH_RES_TIMERS=y. Making it unconditional increases the text +size for the CONFIG_HIGH_RES_TIMERS=n case slightly, but avoids +replication of that code for the upcoming softirq based hrtimers +support. Most of the code gets eliminated in the +CONFIG_HIGH_RES_TIMERS=n case by the compiler. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 58 ++++++++++++++++++++++++-------------------------- + 1 file changed, 28 insertions(+), 30 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -513,34 +513,6 @@ static inline int hrtimer_hres_active(vo + return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); + } + +-/* High resolution timer related functions */ +-#ifdef CONFIG_HIGH_RES_TIMERS +- +-/* +- * High resolution timer enabled ? 
+- */ +-static bool hrtimer_hres_enabled __read_mostly = true; +-unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; +-EXPORT_SYMBOL_GPL(hrtimer_resolution); +- +-/* +- * Enable / Disable high resolution mode +- */ +-static int __init setup_hrtimer_hres(char *str) +-{ +- return (kstrtobool(str, &hrtimer_hres_enabled) == 0); +-} +- +-__setup("highres=", setup_hrtimer_hres); +- +-/* +- * hrtimer_high_res_enabled - query, if the highres mode is enabled +- */ +-static inline int hrtimer_is_hres_enabled(void) +-{ +- return hrtimer_hres_enabled; +-} +- + /* + * Reprogram the event source with checking both queues for the + * next event +@@ -581,6 +553,34 @@ hrtimer_force_reprogram(struct hrtimer_c + tick_program_event(cpu_base->expires_next, 1); + } + ++/* High resolution timer related functions */ ++#ifdef CONFIG_HIGH_RES_TIMERS ++ ++/* ++ * High resolution timer enabled ? ++ */ ++static bool hrtimer_hres_enabled __read_mostly = true; ++unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; ++EXPORT_SYMBOL_GPL(hrtimer_resolution); ++ ++/* ++ * Enable / Disable high resolution mode ++ */ ++static int __init setup_hrtimer_hres(char *str) ++{ ++ return (kstrtobool(str, &hrtimer_hres_enabled) == 0); ++} ++ ++__setup("highres=", setup_hrtimer_hres); ++ ++/* ++ * hrtimer_high_res_enabled - query, if the highres mode is enabled ++ */ ++static inline int hrtimer_is_hres_enabled(void) ++{ ++ return hrtimer_hres_enabled; ++} ++ + /* + * Retrigger next event is called after clock was set + * +@@ -639,8 +639,6 @@ void clock_was_set_delayed(void) + + static inline int hrtimer_is_hres_enabled(void) { return 0; } + static inline void hrtimer_switch_to_hres(void) { } +-static inline void +-hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } + static inline void retrigger_next_event(void *arg) { } + + #endif /* CONFIG_HIGH_RES_TIMERS */ diff --git a/debian/patches/features/all/rt/0013-tracing-Add-per-element-variable-support-to-tracing_.patch b/debian/patches/features/all/rt/0018-tracing-Add-per-element-variable-support-to-tracing_.patch similarity index 86% rename from debian/patches/features/all/rt/0013-tracing-Add-per-element-variable-support-to-tracing_.patch rename to debian/patches/features/all/rt/0018-tracing-Add-per-element-variable-support-to-tracing_.patch index 56f0f7b48..44c3822b0 100644 --- a/debian/patches/features/all/rt/0013-tracing-Add-per-element-variable-support-to-tracing_.patch +++ b/debian/patches/features/all/rt/0018-tracing-Add-per-element-variable-support-to-tracing_.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:14 -0500 -Subject: [PATCH 13/32] tracing: Add per-element variable support to +Date: Fri, 22 Sep 2017 14:59:49 -0500 +Subject: [PATCH 18/42] tracing: Add per-element variable support to tracing_map -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz In order to allow information to be passed between trace events, add support for per-element variables to tracing_map. This provides a @@ -21,9 +21,9 @@ important for event-matching uses. 
Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/tracing_map.c | 113 +++++++++++++++++++++++++++++++++++++++++++++ + kernel/trace/tracing_map.c | 108 +++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/tracing_map.h | 11 ++++ - 2 files changed, 124 insertions(+) + 2 files changed, 119 insertions(+) --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -130,7 +130,7 @@ Signed-off-by: Sebastian Andrzej Siewior * tracing_map_add_key_field - Add a field describing a tracing_map key * @map: The tracing_map * @offset: The offset within the key -@@ -277,6 +366,11 @@ static void tracing_map_elt_clear(struct +@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) atomic64_set(&elt->fields[i].sum, 0); @@ -142,7 +142,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (elt->map->ops && elt->map->ops->elt_clear) elt->map->ops->elt_clear(elt); } -@@ -303,6 +397,8 @@ static void tracing_map_elt_free(struct +@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct if (elt->map->ops && elt->map->ops->elt_free) elt->map->ops->elt_free(elt); kfree(elt->fields); @@ -151,10 +151,11 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(elt->key); kfree(elt); } -@@ -330,6 +426,18 @@ static struct tracing_map_elt *tracing_m +@@ -332,6 +428,18 @@ static struct tracing_map_elt *tracing_m + err = -ENOMEM; goto free; } - ++ + elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL); + if (!elt->vars) { + err = -ENOMEM; @@ -166,25 +167,12 @@ Signed-off-by: Sebastian Andrzej Siewior + err = -ENOMEM; + goto free; + } -+ + tracing_map_elt_init_fields(elt); - if (map->ops && map->ops->elt_alloc) { -@@ -833,6 +941,11 @@ static struct tracing_map_elt *copy_elt( - dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn; - } - -+ for (i = 0; i < elt->map->n_vars; i++) { -+ atomic64_set(&dup_elt->vars[i], atomic64_read(&elt->vars[i])); -+ dup_elt->var_set[i] = elt->var_set[i]; -+ } -+ - return dup_elt; - } - --- a/kernel/trace/tracing_map.h +++ b/kernel/trace/tracing_map.h -@@ -9,6 +9,7 @@ +@@ -10,6 +10,7 @@ #define TRACING_MAP_VALS_MAX 3 #define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \ TRACING_MAP_VALS_MAX) @@ -192,7 +180,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define TRACING_MAP_SORT_KEYS_MAX 2 typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b); -@@ -136,6 +137,8 @@ struct tracing_map_field { +@@ -137,6 +138,8 @@ struct tracing_map_field { struct tracing_map_elt { struct tracing_map *map; struct tracing_map_field *fields; @@ -201,7 +189,7 @@ Signed-off-by: Sebastian Andrzej Siewior void *key; void *private_data; }; -@@ -191,6 +194,7 @@ struct tracing_map { +@@ -192,6 +195,7 @@ struct tracing_map { int key_idx[TRACING_MAP_KEYS_MAX]; unsigned int n_keys; struct tracing_map_sort_key sort_key; @@ -209,7 +197,7 @@ Signed-off-by: Sebastian Andrzej Siewior atomic64_t hits; atomic64_t drops; }; -@@ -247,6 +251,7 @@ tracing_map_create(unsigned int map_bits +@@ -241,6 +245,7 @@ tracing_map_create(unsigned int map_bits extern int tracing_map_init(struct tracing_map *map); extern int tracing_map_add_sum_field(struct tracing_map *map); @@ -217,7 +205,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern int tracing_map_add_key_field(struct tracing_map *map, unsigned int offset, tracing_map_cmp_fn_t cmp_fn); -@@ -266,7 +271,13 @@ extern int tracing_map_cmp_none(void *va +@@ -260,7 +265,13 @@ extern int tracing_map_cmp_none(void *va extern void tracing_map_update_sum(struct tracing_map_elt *elt, 
unsigned int i, u64 n); diff --git a/debian/patches/features/all/rt/0019-hrtimer-Unify-handling-of-hrtimer-remove.patch b/debian/patches/features/all/rt/0019-hrtimer-Unify-handling-of-hrtimer-remove.patch new file mode 100644 index 000000000..4fb577446 --- /dev/null +++ b/debian/patches/features/all/rt/0019-hrtimer-Unify-handling-of-hrtimer-remove.patch @@ -0,0 +1,89 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:57 +0200 +Subject: [PATCH 19/36] hrtimer: Unify handling of hrtimer remove +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +When the first hrtimer on the current CPU is removed, +hrtimer_force_reprogram() is invoked but only when +CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active is set. + +hrtimer_force_reprogram() updates hrtimer_cpu_base.expires_next and +reprograms the clock event device. When CONFIG_HIGH_RES_TIMERS=y and +hrtimer_cpu_base.hres_active is set, a pointless hrtimer interrupt can be +prevented. + +hrtimer_check_target() makes the 'can remote enqueue' decision. As soon as +hrtimer_check_target() is unconditionally available and +hrtimer_cpu_base.expires_next is updated by hrtimer_reprogram(), +hrtimer_force_reprogram() needs to be available unconditionally as well to +prevent the following scenario with CONFIG_HIGH_RES_TIMERS=n: + +- the first hrtimer on this CPU is removed and hrtimer_force_reprogram() is + not executed + +- CPU goes idle (next timer is calculated and hrtimers are taken into + account) + +- a hrtimer is enqueued remote on the idle CPU: hrtimer_check_target() + compares expiry value and hrtimer_cpu_base.expires_next. The expiry value + is after expires_next, so the hrtimer is enqueued. This timer will fire + late, if it expires before the effective first hrtimer on this CPU and + the comparison was with an outdated expires_next value. + +To prevent this scenario, make hrtimer_force_reprogram() unconditional +except the effective reprogramming part, which gets eliminated by the +compiler in the CONFIG_HIGH_RES_TIMERS=n case. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -523,9 +523,6 @@ hrtimer_force_reprogram(struct hrtimer_c + { + ktime_t expires_next; + +- if (!__hrtimer_hres_active(cpu_base)) +- return; +- + expires_next = __hrtimer_get_next_event(cpu_base); + + if (skip_equal && expires_next == cpu_base->expires_next) +@@ -534,6 +531,9 @@ hrtimer_force_reprogram(struct hrtimer_c + cpu_base->expires_next = expires_next; + + /* ++ * If hres is not active, hardware does not have to be ++ * reprogrammed yet. ++ * + * If a hang was detected in the last timer interrupt then we + * leave the hang delay active in the hardware. We want the + * system to make progress. That also prevents the following +@@ -547,7 +547,7 @@ hrtimer_force_reprogram(struct hrtimer_c + * set. So we'd effectivly block all timers until the T2 event + * fires. + */ +- if (cpu_base->hang_detected) ++ if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) + return; + + tick_program_event(cpu_base->expires_next, 1); +@@ -848,7 +848,6 @@ static void __remove_hrtimer(struct hrti + if (!timerqueue_del(&base->active, &timer->node)) + cpu_base->active_bases &= ~(1 << base->index); + +-#ifdef CONFIG_HIGH_RES_TIMERS + /* + * Note: If reprogram is false we do not update + * cpu_base->next_timer. 
This happens when we remove the first +@@ -859,7 +858,6 @@ static void __remove_hrtimer(struct hrti + */ + if (reprogram && timer == cpu_base->next_timer) + hrtimer_force_reprogram(cpu_base, 1); +-#endif + } + + /* diff --git a/debian/patches/features/all/rt/0014-tracing-Add-hist_data-member-to-hist_field.patch b/debian/patches/features/all/rt/0019-tracing-Add-hist_data-member-to-hist_field.patch similarity index 80% rename from debian/patches/features/all/rt/0014-tracing-Add-hist_data-member-to-hist_field.patch rename to debian/patches/features/all/rt/0019-tracing-Add-hist_data-member-to-hist_field.patch index 42a460d12..e8194ca69 100644 --- a/debian/patches/features/all/rt/0014-tracing-Add-hist_data-member-to-hist_field.patch +++ b/debian/patches/features/all/rt/0019-tracing-Add-hist_data-member-to-hist_field.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:15 -0500 -Subject: [PATCH 14/32] tracing: Add hist_data member to hist_field -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:59:50 -0500 +Subject: [PATCH 19/42] tracing: Add hist_data member to hist_field +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Allow hist_data access via hist_field. Some users of hist_fields require or will require more access to the associated hist_data. @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; static u64 hist_field_none(struct hist_field *field, void *event, -@@ -415,7 +416,8 @@ static void destroy_hist_field(struct hi +@@ -420,7 +421,8 @@ static void destroy_hist_field(struct hi kfree(hist_field); } @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned long flags) { struct hist_field *hist_field; -@@ -427,6 +429,8 @@ static struct hist_field *create_hist_fi +@@ -432,6 +434,8 @@ static struct hist_field *create_hist_fi if (!hist_field) return NULL; @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (flags & HIST_FIELD_FL_HITCOUNT) { hist_field->fn = hist_field_counter; goto out; -@@ -440,7 +444,7 @@ static struct hist_field *create_hist_fi +@@ -445,7 +449,7 @@ static struct hist_field *create_hist_fi if (flags & HIST_FIELD_FL_LOG2) { unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; hist_field->fn = hist_field_log2; @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior hist_field->size = hist_field->operands[0]->size; goto out; } -@@ -493,7 +497,7 @@ static void destroy_hist_fields(struct h +@@ -498,7 +502,7 @@ static void destroy_hist_fields(struct h static int create_hitcount_val(struct hist_trigger_data *hist_data) { hist_data->fields[HITCOUNT_IDX] = @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!hist_data->fields[HITCOUNT_IDX]) return -ENOMEM; -@@ -539,7 +543,7 @@ static int create_val_field(struct hist_ +@@ -544,7 +548,7 @@ static int create_val_field(struct hist_ } } @@ -68,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!hist_data->fields[val_idx]) { ret = -ENOMEM; goto out; -@@ -649,7 +653,7 @@ static int create_key_field(struct hist_ +@@ -654,7 +658,7 @@ static int create_key_field(struct hist_ } } diff --git a/debian/patches/features/all/rt/0020-hrtimer-Unify-handling-of-remote-enqueue.patch b/debian/patches/features/all/rt/0020-hrtimer-Unify-handling-of-remote-enqueue.patch new file mode 100644 index 000000000..5d29ae4be --- /dev/null +++ b/debian/patches/features/all/rt/0020-hrtimer-Unify-handling-of-remote-enqueue.patch @@ -0,0 +1,158 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 
Oct 2017 23:39:58 +0200 +Subject: [PATCH 20/36] hrtimer: Unify handling of remote enqueue +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer_reprogram() is conditionally invoked from hrtimer_start_range_ns() +when hrtimer_cpu_base.hres_active is true. + +In the !hres_active case there is a special condition for the nohz_active +case: + + If the newly enqueued timer expires before the first expiring timer on a + remote CPU then the remote CPU needs to be notified and woken up from a + NOHZ idle sleep to take the new first expiring timer into account. + +Previous changes have already established the prerequisites to make the +remote enqueue behaviour the same whether high resolution mode is active or +not: + + If the to be enqueued timer expires before the first expiring timer on a + remote CPU, then it cannot be enqueued there. + +This was done for the high resolution mode because there is no way to +access the remote CPU timer hardware. The same is true for NOHZ, but was +handled differently by unconditionally enqueuing the timer and waking up +the remote CPU so it can reprogram its timer. Again there is no compelling +reason for this difference. + +hrtimer_check_target(), which makes the 'can remote enqueue' decision is +already unconditional, but not yet functional because nothing updates +hrtimer_cpu_base.expires_next in the !hres_active case. + +To unify this the following changes are required: + + 1) Make the store of the new first expiry time unconditonal in + hrtimer_reprogram() and check __hrtimer_hres_active() before proceeding + to the actual hardware access. This check also lets the compiler + eliminate the rest of the function in case of CONFIG_HIGH_RES_TIMERS=n. + + 2) Invoke hrtimer_reprogram() unconditionally from + hrtimer_start_range_ns() + + 3) Remove the remote wakeup special case for the !high_res && nohz_active + case. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 18 ++++++------------ + kernel/time/tick-internal.h | 11 ----------- + kernel/time/timer.c | 15 ++++++++++++++- + 3 files changed, 20 insertions(+), 24 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -687,21 +687,24 @@ static void hrtimer_reprogram(struct hrt + + /* Update the pointer to the next expiring timer */ + cpu_base->next_timer = timer; ++ cpu_base->expires_next = expires; + + /* ++ * If hres is not active, hardware does not have to be ++ * programmed yet. ++ * + * If a hang was detected in the last timer interrupt then we + * do not schedule a timer which is earlier than the expiry + * which we enforced in the hang detection. We want the system + * to make progress. + */ +- if (cpu_base->hang_detected) ++ if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) + return; + + /* + * Program the timer hardware. We enforce the expiry for + * events which are already in the past. + */ +- cpu_base->expires_next = expires; + tick_program_event(expires, 1); + } + +@@ -940,16 +943,7 @@ void hrtimer_start_range_ns(struct hrtim + if (!leftmost) + goto unlock; + +- if (!hrtimer_is_hres_active(timer)) { +- /* +- * Kick to reschedule the next tick to handle the new timer +- * on dynticks target. 
+- */ +- if (is_timers_nohz_active()) +- wake_up_nohz_cpu(new_base->cpu_base->cpu); +- } else { +- hrtimer_reprogram(timer, new_base); +- } ++ hrtimer_reprogram(timer, new_base); + unlock: + unlock_hrtimer_base(timer, &flags); + } +--- a/kernel/time/tick-internal.h ++++ b/kernel/time/tick-internal.h +@@ -151,12 +151,6 @@ static inline void tick_nohz_init(void) + #ifdef CONFIG_NO_HZ_COMMON + extern unsigned long tick_nohz_active; + extern void timers_update_nohz(void); +-extern struct static_key_false timers_nohz_active; +- +-static inline bool is_timers_nohz_active(void) +-{ +- return static_branch_unlikely(&timers_nohz_active); +-} + + #ifdef CONFIG_SMP + extern struct static_key_false timers_migration_enabled; +@@ -164,11 +158,6 @@ extern struct static_key_false timers_mi + #else + static inline void timers_update_nohz(void) { } + #define tick_nohz_active (0) +- +-static inline bool is_timers_nohz_active(void) +-{ +- return false; +-} + #endif + + DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -210,7 +210,7 @@ static DEFINE_PER_CPU(struct timer_base, + + #ifdef CONFIG_NO_HZ_COMMON + +-DEFINE_STATIC_KEY_FALSE(timers_nohz_active); ++static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); + static DEFINE_MUTEX(timer_keys_mutex); + + static void timer_update_keys(struct work_struct *work); +@@ -260,6 +260,19 @@ int timer_migration_handler(struct ctl_t + mutex_unlock(&timer_keys_mutex); + return ret; + } ++ ++static inline bool is_timers_nohz_active(void) ++{ ++ return static_branch_unlikely(&timers_nohz_active); ++} ++ ++#else ++ ++static inline bool is_timers_nohz_active(void) ++{ ++ return false; ++} ++ + #endif /* NO_HZ_COMMON */ + + static unsigned long round_jiffies_common(unsigned long j, int cpu, diff --git a/debian/patches/features/all/rt/0020-tracing-Add-support-for-dynamic-tracepoints.patch b/debian/patches/features/all/rt/0020-tracing-Add-support-for-dynamic-tracepoints.patch deleted file mode 100644 index e3998fab0..000000000 --- a/debian/patches/features/all/rt/0020-tracing-Add-support-for-dynamic-tracepoints.patch +++ /dev/null @@ -1,196 +0,0 @@ -From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:21 -0500 -Subject: [PATCH 20/32] tracing: Add support for dynamic tracepoints -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The tracepoint infrastructure assumes statically-defined tracepoints -and uses static_keys for tracepoint enablement. In order to define -tracepoints on the fly, we need to have a dynamic counterpart. - -Add a dynamic_tracepoint_probe_register() and a dynamic param onto -tracepoint_probe_unregister() for this purpose. 
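(Purely as an illustration of the interface this now-dropped patch had added — the probe function and data pointer below are hypothetical, not taken from the patch — a dynamically attached probe would have been registered and removed roughly like this, matching the prototypes in the hunks that follow:)

    /* sketch only: attach/detach a probe on a dynamically created tracepoint */
    ret = dynamic_tracepoint_probe_register(tp, (void *)my_probe, my_data);
    ...
    tracepoint_probe_unregister(tp, (void *)my_probe, my_data, true);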
- -Signed-off-by: Tom Zanussi -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/tracepoint.h | 11 +++++++---- - kernel/trace/trace_events.c | 4 ++-- - kernel/tracepoint.c | 42 ++++++++++++++++++++++++++++++------------ - 3 files changed, 39 insertions(+), 18 deletions(-) - ---- a/include/linux/tracepoint.h -+++ b/include/linux/tracepoint.h -@@ -37,9 +37,12 @@ extern int - tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); - extern int - tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, -- int prio); -+ int prio, bool dynamic); -+extern int dynamic_tracepoint_probe_register(struct tracepoint *tp, -+ void *probe, void *data); - extern int --tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); -+tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data, -+ bool dynamic); - extern void - for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), - void *priv); -@@ -206,13 +209,13 @@ extern void syscall_unregfunc(void); - int prio) \ - { \ - return tracepoint_probe_register_prio(&__tracepoint_##name, \ -- (void *)probe, data, prio); \ -+ (void *)probe, data, prio, false); \ - } \ - static inline int \ - unregister_trace_##name(void (*probe)(data_proto), void *data) \ - { \ - return tracepoint_probe_unregister(&__tracepoint_##name,\ -- (void *)probe, data); \ -+ (void *)probe, data, false); \ - } \ - static inline void \ - check_trace_callback_type_##name(void (*cb)(data_proto)) \ ---- a/kernel/trace/trace_events.c -+++ b/kernel/trace/trace_events.c -@@ -297,7 +297,7 @@ int trace_event_reg(struct trace_event_c - case TRACE_REG_UNREGISTER: - tracepoint_probe_unregister(call->tp, - call->class->probe, -- file); -+ file, false); - return 0; - - #ifdef CONFIG_PERF_EVENTS -@@ -308,7 +308,7 @@ int trace_event_reg(struct trace_event_c - case TRACE_REG_PERF_UNREGISTER: - tracepoint_probe_unregister(call->tp, - call->class->perf_probe, -- call); -+ call, false); - return 0; - case TRACE_REG_PERF_OPEN: - case TRACE_REG_PERF_CLOSE: ---- a/kernel/tracepoint.c -+++ b/kernel/tracepoint.c -@@ -192,12 +192,15 @@ static void *func_remove(struct tracepoi - * Add the probe function to a tracepoint. - */ - static int tracepoint_add_func(struct tracepoint *tp, -- struct tracepoint_func *func, int prio) -+ struct tracepoint_func *func, int prio, -+ bool dynamic) - { - struct tracepoint_func *old, *tp_funcs; - int ret; - -- if (tp->regfunc && !static_key_enabled(&tp->key)) { -+ if (tp->regfunc && -+ ((dynamic && !(atomic_read(&tp->key.enabled) > 0)) || -+ !static_key_enabled(&tp->key))) { - ret = tp->regfunc(); - if (ret < 0) - return ret; -@@ -219,7 +222,9 @@ static int tracepoint_add_func(struct tr - * is used. - */ - rcu_assign_pointer(tp->funcs, tp_funcs); -- if (!static_key_enabled(&tp->key)) -+ if (dynamic && !(atomic_read(&tp->key.enabled) > 0)) -+ atomic_inc(&tp->key.enabled); -+ else if (!dynamic && !static_key_enabled(&tp->key)) - static_key_slow_inc(&tp->key); - release_probes(old); - return 0; -@@ -232,7 +237,7 @@ static int tracepoint_add_func(struct tr - * by preempt_disable around the call site. 
- */ - static int tracepoint_remove_func(struct tracepoint *tp, -- struct tracepoint_func *func) -+ struct tracepoint_func *func, bool dynamic) - { - struct tracepoint_func *old, *tp_funcs; - -@@ -246,10 +251,14 @@ static int tracepoint_remove_func(struct - - if (!tp_funcs) { - /* Removed last function */ -- if (tp->unregfunc && static_key_enabled(&tp->key)) -+ if (tp->unregfunc && -+ ((dynamic && (atomic_read(&tp->key.enabled) > 0)) || -+ static_key_enabled(&tp->key))) - tp->unregfunc(); - -- if (static_key_enabled(&tp->key)) -+ if (dynamic && (atomic_read(&tp->key.enabled) > 0)) -+ atomic_dec(&tp->key.enabled); -+ else if (!dynamic && static_key_enabled(&tp->key)) - static_key_slow_dec(&tp->key); - } - rcu_assign_pointer(tp->funcs, tp_funcs); -@@ -258,7 +267,7 @@ static int tracepoint_remove_func(struct - } - - /** -- * tracepoint_probe_register - Connect a probe to a tracepoint -+ * tracepoint_probe_register_prio - Connect a probe to a tracepoint - * @tp: tracepoint - * @probe: probe handler - * @data: tracepoint data -@@ -271,7 +280,7 @@ static int tracepoint_remove_func(struct - * within module exit functions. - */ - int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, -- void *data, int prio) -+ void *data, int prio, bool dynamic) - { - struct tracepoint_func tp_func; - int ret; -@@ -280,7 +289,7 @@ int tracepoint_probe_register_prio(struc - tp_func.func = probe; - tp_func.data = data; - tp_func.prio = prio; -- ret = tracepoint_add_func(tp, &tp_func, prio); -+ ret = tracepoint_add_func(tp, &tp_func, prio, dynamic); - mutex_unlock(&tracepoints_mutex); - return ret; - } -@@ -301,10 +310,18 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_regis - */ - int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) - { -- return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO); -+ return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO, false); - } - EXPORT_SYMBOL_GPL(tracepoint_probe_register); - -+int dynamic_tracepoint_probe_register(struct tracepoint *tp, void *probe, -+ void *data) -+{ -+ return tracepoint_probe_register_prio(tp, probe, data, -+ TRACEPOINT_DEFAULT_PRIO, true); -+} -+EXPORT_SYMBOL_GPL(dynamic_tracepoint_probe_register); -+ - /** - * tracepoint_probe_unregister - Disconnect a probe from a tracepoint - * @tp: tracepoint -@@ -313,7 +330,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_regis - * - * Returns 0 if ok, error value on error. 
- */ --int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) -+int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data, -+ bool dynamic) - { - struct tracepoint_func tp_func; - int ret; -@@ -321,7 +339,7 @@ int tracepoint_probe_unregister(struct t - mutex_lock(&tracepoints_mutex); - tp_func.func = probe; - tp_func.data = data; -- ret = tracepoint_remove_func(tp, &tp_func); -+ ret = tracepoint_remove_func(tp, &tp_func, dynamic); - mutex_unlock(&tracepoints_mutex); - return ret; - } diff --git a/debian/patches/features/all/rt/0015-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch b/debian/patches/features/all/rt/0020-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch similarity index 84% rename from debian/patches/features/all/rt/0015-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch rename to debian/patches/features/all/rt/0020-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch index f75ff1fc9..e16df1cb7 100644 --- a/debian/patches/features/all/rt/0015-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch +++ b/debian/patches/features/all/rt/0020-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:16 -0500 -Subject: [PATCH 15/32] tracing: Add usecs modifier for hist trigger timestamps -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:59:51 -0500 +Subject: [PATCH 20/42] tracing: Add usecs modifier for hist trigger timestamps +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Appending .usecs onto a common_timestamp field will cause the timestamp value to be in microseconds instead of the default @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -1164,6 +1164,14 @@ static struct { +@@ -1170,6 +1170,14 @@ static struct { ARCH_TRACE_CLOCKS }; @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -280,6 +280,8 @@ extern void trace_array_put(struct trace +@@ -289,6 +289,8 @@ extern void trace_array_put(struct trace extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); @@ -66,10 +66,10 @@ Signed-off-by: Sebastian Andrzej Siewior static u64 hist_field_##type(struct hist_field *hist_field, \ void *event, \ @@ -143,6 +137,7 @@ enum hist_field_flags { - HIST_FIELD_FL_STACKTRACE = 256, - HIST_FIELD_FL_LOG2 = 512, - HIST_FIELD_FL_TIMESTAMP = 1024, -+ HIST_FIELD_FL_TIMESTAMP_USECS = 2048, + HIST_FIELD_FL_STACKTRACE = 1 << 8, + HIST_FIELD_FL_LOG2 = 1 << 9, + HIST_FIELD_FL_TIMESTAMP = 1 << 10, ++ HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, }; struct hist_trigger_attrs { @@ -102,7 +102,7 @@ Signed-off-by: Sebastian Andrzej Siewior static const char *hist_field_name(struct hist_field *field, unsigned int level) { -@@ -629,6 +639,8 @@ static int create_key_field(struct hist_ +@@ -634,6 +644,8 @@ static int create_key_field(struct hist_ flags |= HIST_FIELD_FL_SYSCALL; else if (strcmp(field_str, "log2") == 0) flags |= HIST_FIELD_FL_LOG2; @@ -111,7 +111,7 @@ Signed-off-by: Sebastian Andrzej Siewior else { ret = -EINVAL; goto out; -@@ -638,6 +650,8 @@ static int create_key_field(struct hist_ +@@ -643,6 +655,8 @@ static int create_key_field(struct hist_ if (strcmp(field_name, "$common_timestamp") == 0) { flags |= HIST_FIELD_FL_TIMESTAMP; hist_data->enable_timestamps = true; @@ -120,7 +120,7 @@ 
Signed-off-by: Sebastian Andrzej Siewior key_size = sizeof(u64); } else { field = trace_find_event_field(file->event_call, field_name); -@@ -1239,6 +1253,8 @@ static const char *get_hist_field_flags( +@@ -1243,6 +1257,8 @@ static const char *get_hist_field_flags( flags_str = "syscall"; else if (hist_field->flags & HIST_FIELD_FL_LOG2) flags_str = "log2"; diff --git a/debian/patches/features/all/rt/0021-hrtimer-Make-remote-enqueue-decision-less-restrictiv.patch b/debian/patches/features/all/rt/0021-hrtimer-Make-remote-enqueue-decision-less-restrictiv.patch new file mode 100644 index 000000000..93b9483ba --- /dev/null +++ b/debian/patches/features/all/rt/0021-hrtimer-Make-remote-enqueue-decision-less-restrictiv.patch @@ -0,0 +1,32 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:39:59 +0200 +Subject: [PATCH 21/36] hrtimer: Make remote enqueue decision less restrictive +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The current decision whether a timer can be queued on a remote CPU checks +for timer->expiry <= remote_cpu_base.expires_next. + +This is too restrictive because a timer with the same expiry time as an +existing timer will be enqueued on right-hand size of the existing timer +inside the rbtree, i.e. behind the first expiring timer. + +So its safe to allow enqueuing timers with the same expiry time as the +first expiring timer on a remote CPU base. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -168,7 +168,7 @@ hrtimer_check_target(struct hrtimer *tim + ktime_t expires; + + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); +- return expires <= new_base->cpu_base->expires_next; ++ return expires < new_base->cpu_base->expires_next; + } + + static inline diff --git a/debian/patches/features/all/rt/0016-tracing-Add-variable-support-to-hist-triggers.patch b/debian/patches/features/all/rt/0021-tracing-Add-variable-support-to-hist-triggers.patch similarity index 69% rename from debian/patches/features/all/rt/0016-tracing-Add-variable-support-to-hist-triggers.patch rename to debian/patches/features/all/rt/0021-tracing-Add-variable-support-to-hist-triggers.patch index 0f2487bc1..067aa309a 100644 --- a/debian/patches/features/all/rt/0016-tracing-Add-variable-support-to-hist-triggers.patch +++ b/debian/patches/features/all/rt/0021-tracing-Add-variable-support-to-hist-triggers.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:17 -0500 -Subject: [PATCH 16/32] tracing: Add variable support to hist triggers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:59:52 -0500 +Subject: [PATCH 21/42] tracing: Add variable support to hist triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add support for saving the value of a current event's event field by assigning it to a variable that can be read by a subsequent event. @@ -12,8 +12,8 @@ to any event field. Both keys and values can be saved and retrieved in this way: - # echo 'hist:keys=next_pid:vals=ts0=common_timestamp ... - # echo 'hist:key=timer_pid=common_pid ...' + # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ... + # echo 'hist:timer_pid=common_pid:key=$timer_pid ...' 
If a variable isn't a key variable or prefixed with 'vals=', the associated event field will be saved in a variable but won't be summed @@ -23,21 +23,22 @@ as a value: Multiple variables can be assigned at the same time: - # echo 'hist:keys=pid:vals=ts0=common_timestamp,b=field1,field2 ... + # echo 'hist:keys=pid:vals=$ts0,$b,field2:ts0=common_timestamp,b=field1 ... Multiple (or single) variables can also be assigned at the same time using separate assignments: - # echo 'hist:keys=pid:vals=ts0=common_timestamp:b=field1:c=field2 ... + # echo 'hist:keys=pid:vals=$ts0:ts0=common_timestamp:b=field1:c=field2 ... Variables set as above can be used by being referenced from another event, as described in a subsequent patch. Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 299 ++++++++++++++++++++++++++++++++++----- - 1 file changed, 264 insertions(+), 35 deletions(-) + kernel/trace/trace_events_hist.c | 374 ++++++++++++++++++++++++++++++++++----- + 1 file changed, 334 insertions(+), 40 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -63,22 +64,30 @@ Signed-off-by: Sebastian Andrzej Siewior }; static u64 hist_field_none(struct hist_field *field, void *event, -@@ -138,6 +146,8 @@ enum hist_field_flags { - HIST_FIELD_FL_LOG2 = 512, - HIST_FIELD_FL_TIMESTAMP = 1024, - HIST_FIELD_FL_TIMESTAMP_USECS = 2048, -+ HIST_FIELD_FL_VAR = 4096, -+ HIST_FIELD_FL_VAR_ONLY = 8192, +@@ -138,6 +146,14 @@ enum hist_field_flags { + HIST_FIELD_FL_LOG2 = 1 << 9, + HIST_FIELD_FL_TIMESTAMP = 1 << 10, + HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, ++ HIST_FIELD_FL_VAR = 1 << 12, ++ HIST_FIELD_FL_VAR_ONLY = 1 << 13, ++}; ++ ++struct var_defs { ++ unsigned int n_vars; ++ char *name[TRACING_MAP_VARS_MAX]; ++ char *expr[TRACING_MAP_VARS_MAX]; }; struct hist_trigger_attrs { -@@ -150,13 +160,18 @@ struct hist_trigger_attrs { +@@ -150,13 +166,20 @@ struct hist_trigger_attrs { bool clear; bool ts_in_usecs; unsigned int map_bits; + + char *assignment_str[TRACING_MAP_VARS_MAX]; + unsigned int n_assignments; ++ ++ struct var_defs var_defs; }; struct hist_trigger_data { @@ -92,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned int key_size; struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; unsigned int n_sort_keys; -@@ -164,6 +179,7 @@ struct hist_trigger_data { +@@ -164,6 +187,7 @@ struct hist_trigger_data { struct hist_trigger_attrs *attrs; struct tracing_map *map; bool enable_timestamps; @@ -100,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, -@@ -262,9 +278,14 @@ static int parse_map_size(char *str) +@@ -262,9 +286,14 @@ static int parse_map_size(char *str) static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs) { @@ -115,7 +124,7 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(attrs->name); kfree(attrs->sort_key_str); kfree(attrs->keys_str); -@@ -295,8 +316,22 @@ static int parse_assignment(char *str, s +@@ -311,8 +340,22 @@ static int parse_assignment(char *str, s goto out; } attrs->map_bits = map_bits; @@ -140,9 +149,9 @@ Signed-off-by: Sebastian Andrzej Siewior out: return ret; } -@@ -423,12 +458,15 @@ static void destroy_hist_field(struct hi +@@ -428,12 +471,15 @@ static void destroy_hist_field(struct hi for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) - destroy_hist_field(hist_field->operands[i], ++level); + destroy_hist_field(hist_field->operands[i], level + 1); + 
kfree(hist_field->var.name); + @@ -157,7 +166,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct hist_field *hist_field; -@@ -454,7 +492,7 @@ static struct hist_field *create_hist_fi +@@ -459,7 +505,7 @@ static struct hist_field *create_hist_fi if (flags & HIST_FIELD_FL_LOG2) { unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; hist_field->fn = hist_field_log2; @@ -166,7 +175,7 @@ Signed-off-by: Sebastian Andrzej Siewior hist_field->size = hist_field->operands[0]->size; goto out; } -@@ -489,14 +527,23 @@ static struct hist_field *create_hist_fi +@@ -494,14 +540,23 @@ static struct hist_field *create_hist_fi hist_field->field = field; hist_field->flags = flags; @@ -191,7 +200,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (hist_data->fields[i]) { destroy_hist_field(hist_data->fields[i], 0); hist_data->fields[i] = NULL; -@@ -507,11 +554,12 @@ static void destroy_hist_fields(struct h +@@ -512,11 +567,12 @@ static void destroy_hist_fields(struct h static int create_hitcount_val(struct hist_trigger_data *hist_data) { hist_data->fields[HITCOUNT_IDX] = @@ -205,10 +214,14 @@ Signed-off-by: Sebastian Andrzej Siewior if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) return -EINVAL; -@@ -519,19 +567,81 @@ static int create_hitcount_val(struct hi +@@ -524,19 +580,53 @@ static int create_hitcount_val(struct hi return 0; } +-static int create_val_field(struct hist_trigger_data *hist_data, +- unsigned int val_idx, +- struct trace_event_file *file, +- char *field_str) +static struct hist_field *find_var_field(struct hist_trigger_data *hist_data, + const char *var_name) +{ @@ -246,51 +259,24 @@ Signed-off-by: Sebastian Andrzej Siewior + return NULL; +} + - static int create_val_field(struct hist_trigger_data *hist_data, - unsigned int val_idx, - struct trace_event_file *file, -- char *field_str) -+ char *field_str, bool var_only) ++static int __create_val_field(struct hist_trigger_data *hist_data, ++ unsigned int val_idx, ++ struct trace_event_file *file, ++ char *var_name, char *field_str, ++ unsigned long flags) { struct ftrace_event_field *field = NULL; -+ char *field_name, *var_name; - unsigned long flags = 0; -- char *field_name; +- unsigned long flags = 0; + char *field_name; int ret = 0; - if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) -+ if (WARN_ON(!var_only && val_idx >= TRACING_MAP_VALS_MAX)) - return -EINVAL; - -+ var_name = strsep(&field_str, "="); -+ if (field_str && var_name) { -+ if (find_var(file, var_name) && -+ !hist_data->remove) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ flags |= HIST_FIELD_FL_VAR; -+ hist_data->n_vars++; -+ if (hist_data->n_vars > TRACING_MAP_VARS_MAX) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (var_only) -+ flags |= HIST_FIELD_FL_VAR_ONLY; -+ } else if (!var_only && var_name != NULL && field_str == NULL) { -+ field_str = var_name; -+ var_name = NULL; -+ } else { -+ ret = -EINVAL; -+ goto out; -+ } -+ +- return -EINVAL; +- field_name = strsep(&field_str, "."); if (field_str) { if (strcmp(field_str, "hex") == 0) -@@ -553,15 +663,19 @@ static int create_val_field(struct hist_ +@@ -558,25 +648,65 @@ static int create_val_field(struct hist_ } } @@ -312,7 +298,45 @@ Signed-off-by: Sebastian Andrzej Siewior ret = -EINVAL; out: return ret; -@@ -571,7 +685,7 @@ static int create_val_fields(struct hist + } + ++static int create_val_field(struct hist_trigger_data *hist_data, ++ unsigned int val_idx, ++ struct trace_event_file *file, ++ char *field_str) ++{ ++ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) ++ return -EINVAL; ++ ++ return __create_val_field(hist_data, 
val_idx, file, NULL, field_str, 0); ++} ++ ++static int create_var_field(struct hist_trigger_data *hist_data, ++ unsigned int val_idx, ++ struct trace_event_file *file, ++ char *var_name, char *expr_str) ++{ ++ unsigned long flags = 0; ++ ++ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) ++ return -EINVAL; ++ ++ if (find_var(file, var_name) && !hist_data->remove) { ++ return -EINVAL; ++ } ++ ++ flags |= HIST_FIELD_FL_VAR; ++ hist_data->n_vars++; ++ if (hist_data->n_vars > TRACING_MAP_VARS_MAX) { ++ return -EINVAL; ++ } ++ ++ flags |= HIST_FIELD_FL_VAR_ONLY; ++ ++ return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); ++} ++ + static int create_val_fields(struct hist_trigger_data *hist_data, struct trace_event_file *file) { char *fields_str, *field_str; @@ -321,16 +345,15 @@ Signed-off-by: Sebastian Andrzej Siewior int ret; ret = create_hitcount_val(hist_data); -@@ -591,12 +705,15 @@ static int create_val_fields(struct hist +@@ -596,12 +726,15 @@ static int create_val_fields(struct hist field_str = strsep(&fields_str, ","); if (!field_str) break; + if (strcmp(field_str, "hitcount") == 0) continue; -- ret = create_val_field(hist_data, j++, file, field_str); + -+ ret = create_val_field(hist_data, j++, file, field_str, false); + ret = create_val_field(hist_data, j++, file, field_str); if (ret) goto out; } @@ -338,14 +361,13 @@ Signed-off-by: Sebastian Andrzej Siewior if (fields_str && (strcmp(fields_str, "hitcount") != 0)) ret = -EINVAL; out: -@@ -610,18 +727,32 @@ static int create_key_field(struct hist_ +@@ -615,11 +748,12 @@ static int create_key_field(struct hist_ char *field_str) { struct ftrace_event_field *field = NULL; + struct hist_field *hist_field = NULL; unsigned long flags = 0; unsigned int key_size; -+ char *var_name; int ret = 0; - if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX)) @@ -353,35 +375,24 @@ Signed-off-by: Sebastian Andrzej Siewior return -EINVAL; flags |= HIST_FIELD_FL_KEY; - -+ var_name = strsep(&field_str, "="); -+ if (field_str) { -+ if (find_var(file, var_name) && -+ !hist_data->remove) -+ return -EINVAL; -+ flags |= HIST_FIELD_FL_VAR; -+ } else { -+ field_str = var_name; -+ var_name = NULL; -+ } -+ +@@ -627,6 +761,7 @@ static int create_key_field(struct hist_ if (strcmp(field_str, "stacktrace") == 0) { flags |= HIST_FIELD_FL_STACKTRACE; key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH; -+ hist_field = create_hist_field(hist_data, NULL, flags, var_name); ++ hist_field = create_hist_field(hist_data, NULL, flags, NULL); } else { char *field_name = strsep(&field_str, "."); -@@ -667,7 +798,7 @@ static int create_key_field(struct hist_ +@@ -672,7 +807,7 @@ static int create_key_field(struct hist_ } } - hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags); -+ hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, var_name); ++ hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL); if (!hist_data->fields[key_idx]) { ret = -ENOMEM; goto out; -@@ -683,6 +814,7 @@ static int create_key_field(struct hist_ +@@ -688,6 +823,7 @@ static int create_key_field(struct hist_ } hist_data->n_keys++; @@ -389,37 +400,99 @@ Signed-off-by: Sebastian Andrzej Siewior if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX)) return -EINVAL; -@@ -726,6 +858,29 @@ static int create_key_fields(struct hist +@@ -731,21 +867,108 @@ static int create_key_fields(struct hist return ret; } +static int create_var_fields(struct hist_trigger_data *hist_data, + struct trace_event_file *file) +{ -+ 
unsigned int i, j, k = hist_data->n_vals; -+ char *str, *field_str; ++ unsigned int i, j = hist_data->n_vals; ++ int ret = 0; ++ ++ unsigned int n_vars = hist_data->attrs->var_defs.n_vars; ++ ++ for (i = 0; i < n_vars; i++) { ++ char *var_name = hist_data->attrs->var_defs.name[i]; ++ char *expr = hist_data->attrs->var_defs.expr[i]; ++ ++ ret = create_var_field(hist_data, j++, file, var_name, expr); ++ if (ret) ++ goto out; ++ } ++ out: ++ return ret; ++} ++ ++static void free_var_defs(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { ++ kfree(hist_data->attrs->var_defs.name[i]); ++ kfree(hist_data->attrs->var_defs.expr[i]); ++ } ++ ++ hist_data->attrs->var_defs.n_vars = 0; ++} ++ ++static int parse_var_defs(struct hist_trigger_data *hist_data) ++{ ++ char *s, *str, *var_name, *field_str; ++ unsigned int i, j, n_vars = 0; + int ret = 0; + + for (i = 0; i < hist_data->attrs->n_assignments; i++) { + str = hist_data->attrs->assignment_str[i]; -+ + for (j = 0; j < TRACING_MAP_VARS_MAX; j++) { + field_str = strsep(&str, ","); + if (!field_str) + break; -+ ret = create_val_field(hist_data, k++, file, field_str, true); -+ if (ret) -+ goto out; ++ ++ var_name = strsep(&field_str, "="); ++ if (!var_name || !field_str) { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ s = kstrdup(var_name, GFP_KERNEL); ++ if (!s) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ hist_data->attrs->var_defs.name[n_vars] = s; ++ ++ s = kstrdup(field_str, GFP_KERNEL); ++ if (!s) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ hist_data->attrs->var_defs.expr[n_vars++] = s; ++ ++ hist_data->attrs->var_defs.n_vars = n_vars; ++ ++ if (n_vars == TRACING_MAP_VARS_MAX) ++ goto free; + } + } -+ out: ++ ++ return ret; ++ free: ++ free_var_defs(hist_data); ++ + return ret; +} + static int create_hist_fields(struct hist_trigger_data *hist_data, struct trace_event_file *file) { -@@ -735,11 +890,13 @@ static int create_hist_fields(struct his + int ret; + ++ ret = parse_var_defs(hist_data); ++ if (ret) ++ goto out; ++ + ret = create_val_fields(hist_data, file); if (ret) goto out; @@ -433,9 +506,12 @@ Signed-off-by: Sebastian Andrzej Siewior + if (ret) + goto out; out: ++ free_var_defs(hist_data); ++ return ret; } -@@ -763,7 +920,7 @@ static int create_sort_keys(struct hist_ + +@@ -768,7 +991,7 @@ static int create_sort_keys(struct hist_ char *fields_str = hist_data->attrs->sort_key_str; struct tracing_map_sort_key *sort_key; int descending, ret = 0; @@ -444,13 +520,13 @@ Signed-off-by: Sebastian Andrzej Siewior hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */ -@@ -811,13 +968,21 @@ static int create_sort_keys(struct hist_ +@@ -816,12 +1039,19 @@ static int create_sort_keys(struct hist_ continue; } - for (j = 1; j < hist_data->n_fields; j++) { + for (j = 1, k = 1; j < hist_data->n_fields; j++) { -+ unsigned idx; ++ unsigned int idx; + hist_field = hist_data->fields[j]; + if (hist_field->flags & HIST_FIELD_FL_VAR_ONLY) @@ -459,16 +535,14 @@ Signed-off-by: Sebastian Andrzej Siewior + idx = k++; + test_name = hist_field_name(hist_field, 0); -+ - if (test_name == NULL) - continue; + if (strcmp(field_name, test_name) == 0) { - sort_key->field_idx = j; + sort_key->field_idx = idx; descending = is_descending(field_str); if (descending < 0) { ret = descending; -@@ -832,6 +997,7 @@ static int create_sort_keys(struct hist_ +@@ -836,6 +1066,7 @@ static int create_sort_keys(struct hist_ break; } } @@ -476,7 +550,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
hist_data->n_sort_keys = i; out: return ret; -@@ -872,12 +1038,19 @@ static int create_tracing_map_fields(str +@@ -876,12 +1107,19 @@ static int create_tracing_map_fields(str idx = tracing_map_add_key_field(map, hist_field->offset, cmp_fn); @@ -498,7 +572,7 @@ Signed-off-by: Sebastian Andrzej Siewior } return 0; -@@ -901,7 +1074,8 @@ static bool need_tracing_map_ops(struct +@@ -905,7 +1143,8 @@ static bool need_tracing_map_ops(struct static struct hist_trigger_data * create_hist_data(unsigned int map_bits, struct hist_trigger_attrs *attrs, @@ -508,7 +582,7 @@ Signed-off-by: Sebastian Andrzej Siewior { const struct tracing_map_ops *map_ops = NULL; struct hist_trigger_data *hist_data; -@@ -912,6 +1086,7 @@ create_hist_data(unsigned int map_bits, +@@ -916,6 +1155,7 @@ create_hist_data(unsigned int map_bits, return ERR_PTR(-ENOMEM); hist_data->attrs = attrs; @@ -516,7 +590,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = create_hist_fields(hist_data, file); if (ret) -@@ -958,14 +1133,29 @@ static void hist_trigger_elt_update(stru +@@ -962,14 +1202,29 @@ static void hist_trigger_elt_update(stru struct ring_buffer_event *rbe) { struct hist_field *hist_field; @@ -548,7 +622,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline void add_to_key(char *compound_key, void *key, -@@ -1140,6 +1330,9 @@ hist_trigger_entry_print(struct seq_file +@@ -1144,6 +1399,9 @@ hist_trigger_entry_print(struct seq_file for (i = 1; i < hist_data->n_vals; i++) { field_name = hist_field_name(hist_data->fields[i], 0); @@ -558,7 +632,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { seq_printf(m, " %s: %10llx", field_name, tracing_map_read_sum(elt, i)); -@@ -1263,6 +1456,9 @@ static void hist_field_print(struct seq_ +@@ -1267,6 +1525,9 @@ static void hist_field_print(struct seq_ { const char *field_name = hist_field_name(hist_field, 0); @@ -568,7 +642,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) seq_puts(m, "$common_timestamp"); else if (field_name) -@@ -1281,7 +1477,8 @@ static int event_hist_trigger_print(stru +@@ -1285,7 +1546,8 @@ static int event_hist_trigger_print(stru struct event_trigger_data *data) { struct hist_trigger_data *hist_data = data->private_data; @@ -578,7 +652,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned int i; seq_puts(m, "hist:"); -@@ -1292,25 +1489,47 @@ static int event_hist_trigger_print(stru +@@ -1296,25 +1558,47 @@ static int event_hist_trigger_print(stru seq_puts(m, "keys="); for_each_hist_key_field(i, hist_data) { @@ -630,7 +704,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -1318,7 +1537,10 @@ static int event_hist_trigger_print(stru +@@ -1322,7 +1606,10 @@ static int event_hist_trigger_print(stru for (i = 0; i < hist_data->n_sort_keys; i++) { struct tracing_map_sort_key *sort_key; @@ -642,7 +716,7 @@ Signed-off-by: Sebastian Andrzej Siewior sort_key = &hist_data->sort_keys[i]; idx = sort_key->field_idx; -@@ -1331,8 +1553,11 @@ static int event_hist_trigger_print(stru +@@ -1335,8 +1622,11 @@ static int event_hist_trigger_print(stru if (idx == HITCOUNT_IDX) seq_puts(m, "hitcount"); @@ -655,7 +729,16 @@ Signed-off-by: Sebastian Andrzej Siewior if (sort_key->descending) seq_puts(m, ".descending"); -@@ -1656,12 +1881,16 @@ static int event_hist_trigger_func(struc +@@ -1633,7 +1923,7 @@ static void hist_unregister_trigger(char + test->ops->free(test->ops, test); + + if (hist_data->enable_timestamps) { +- if (unregistered) ++ if (!hist_data->remove || unregistered) + 
tracing_set_time_stamp_abs(file->tr, false); + } + } +@@ -1662,12 +1952,16 @@ static int event_hist_trigger_func(struc struct hist_trigger_attrs *attrs; struct event_trigger_ops *trigger_ops; struct hist_trigger_data *hist_data; @@ -672,7 +755,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* separate the trigger from the filter (k:v [if filter]) */ trigger = strsep(¶m, " \t"); if (!trigger) -@@ -1674,7 +1903,7 @@ static int event_hist_trigger_func(struc +@@ -1680,7 +1974,7 @@ static int event_hist_trigger_func(struc if (attrs->map_bits) hist_trigger_bits = attrs->map_bits; @@ -681,7 +764,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (IS_ERR(hist_data)) { destroy_hist_trigger_attrs(attrs); return PTR_ERR(hist_data); -@@ -1703,7 +1932,7 @@ static int event_hist_trigger_func(struc +@@ -1709,7 +2003,7 @@ static int event_hist_trigger_func(struc goto out_free; } diff --git a/debian/patches/features/all/rt/0022-hrtimer-Remove-base-argument-from-hrtimer_reprogram.patch b/debian/patches/features/all/rt/0022-hrtimer-Remove-base-argument-from-hrtimer_reprogram.patch new file mode 100644 index 000000000..5649bd822 --- /dev/null +++ b/debian/patches/features/all/rt/0022-hrtimer-Remove-base-argument-from-hrtimer_reprogram.patch @@ -0,0 +1,50 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:00 +0200 +Subject: [PATCH 22/36] hrtimer: Remove base argument from hrtimer_reprogram() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer_reprogram() must have access to the hrtimer_clock_base of the new +first expiring timer to access hrtimer_clock_base.offset for adjusting the +expiry time to CLOCK_MONOTONIC. This is required to evaluate whether the +new left most timer in the hrtimer_clock_base is the first expiring timer +of all clock bases in a hrtimer_cpu_base. + +The only user of hrtimer_reprogram() is hrtimer_start_range_ns(), which has +a pointer to hrtimer_clock_base already and hands it in as an argument. But +hrtimer_start_range_ns() will be split for the upcoming support for softirq +based hrtimers to avoid code duplication and will lose the direct access to +the clock base pointer. + +Instead of handing in timer and timer->base as an argument remove the base +argument from hrtimer_reprogram() and retrieve the clock base internally. 
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -650,10 +650,10 @@ static inline void retrigger_next_event( + * + * Called with interrupts disabled and base->cpu_base.lock held + */ +-static void hrtimer_reprogram(struct hrtimer *timer, +- struct hrtimer_clock_base *base) ++static void hrtimer_reprogram(struct hrtimer *timer) + { + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ++ struct hrtimer_clock_base *base = timer->base; + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); +@@ -943,7 +943,7 @@ void hrtimer_start_range_ns(struct hrtim + if (!leftmost) + goto unlock; + +- hrtimer_reprogram(timer, new_base); ++ hrtimer_reprogram(timer); + unlock: + unlock_hrtimer_base(timer, &flags); + } diff --git a/debian/patches/features/all/rt/0017-tracing-Account-for-variables-in-named-trigger-compa.patch b/debian/patches/features/all/rt/0022-tracing-Account-for-variables-in-named-trigger-compa.patch similarity index 58% rename from debian/patches/features/all/rt/0017-tracing-Account-for-variables-in-named-trigger-compa.patch rename to debian/patches/features/all/rt/0022-tracing-Account-for-variables-in-named-trigger-compa.patch index de0ec05cf..5be846cb5 100644 --- a/debian/patches/features/all/rt/0017-tracing-Account-for-variables-in-named-trigger-compa.patch +++ b/debian/patches/features/all/rt/0022-tracing-Account-for-variables-in-named-trigger-compa.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:18 -0500 -Subject: [PATCH 17/32] tracing: Account for variables in named trigger +Date: Fri, 22 Sep 2017 14:59:53 -0500 +Subject: [PATCH 22/42] tracing: Account for variables in named trigger compatibility -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Named triggers must also have the same set of variables in order to be considered compatible - update the trigger match test to account for @@ -14,12 +14,12 @@ are meant to allow one or more events to set the same variable. 
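(As an illustration — not part of the patch; the 'wakeup_lat' name and the sched events are hypothetical, the syntax mirrors the examples earlier in this series — two named triggers now only pair up if their variable sets match as well:)

    # echo 'hist:name=wakeup_lat:keys=pid:ts0=common_timestamp' >> \
          /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
    # echo 'hist:name=wakeup_lat:keys=pid:ts0=common_timestamp' >> \
          /sys/kernel/debug/tracing/events/sched/sched_switch/trigger

(A later 'name=wakeup_lat' trigger that defined a different variable set, or none at all, would be rejected as incompatible rather than silently sharing the histogram.)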
Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) + kernel/trace/trace_events_hist.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -1545,7 +1545,7 @@ static int event_hist_trigger_print(stru +@@ -1614,7 +1614,7 @@ static int event_hist_trigger_print(stru sort_key = &hist_data->sort_keys[i]; idx = sort_key->field_idx; @@ -28,15 +28,14 @@ Signed-off-by: Sebastian Andrzej Siewior return -EINVAL; if (i > 0) -@@ -1733,6 +1733,12 @@ static bool hist_trigger_match(struct ev +@@ -1802,6 +1802,11 @@ static bool hist_trigger_match(struct ev return false; if (key_field->is_signed != key_field_test->is_signed) return false; -+ if ((key_field->var.name && !key_field_test->var.name) || -+ (!key_field->var.name && key_field_test->var.name)) ++ if (!!key_field->var.name != !!key_field_test->var.name) + return false; -+ if ((key_field->var.name && key_field_test->var.name) && -+ strcmp(key_field->var.name, key_field_test->var.name) != 0) ++ if (key_field->var.name && ++ strcmp(key_field->var.name, key_field_test->var.name) != 0) + return false; } diff --git a/debian/patches/features/all/rt/0023-hrtimer-Split-hrtimer_start_range_ns.patch b/debian/patches/features/all/rt/0023-hrtimer-Split-hrtimer_start_range_ns.patch new file mode 100644 index 000000000..2c02f137a --- /dev/null +++ b/debian/patches/features/all/rt/0023-hrtimer-Split-hrtimer_start_range_ns.patch @@ -0,0 +1,76 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:01 +0200 +Subject: [PATCH 23/36] hrtimer: Split hrtimer_start_range_ns() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Preparatory patch for softirq based hrtimers to avoid code duplication. No +functional change. 
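(For context, a minimal sketch of a caller of the function being split — timer name and expiry values are made up, only the kernel API calls are real — looks like this; the split below only separates the enqueue path into __hrtimer_start_range_ns(), so such callers are unaffected:)

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static struct hrtimer my_timer;

    static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
    {
            /* expiry callback, runs in hardirq context */
            return HRTIMER_NORESTART;
    }

    static void my_timer_arm(void)
    {
            hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            my_timer.function = my_timer_fn;
            /* 100us relative expiry with 10us slack; this lands in
             * hrtimer_start_range_ns(), whose enqueue part is split out below */
            hrtimer_start_range_ns(&my_timer, ns_to_ktime(100 * NSEC_PER_USEC),
                                   10 * NSEC_PER_USEC, HRTIMER_MODE_REL);
    }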
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 44 ++++++++++++++++++++++++-------------------- + 1 file changed, 24 insertions(+), 20 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -909,22 +909,11 @@ static inline ktime_t hrtimer_update_low + return tim; + } + +-/** +- * hrtimer_start_range_ns - (re)start an hrtimer +- * @timer: the timer to be added +- * @tim: expiry time +- * @delta_ns: "slack" range for the timer +- * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or +- * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) +- */ +-void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, +- u64 delta_ns, const enum hrtimer_mode mode) ++static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, ++ u64 delta_ns, const enum hrtimer_mode mode, ++ struct hrtimer_clock_base *base) + { +- struct hrtimer_clock_base *base, *new_base; +- unsigned long flags; +- int leftmost; +- +- base = lock_hrtimer_base(timer, &flags); ++ struct hrtimer_clock_base *new_base; + + /* Remove an active timer from the queue: */ + remove_hrtimer(timer, base, true); +@@ -939,12 +928,27 @@ void hrtimer_start_range_ns(struct hrtim + /* Switch the timer base, if necessary: */ + new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); + +- leftmost = enqueue_hrtimer(timer, new_base, mode); +- if (!leftmost) +- goto unlock; ++ return enqueue_hrtimer(timer, new_base, mode); ++} ++/** ++ * hrtimer_start_range_ns - (re)start an hrtimer ++ * @timer: the timer to be added ++ * @tim: expiry time ++ * @delta_ns: "slack" range for the timer ++ * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or ++ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) ++ */ ++void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, ++ u64 delta_ns, const enum hrtimer_mode mode) ++{ ++ struct hrtimer_clock_base *base; ++ unsigned long flags; ++ ++ base = lock_hrtimer_base(timer, &flags); ++ ++ if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) ++ hrtimer_reprogram(timer); + +- hrtimer_reprogram(timer); +-unlock: + unlock_hrtimer_base(timer, &flags); + } + EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); diff --git a/debian/patches/features/all/rt/0023-tracing-Add-onmatch-hist-trigger-action-support.patch b/debian/patches/features/all/rt/0023-tracing-Add-onmatch-hist-trigger-action-support.patch deleted file mode 100644 index d02a67552..000000000 --- a/debian/patches/features/all/rt/0023-tracing-Add-onmatch-hist-trigger-action-support.patch +++ /dev/null @@ -1,1269 +0,0 @@ -From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:24 -0500 -Subject: [PATCH 23/32] tracing: Add 'onmatch' hist trigger action support -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Add an 'onmatch(matching.event).(param list)' -hist trigger action which is invoked with the set of variables or -event fields named in the 'param list'. The result is the generation -of a synthetic event that consists of the values contained in those -variables and/or fields at the time the invoking event was hit. 
- -As an example the below defines a simple synthetic event using a -variable defined on the sched_wakeup_new event, and shows the event -definition with unresolved fields, since the sched_wakeup_new event -with the testpid variable hasn't been defined yet: - - # echo 'wakeup_new_test pid_t pid; int prio' >> \ - /sys/kernel/debug/tracing/synthetic_events - - # cat /sys/kernel/debug/tracing/synthetic_events - wakeup_new_test pid_t pid; int prio - -The following hist trigger both defines a testpid variable and -specifies an onmatch() trace action that uses that variable along with -a non-variable field to generate a wakeup_new_test synthetic event -whenever a sched_wakeup_new event occurs, which because of the 'if -comm == "cyclictest"' filter only happens when the executable is -cyclictest: - - # echo 'hist:keys=testpid=pid:\ - onmatch(sched.sched_wakeup_new).wakeup_new_test($testpid, prio) \ - if comm=="cyclictest"' >> \ - /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger - -Creating and displaying a histogram based on those events is now just -a matter of using the fields and new synthetic event in the -tracing/events/synthetic directory, as usual: - - # echo 'hist:keys=pid,prio:sort=pid,prio' >> \ - /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger - -Signed-off-by: Tom Zanussi -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/trace/trace_events_hist.c | 955 ++++++++++++++++++++++++++++++++++++++- - 1 file changed, 940 insertions(+), 15 deletions(-) - ---- a/kernel/trace/trace_events_hist.c -+++ b/kernel/trace/trace_events_hist.c -@@ -59,6 +59,7 @@ struct hist_field { - unsigned int size; - unsigned int offset; - unsigned int is_signed; -+ const char *type; - struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; - struct hist_trigger_data *hist_data; - struct hist_var var; -@@ -243,6 +244,16 @@ struct hist_trigger_attrs { - unsigned int n_actions; - }; - -+struct field_var { -+ struct hist_field *var; -+ struct hist_field *val; -+}; -+ -+struct field_var_hist { -+ struct hist_trigger_data *hist_data; -+ char *cmd; -+}; -+ - struct hist_trigger_data { - struct hist_field *fields[HIST_FIELDS_MAX]; - unsigned int n_vals; -@@ -263,6 +274,14 @@ struct hist_trigger_data { - - struct action_data *actions[HIST_ACTIONS_MAX]; - unsigned int n_actions; -+ -+ struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX]; -+ unsigned int n_synth_var_refs; -+ struct field_var *field_vars[SYNTH_FIELDS_MAX]; -+ unsigned int n_field_vars; -+ unsigned int n_field_var_str; -+ struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX]; -+ unsigned int n_field_var_hists; - }; - - struct synth_field { -@@ -291,7 +310,14 @@ typedef void (*action_fn_t) (struct hist - - struct action_data { - action_fn_t fn; -+ unsigned int n_params; -+ char *params[SYNTH_FIELDS_MAX]; -+ - unsigned int var_ref_idx; -+ char *match_event; -+ char *match_event_system; -+ char *synth_event_name; -+ struct synth_event *synth_event; - }; - - static LIST_HEAD(synth_event_list); -@@ -802,6 +828,50 @@ static struct synth_event *alloc_synth_e - return event; - } - -+static void action_trace(struct hist_trigger_data *hist_data, -+ struct tracing_map_elt *elt, void *rec, -+ struct ring_buffer_event *rbe, -+ struct action_data *data, u64 *var_ref_vals) -+{ -+ struct synth_event *event = data->synth_event; -+ -+ trace_synth(event, var_ref_vals, data->var_ref_idx); -+} -+ -+static bool check_hist_action_refs(struct hist_trigger_data *hist_data, -+ struct synth_event *event) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < 
hist_data->n_actions; i++) { -+ struct action_data *data = hist_data->actions[i]; -+ -+ if (data->fn == action_trace && data->synth_event == event) -+ return true; -+ } -+ -+ return false; -+} -+ -+static LIST_HEAD(hist_action_list); -+static LIST_HEAD(hist_var_list); -+ -+struct hist_var_data { -+ struct list_head list; -+ struct hist_trigger_data *hist_data; -+}; -+ -+static bool check_synth_action_refs(struct synth_event *event) -+{ -+ struct hist_var_data *var_data; -+ -+ list_for_each_entry(var_data, &hist_action_list, list) -+ if (check_hist_action_refs(var_data->hist_data, event)) -+ return true; -+ -+ return false; -+} -+ - static int create_synth_event(int argc, char **argv) - { - struct synth_field *fields[SYNTH_FIELDS_MAX]; -@@ -832,15 +902,17 @@ static int create_synth_event(int argc, - event = find_synth_event(name); - if (event) { - if (delete_event) { -+ if (check_synth_action_refs(event)) { -+ ret = -EBUSY; -+ goto out; -+ } - remove_synth_event(event); - goto err; - } else - ret = -EEXIST; - goto out; -- } else if (delete_event) { -- ret = -EINVAL; -+ } else if (delete_event) - goto out; -- } - - if (argc < 2) { - ret = -EINVAL; -@@ -891,11 +963,18 @@ static int release_all_synth_events(void - - mutex_lock(&synth_event_mutex); - -+ list_for_each_entry(event, &synth_event_list, list) { -+ if (check_synth_action_refs(event)) { -+ ret = -EBUSY; -+ goto out; -+ } -+ } -+ - list_for_each_entry_safe(event, e, &synth_event_list, list) { - remove_synth_event(event); - free_synth_event(event); - } -- -+ out: - mutex_unlock(&synth_event_mutex); - - return ret; -@@ -992,13 +1071,6 @@ static u64 hist_field_timestamp(struct h - return ts; - } - --static LIST_HEAD(hist_var_list); -- --struct hist_var_data { -- struct list_head list; -- struct hist_trigger_data *hist_data; --}; -- - static struct hist_field *check_var_ref(struct hist_field *hist_field, - struct hist_trigger_data *var_data, - unsigned int var_idx) -@@ -1248,6 +1320,7 @@ static struct hist_field *find_event_var - struct hist_elt_data { - char *comm; - u64 *var_ref_vals; -+ char *field_var_str[SYNTH_FIELDS_MAX]; - }; - - static u64 hist_field_var_ref(struct hist_field *hist_field, -@@ -1415,11 +1488,21 @@ static void destroy_hist_trigger_attrs(s - - static int parse_action(char *str, struct hist_trigger_attrs *attrs) - { -- int ret = 0; -+ int ret = -EINVAL; - - if (attrs->n_actions >= HIST_ACTIONS_MAX) - return ret; - -+ if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0)) { -+ attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL); -+ if (!attrs->action_str[attrs->n_actions]) { -+ ret = -ENOMEM; -+ return ret; -+ } -+ attrs->n_actions++; -+ ret = 0; -+ } -+ - return ret; - } - -@@ -1525,7 +1608,14 @@ static inline void save_comm(char *comm, - - static void hist_trigger_elt_data_free(struct tracing_map_elt *elt) - { -+ struct hist_trigger_data *hist_data = elt->map->private_data; - struct hist_elt_data *private_data = elt->private_data; -+ unsigned int i, n_str; -+ -+ n_str = hist_data->n_field_var_str; -+ -+ for (i = 0; i < n_str; i++) -+ kfree(private_data->field_var_str[i]); - - kfree(private_data->comm); - kfree(private_data); -@@ -1537,7 +1627,7 @@ static int hist_trigger_elt_data_alloc(s - unsigned int size = TASK_COMM_LEN + 1; - struct hist_elt_data *elt_data; - struct hist_field *key_field; -- unsigned int i; -+ unsigned int i, n_str; - - elt->private_data = elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); - if (!elt_data) -@@ -1557,6 +1647,16 @@ static int hist_trigger_elt_data_alloc(s - } - } - -+ 
n_str = hist_data->n_field_var_str; -+ -+ for (i = 0; i < n_str; i++) { -+ elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL); -+ if (!elt_data->field_var_str[i]) { -+ hist_trigger_elt_data_free(elt); -+ return -ENOMEM; -+ } -+ } -+ - return 0; - } - -@@ -1674,6 +1774,7 @@ static void destroy_hist_field(struct hi - - kfree(hist_field->var.name); - kfree(hist_field->name); -+ kfree(hist_field->type); - - kfree(hist_field); - } -@@ -1704,6 +1805,10 @@ static struct hist_field *create_hist_fi - - if (flags & HIST_FIELD_FL_HITCOUNT) { - hist_field->fn = hist_field_counter; -+ hist_field->size = sizeof(u64); -+ hist_field->type = kstrdup("u64", GFP_KERNEL); -+ if (!hist_field->type) -+ goto free; - goto out; - } - -@@ -1717,12 +1822,18 @@ static struct hist_field *create_hist_fi - hist_field->fn = hist_field_log2; - hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); - hist_field->size = hist_field->operands[0]->size; -+ hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL); -+ if (!hist_field->type) -+ goto free; - goto out; - } - - if (flags & HIST_FIELD_FL_TIMESTAMP) { - hist_field->fn = hist_field_timestamp; - hist_field->size = sizeof(u64); -+ hist_field->type = kstrdup("u64", GFP_KERNEL); -+ if (!hist_field->type) -+ goto free; - goto out; - } - -@@ -1731,6 +1842,10 @@ static struct hist_field *create_hist_fi - - if (is_string_field(field)) { - flags |= HIST_FIELD_FL_STRING; -+ hist_field->size = MAX_FILTER_STR_VAL; -+ hist_field->type = kstrdup(field->type, GFP_KERNEL); -+ if (!hist_field->type) -+ goto free; - - if (field->filter_type == FILTER_STATIC_STRING) - hist_field->fn = hist_field_string; -@@ -1739,6 +1854,12 @@ static struct hist_field *create_hist_fi - else - hist_field->fn = hist_field_pstring; - } else { -+ hist_field->size = field->size; -+ hist_field->is_signed = field->is_signed; -+ hist_field->type = kstrdup(field->type, GFP_KERNEL); -+ if (!hist_field->type) -+ goto free; -+ - hist_field->fn = select_value_fn(field->size, - field->is_signed); - if (!hist_field->fn) { -@@ -1786,7 +1907,10 @@ static struct hist_field *create_var_ref - ref_field->size = var_field->size; - ref_field->is_signed = var_field->is_signed; - ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); -- if (!ref_field->name) { -+ ref_field->type = kstrdup(var_field->type, GFP_KERNEL); -+ if (!ref_field->name || !ref_field->type) { -+ kfree(ref_field->name); -+ kfree(ref_field->type); - destroy_hist_field(ref_field, 0); - return NULL; - } -@@ -1970,6 +2094,11 @@ static struct hist_field *parse_unary(st - expr->operands[0] = operand1; - expr->operator = FIELD_OP_UNARY_MINUS; - expr->name = expr_str(expr, 0); -+ expr->type = kstrdup(operand1->type, GFP_KERNEL); -+ if (!expr->type) { -+ ret = -ENOMEM; -+ goto free; -+ } - - return expr; - free: -@@ -2053,6 +2182,11 @@ static struct hist_field *parse_expr(str - expr->operands[1] = operand2; - expr->operator = field_op; - expr->name = expr_str(expr, 0); -+ expr->type = kstrdup(operand1->type, GFP_KERNEL); -+ if (!expr->type) { -+ ret = -ENOMEM; -+ goto free; -+ } - - switch (field_op) { - case FIELD_OP_MINUS: -@@ -2074,6 +2208,718 @@ static struct hist_field *parse_expr(str - return ERR_PTR(ret); - } - -+static struct hist_var_data *find_actions(struct hist_trigger_data *hist_data) -+{ -+ struct hist_var_data *var_data, *found = NULL; -+ -+ list_for_each_entry(var_data, &hist_action_list, list) { -+ if (var_data->hist_data == hist_data) { -+ found = var_data; -+ break; -+ } -+ } -+ -+ return found; -+} -+ 
-+static int save_hist_actions(struct hist_trigger_data *hist_data) -+{ -+ struct hist_var_data *var_data; -+ -+ var_data = find_actions(hist_data); -+ if (var_data) -+ return 0; -+ -+ var_data = kzalloc(sizeof(*var_data), GFP_KERNEL); -+ if (!var_data) -+ return -ENOMEM; -+ -+ var_data->hist_data = hist_data; -+ list_add(&var_data->list, &hist_action_list); -+ -+ return 0; -+} -+ -+static void remove_hist_actions(struct hist_trigger_data *hist_data) -+{ -+ struct hist_var_data *var_data; -+ -+ var_data = find_actions(hist_data); -+ if (!var_data) -+ return; -+ -+ list_del(&var_data->list); -+ -+ kfree(var_data); -+} -+ -+static char *find_trigger_filter(struct hist_trigger_data *hist_data, -+ struct trace_event_file *file) -+{ -+ struct event_trigger_data *test; -+ -+ list_for_each_entry_rcu(test, &file->triggers, list) { -+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { -+ if (test->private_data == hist_data) -+ return test->filter_str; -+ } -+ } -+ -+ return NULL; -+} -+ -+static struct event_command trigger_hist_cmd; -+static int event_hist_trigger_func(struct event_command *cmd_ops, -+ struct trace_event_file *file, -+ char *glob, char *cmd, char *param); -+ -+static bool compatible_keys(struct hist_trigger_data *target_hist_data, -+ struct hist_trigger_data *hist_data, -+ unsigned int n_keys) -+{ -+ struct hist_field *target_hist_field, *hist_field; -+ unsigned int n, i, j; -+ -+ if (hist_data->n_fields - hist_data->n_vals != n_keys) -+ return false; -+ -+ i = hist_data->n_vals; -+ j = target_hist_data->n_vals; -+ -+ for (n = 0; n < n_keys; n++) { -+ hist_field = hist_data->fields[i + n]; -+ target_hist_field = hist_data->fields[j + n]; -+ -+ if (strcmp(hist_field->type, target_hist_field->type) != 0) -+ return false; -+ if (hist_field->size != target_hist_field->size) -+ return false; -+ if (hist_field->is_signed != target_hist_field->is_signed) -+ return false; -+ } -+ -+ return true; -+} -+ -+static struct hist_trigger_data * -+find_compatible_hist(struct hist_trigger_data *target_hist_data, -+ struct trace_event_file *file) -+{ -+ struct hist_trigger_data *hist_data; -+ struct event_trigger_data *test; -+ unsigned int n_keys; -+ -+ n_keys = target_hist_data->n_fields - target_hist_data->n_vals; -+ -+ list_for_each_entry_rcu(test, &file->triggers, list) { -+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { -+ hist_data = test->private_data; -+ -+ if (compatible_keys(target_hist_data, hist_data, n_keys)) -+ return hist_data; -+ } -+ } -+ -+ return NULL; -+} -+ -+static struct trace_event_file *event_file(char *system, char *event_name) -+{ -+ struct trace_event_file *file; -+ struct trace_array *tr; -+ -+ tr = top_trace_array(); -+ if (!tr) -+ return ERR_PTR(-ENODEV); -+ -+ file = find_event_file(tr, system, event_name); -+ if (!file) -+ return ERR_PTR(-EINVAL); -+ -+ return file; -+} -+ -+static struct hist_field * -+create_field_var_hist(struct hist_trigger_data *target_hist_data, -+ char *system, char *event_name, char *field_name) -+{ -+ struct hist_field *event_var = ERR_PTR(-EINVAL); -+ struct hist_trigger_data *hist_data; -+ unsigned int i, n, first = true; -+ struct field_var_hist *var_hist; -+ struct trace_event_file *file; -+ struct hist_field *key_field; -+ struct trace_array *tr; -+ char *saved_filter; -+ char *cmd; -+ int ret; -+ -+ if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) -+ return ERR_PTR(-EINVAL); -+ -+ tr = top_trace_array(); -+ if (!tr) -+ return ERR_PTR(-ENODEV); -+ -+ file = event_file(system, event_name); -+ if (IS_ERR(file)) { -+ 
ret = PTR_ERR(file); -+ return ERR_PTR(ret); -+ } -+ -+ hist_data = find_compatible_hist(target_hist_data, file); -+ if (!hist_data) -+ return ERR_PTR(-EINVAL); -+ -+ var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL); -+ if (!var_hist) -+ return ERR_PTR(-ENOMEM); -+ -+ cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); -+ if (!cmd) { -+ kfree(var_hist); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ strcat(cmd, "keys="); -+ -+ for_each_hist_key_field(i, hist_data) { -+ key_field = hist_data->fields[i]; -+ if (!first) -+ strcat(cmd, ","); -+ strcat(cmd, key_field->field->name); -+ first = false; -+ } -+ -+ strcat(cmd, ":synthetic_"); -+ strcat(cmd, field_name); -+ strcat(cmd, "="); -+ strcat(cmd, field_name); -+ -+ saved_filter = find_trigger_filter(hist_data, file); -+ if (saved_filter) { -+ strcat(cmd, " if "); -+ strcat(cmd, saved_filter); -+ } -+ -+ var_hist->cmd = kstrdup(cmd, GFP_KERNEL); -+ if (!var_hist->cmd) { -+ kfree(cmd); -+ kfree(var_hist); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ var_hist->hist_data = hist_data; -+ -+ ret = event_hist_trigger_func(&trigger_hist_cmd, file, -+ "", "hist", cmd); -+ if (ret) { -+ kfree(cmd); -+ kfree(var_hist->cmd); -+ kfree(var_hist); -+ return ERR_PTR(ret); -+ } -+ -+ strcpy(cmd, "synthetic_"); -+ strcat(cmd, field_name); -+ -+ event_var = find_event_var(system, event_name, cmd); -+ if (!event_var) { -+ kfree(cmd); -+ kfree(var_hist->cmd); -+ kfree(var_hist); -+ return ERR_PTR(-EINVAL); -+ } -+ -+ n = target_hist_data->n_field_var_hists; -+ target_hist_data->field_var_hists[n] = var_hist; -+ target_hist_data->n_field_var_hists++; -+ -+ return event_var; -+} -+ -+static struct hist_field * -+find_target_event_var(struct hist_trigger_data *hist_data, -+ char *system, char *event_name, char *var_name) -+{ -+ struct trace_event_file *file = hist_data->event_file; -+ struct hist_field *hist_field = NULL; -+ -+ if (system) { -+ struct trace_event_call *call; -+ -+ if (!event_name) -+ return NULL; -+ -+ call = file->event_call; -+ -+ if (strcmp(system, call->class->system) != 0) -+ return NULL; -+ -+ if (strcmp(event_name, trace_event_name(call)) != 0) -+ return NULL; -+ } -+ -+ hist_field = find_var_field(hist_data, var_name); -+ -+ return hist_field; -+} -+ -+static inline void __update_field_vars(struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *rec, -+ struct field_var **field_vars, -+ unsigned int n_field_vars, -+ unsigned int field_var_str_start) -+{ -+ struct hist_elt_data *elt_data = elt->private_data; -+ unsigned int i, j, var_idx; -+ u64 var_val; -+ -+ for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { -+ struct field_var *field_var = field_vars[i]; -+ struct hist_field *var = field_var->var; -+ struct hist_field *val = field_var->val; -+ -+ var_val = val->fn(val, elt, rbe, rec); -+ var_idx = var->var.idx; -+ -+ if (val->flags & HIST_FIELD_FL_STRING) { -+ char *str = elt_data->field_var_str[j++]; -+ -+ memcpy(str, (char *)(uintptr_t)var_val, -+ TASK_COMM_LEN + 1); -+ var_val = (u64)(uintptr_t)str; -+ } -+ tracing_map_set_var(elt, var_idx, var_val); -+ } -+} -+ -+static void update_field_vars(struct hist_trigger_data *hist_data, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *rec) -+{ -+ __update_field_vars(elt, rbe, rec, hist_data->field_vars, -+ hist_data->n_field_vars, 0); -+} -+ -+static struct hist_field *create_var(struct hist_trigger_data *hist_data, -+ struct trace_event_file *file, -+ char *name, int size, const char *type) -+{ -+ struct hist_field *var; -+ int idx; -+ -+ if 
(find_var(file, name) && !hist_data->remove) { -+ var = ERR_PTR(-EINVAL); -+ goto out; -+ } -+ -+ var = kzalloc(sizeof(struct hist_field), GFP_KERNEL); -+ if (!var) { -+ var = ERR_PTR(-ENOMEM); -+ goto out; -+ } -+ -+ idx = tracing_map_add_var(hist_data->map); -+ if (idx < 0) { -+ kfree(var); -+ var = ERR_PTR(-EINVAL); -+ goto out; -+ } -+ -+ var->flags = HIST_FIELD_FL_VAR; -+ var->var.idx = idx; -+ var->var.hist_data = var->hist_data = hist_data; -+ var->size = size; -+ var->var.name = kstrdup(name, GFP_KERNEL); -+ var->type = kstrdup(type, GFP_KERNEL); -+ if (!var->var.name || !var->type) { -+ kfree(var->var.name); -+ kfree(var->type); -+ kfree(var); -+ var = ERR_PTR(-ENOMEM); -+ } -+ out: -+ return var; -+} -+ -+static struct field_var *create_field_var(struct hist_trigger_data *hist_data, -+ struct trace_event_file *file, -+ char *field_name) -+{ -+ struct hist_field *val = NULL, *var = NULL; -+ unsigned long flags = HIST_FIELD_FL_VAR; -+ struct field_var *field_var; -+ int ret = 0; -+ -+ if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { -+ ret = -EINVAL; -+ goto err; -+ } -+ -+ val = parse_atom(hist_data, file, field_name, &flags, NULL); -+ if (IS_ERR(val)) { -+ ret = PTR_ERR(val); -+ goto err; -+ } -+ -+ var = create_var(hist_data, file, field_name, val->size, val->type); -+ if (IS_ERR(var)) { -+ kfree(val); -+ ret = PTR_ERR(var); -+ goto err; -+ } -+ -+ field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL); -+ if (!field_var) { -+ kfree(val); -+ kfree(var); -+ ret = -ENOMEM; -+ goto err; -+ } -+ -+ field_var->var = var; -+ field_var->val = val; -+ out: -+ return field_var; -+ err: -+ field_var = ERR_PTR(ret); -+ goto out; -+} -+ -+static struct field_var * -+create_target_field_var(struct hist_trigger_data *hist_data, -+ char *system, char *event_name, char *var_name) -+{ -+ struct trace_event_file *file = hist_data->event_file; -+ -+ if (system) { -+ struct trace_event_call *call; -+ -+ if (!event_name) -+ return NULL; -+ -+ call = file->event_call; -+ -+ if (strcmp(system, call->class->system) != 0) -+ return NULL; -+ -+ if (strcmp(event_name, trace_event_name(call)) != 0) -+ return NULL; -+ } -+ -+ return create_field_var(hist_data, file, var_name); -+} -+ -+static void onmatch_destroy(struct action_data *data) -+{ -+ unsigned int i; -+ -+ kfree(data->match_event); -+ kfree(data->match_event_system); -+ kfree(data->synth_event_name); -+ -+ for (i = 0; i < data->n_params; i++) -+ kfree(data->params[i]); -+ -+ kfree(data); -+} -+ -+static void destroy_field_var(struct field_var *field_var) -+{ -+ if (!field_var) -+ return; -+ -+ destroy_hist_field(field_var->var, 0); -+ destroy_hist_field(field_var->val, 0); -+ -+ kfree(field_var); -+} -+ -+static void destroy_field_vars(struct hist_trigger_data *hist_data) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < hist_data->n_field_vars; i++) -+ destroy_field_var(hist_data->field_vars[i]); -+} -+ -+static void save_field_var(struct hist_trigger_data *hist_data, -+ struct field_var *field_var) -+{ -+ hist_data->field_vars[hist_data->n_field_vars++] = field_var; -+ -+ if (field_var->val->flags & HIST_FIELD_FL_STRING) -+ hist_data->n_field_var_str++; -+} -+ -+static void destroy_synth_var_refs(struct hist_trigger_data *hist_data) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < hist_data->n_synth_var_refs; i++) -+ destroy_hist_field(hist_data->synth_var_refs[i], 0); -+} -+ -+static void save_synth_var_ref(struct hist_trigger_data *hist_data, -+ struct hist_field *var_ref) -+{ -+ hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref; 
-+ -+ hist_data->var_refs[hist_data->n_var_refs] = var_ref; -+ var_ref->var_ref_idx = hist_data->n_var_refs++; -+} -+ -+static int check_synth_field(struct synth_event *event, -+ struct hist_field *hist_field, -+ unsigned int field_pos) -+{ -+ struct synth_field *field; -+ -+ if (field_pos >= event->n_fields) -+ return -EINVAL; -+ -+ field = event->fields[field_pos]; -+ -+ if (strcmp(field->type, hist_field->type) != 0) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int parse_action_params(char *params, struct action_data *data) -+{ -+ char *param, *saved_param; -+ int ret = 0; -+ -+ while (params) { -+ if (data->n_params >= SYNTH_FIELDS_MAX) -+ goto out; -+ -+ param = strsep(¶ms, ","); -+ if (!param) -+ goto out; -+ -+ param = strstrip(param); -+ if (strlen(param) < 2) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ saved_param = kstrdup(param, GFP_KERNEL); -+ if (!saved_param) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ data->params[data->n_params++] = saved_param; -+ } -+ out: -+ return ret; -+} -+ -+static struct hist_field * -+onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data, -+ char *system, char *event, char *var) -+{ -+ struct hist_field *hist_field; -+ -+ var++; /* skip '$' */ -+ -+ hist_field = find_target_event_var(hist_data, system, event, var); -+ if (!hist_field) { -+ if (!system) { -+ system = data->match_event_system; -+ event = data->match_event; -+ } -+ -+ hist_field = find_event_var(system, event, var); -+ } -+ -+ return hist_field; -+} -+ -+static struct hist_field * -+onmatch_create_field_var(struct hist_trigger_data *hist_data, -+ struct action_data *data, char *system, -+ char *event, char *var) -+{ -+ struct hist_field *hist_field = NULL; -+ struct field_var *field_var; -+ -+ field_var = create_target_field_var(hist_data, system, event, var); -+ if (IS_ERR(field_var)) -+ goto out; -+ -+ if (field_var) { -+ save_field_var(hist_data, field_var); -+ hist_field = field_var->var; -+ } else { -+ if (!system) { -+ system = data->match_event_system; -+ event = data->match_event; -+ } -+ -+ hist_field = create_field_var_hist(hist_data, system, event, var); -+ if (IS_ERR(hist_field)) -+ goto free; -+ } -+ out: -+ return hist_field; -+ free: -+ destroy_field_var(field_var); -+ hist_field = NULL; -+ goto out; -+} -+ -+static int onmatch_create(struct hist_trigger_data *hist_data, -+ struct trace_event_file *file, -+ struct action_data *data) -+{ -+ char *event_name, *param, *system = NULL; -+ struct hist_field *hist_field, *var_ref; -+ unsigned int i, var_ref_idx; -+ unsigned int field_pos = 0; -+ struct synth_event *event; -+ int ret = 0; -+ -+ mutex_lock(&synth_event_mutex); -+ -+ event = find_synth_event(data->synth_event_name); -+ if (!event) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ var_ref_idx = hist_data->n_var_refs; -+ -+ for (i = 0; i < data->n_params; i++) { -+ char *p; -+ -+ p = param = kstrdup(data->params[i], GFP_KERNEL); -+ if (!param) -+ goto out; -+ -+ system = strsep(¶m, "."); -+ if (!param) { -+ param = (char *)system; -+ system = event_name = NULL; -+ } else { -+ event_name = strsep(¶m, "."); -+ if (!param) { -+ kfree(p); -+ ret = -EINVAL; -+ goto out; -+ } -+ } -+ -+ if (param[0] == '$') -+ hist_field = onmatch_find_var(hist_data, data, system, -+ event_name, param); -+ else -+ hist_field = onmatch_create_field_var(hist_data, data, -+ system, -+ event_name, -+ param); -+ -+ if (!hist_field) { -+ kfree(p); -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (check_synth_field(event, hist_field, field_pos) == 0) { -+ var_ref = 
create_var_ref(hist_field); -+ if (!var_ref) { -+ kfree(p); -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ save_synth_var_ref(hist_data, var_ref); -+ field_pos++; -+ kfree(p); -+ continue; -+ } -+ -+ kfree(p); -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (field_pos != event->n_fields) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ data->fn = action_trace; -+ data->synth_event = event; -+ data->var_ref_idx = var_ref_idx; -+ hist_data->actions[hist_data->n_actions++] = data; -+ save_hist_actions(hist_data); -+ out: -+ mutex_unlock(&synth_event_mutex); -+ -+ return ret; -+} -+ -+static struct action_data *onmatch_parse(char *str) -+{ -+ char *match_event, *match_event_system; -+ char *synth_event_name, *params; -+ struct action_data *data; -+ int ret = -EINVAL; -+ -+ data = kzalloc(sizeof(*data), GFP_KERNEL); -+ if (!data) -+ return ERR_PTR(-ENOMEM); -+ -+ match_event = strsep(&str, ")"); -+ if (!match_event || !str) -+ goto free; -+ -+ match_event_system = strsep(&match_event, "."); -+ if (!match_event) -+ goto free; -+ -+ if (IS_ERR(event_file(match_event_system, match_event))) -+ goto free; -+ -+ data->match_event = kstrdup(match_event, GFP_KERNEL); -+ data->match_event_system = kstrdup(match_event_system, GFP_KERNEL); -+ -+ strsep(&str, "."); -+ if (!str) -+ goto free; -+ -+ synth_event_name = strsep(&str, "("); -+ if (!synth_event_name || !str) -+ goto free; -+ data->synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); -+ -+ params = strsep(&str, ")"); -+ if (!params || !str || (str && strlen(str))) -+ goto free; -+ -+ ret = parse_action_params(params, data); -+ if (ret) -+ goto free; -+ -+ if (!data->match_event_system || !data->match_event || -+ !data->synth_event_name) { -+ ret = -ENOMEM; -+ goto free; -+ } -+ out: -+ return data; -+ free: -+ onmatch_destroy(data); -+ data = ERR_PTR(ret); -+ goto out; -+} -+ - static int create_hitcount_val(struct hist_trigger_data *hist_data) - { - hist_data->fields[HITCOUNT_IDX] = -@@ -2465,19 +3311,37 @@ static void destroy_actions(struct hist_ - for (i = 0; i < hist_data->n_actions; i++) { - struct action_data *data = hist_data->actions[i]; - -- kfree(data); -+ if (data->fn == action_trace) -+ onmatch_destroy(data); -+ else -+ kfree(data); - } - } - - static int create_actions(struct hist_trigger_data *hist_data, - struct trace_event_file *file) - { -+ struct action_data *data; - unsigned int i; - int ret = 0; - char *str; - - for (i = 0; i < hist_data->attrs->n_actions; i++) { - str = hist_data->attrs->action_str[i]; -+ -+ if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) { -+ char *action_str = str + strlen("onmatch("); -+ -+ data = onmatch_parse(action_str); -+ if (IS_ERR(data)) -+ return PTR_ERR(data); -+ -+ ret = onmatch_create(hist_data, file, data); -+ if (ret) { -+ onmatch_destroy(data); -+ return ret; -+ } -+ } - } - - return ret; -@@ -2494,6 +3358,26 @@ static void print_actions(struct seq_fil - } - } - -+static void print_onmatch_spec(struct seq_file *m, -+ struct hist_trigger_data *hist_data, -+ struct action_data *data) -+{ -+ unsigned int i; -+ -+ seq_printf(m, ":onmatch(%s.%s).", data->match_event_system, -+ data->match_event); -+ -+ seq_printf(m, "%s(", data->synth_event->name); -+ -+ for (i = 0; i < data->n_params; i++) { -+ if (i) -+ seq_puts(m, ","); -+ seq_printf(m, "%s", data->params[i]); -+ } -+ -+ seq_puts(m, ")"); -+} -+ - static void print_actions_spec(struct seq_file *m, - struct hist_trigger_data *hist_data) - { -@@ -2501,6 +3385,19 @@ static void print_actions_spec(struct se - - for (i = 0; i < hist_data->n_actions; 
i++) { - struct action_data *data = hist_data->actions[i]; -+ -+ if (data->fn == action_trace) -+ print_onmatch_spec(m, hist_data, data); -+ } -+} -+ -+static void destroy_field_var_hists(struct hist_trigger_data *hist_data) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < hist_data->n_field_var_hists; i++) { -+ kfree(hist_data->field_var_hists[i]->cmd); -+ kfree(hist_data->field_var_hists[i]); - } - } - -@@ -2514,6 +3411,9 @@ static void destroy_hist_data(struct his - tracing_map_destroy(hist_data->map); - - destroy_actions(hist_data); -+ destroy_field_vars(hist_data); -+ destroy_field_var_hists(hist_data); -+ destroy_synth_var_refs(hist_data); - - kfree(hist_data); - } -@@ -2648,6 +3548,8 @@ static void hist_trigger_elt_update(stru - tracing_map_set_var(elt, var_idx, hist_val); - } - } -+ -+ update_field_vars(hist_data, elt, rbe, rec); - } - - static inline void add_to_key(char *compound_key, void *key, -@@ -2861,6 +3763,8 @@ hist_trigger_entry_print(struct seq_file - } - } - -+ print_actions(m, hist_data, elt); -+ - seq_puts(m, "\n"); - } - -@@ -3128,6 +4032,8 @@ static void event_hist_trigger_free(stru - - remove_hist_vars(hist_data); - -+ remove_hist_actions(hist_data); -+ - destroy_hist_data(hist_data); - } - } -@@ -3390,6 +4296,21 @@ static bool hist_trigger_check_refs(stru - return false; - } - -+static void unregister_field_var_hists(struct hist_trigger_data *hist_data) -+{ -+ struct trace_event_file *file; -+ unsigned int i; -+ char *cmd; -+ int ret; -+ -+ for (i = 0; i < hist_data->n_field_var_hists; i++) { -+ file = hist_data->field_var_hists[i]->hist_data->event_file; -+ cmd = hist_data->field_var_hists[i]->cmd; -+ ret = event_hist_trigger_func(&trigger_hist_cmd, file, -+ "!hist", "hist", cmd); -+ } -+} -+ - static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, - struct event_trigger_data *data, - struct trace_event_file *file) -@@ -3405,6 +4326,7 @@ static void hist_unregister_trigger(char - if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { - if (!hist_trigger_match(data, test, named_data, false)) - continue; -+ unregister_field_var_hists(test->private_data); - unregistered = true; - list_del_rcu(&test->list); - trace_event_trigger_enable_disable(file, 0); -@@ -3448,6 +4370,7 @@ static void hist_unreg_all(struct trace_ - - list_for_each_entry_safe(test, n, &file->triggers, list) { - if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { -+ unregister_field_var_hists(test->private_data); - list_del_rcu(&test->list); - trace_event_trigger_enable_disable(file, 0); - update_cond_flag(file); -@@ -3571,6 +4494,8 @@ static int event_hist_trigger_func(struc - - remove_hist_vars(hist_data); - -+ remove_hist_actions(hist_data); -+ - kfree(trigger_data); - destroy_hist_data(hist_data); - diff --git a/debian/patches/features/all/rt/0023-tracing-Move-get_hist_field_flags.patch b/debian/patches/features/all/rt/0023-tracing-Move-get_hist_field_flags.patch new file mode 100644 index 000000000..9d9ae5d3b --- /dev/null +++ b/debian/patches/features/all/rt/0023-tracing-Move-get_hist_field_flags.patch @@ -0,0 +1,74 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 14:59:54 -0500 +Subject: [PATCH 23/42] tracing: Move get_hist_field_flags() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Move get_hist_field_flags() to make it more easily accessible for new +code (and keep the move separate from new functionality). 
+ +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 44 +++++++++++++++++++-------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -457,6 +457,28 @@ static const struct tracing_map_ops hist + .elt_init = hist_trigger_elt_comm_init, + }; + ++static const char *get_hist_field_flags(struct hist_field *hist_field) ++{ ++ const char *flags_str = NULL; ++ ++ if (hist_field->flags & HIST_FIELD_FL_HEX) ++ flags_str = "hex"; ++ else if (hist_field->flags & HIST_FIELD_FL_SYM) ++ flags_str = "sym"; ++ else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) ++ flags_str = "sym-offset"; ++ else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) ++ flags_str = "execname"; ++ else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) ++ flags_str = "syscall"; ++ else if (hist_field->flags & HIST_FIELD_FL_LOG2) ++ flags_str = "log2"; ++ else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) ++ flags_str = "usecs"; ++ ++ return flags_str; ++} ++ + static void destroy_hist_field(struct hist_field *hist_field, + unsigned int level) + { +@@ -1499,28 +1521,6 @@ const struct file_operations event_hist_ + .release = single_release, + }; + +-static const char *get_hist_field_flags(struct hist_field *hist_field) +-{ +- const char *flags_str = NULL; +- +- if (hist_field->flags & HIST_FIELD_FL_HEX) +- flags_str = "hex"; +- else if (hist_field->flags & HIST_FIELD_FL_SYM) +- flags_str = "sym"; +- else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) +- flags_str = "sym-offset"; +- else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) +- flags_str = "execname"; +- else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) +- flags_str = "syscall"; +- else if (hist_field->flags & HIST_FIELD_FL_LOG2) +- flags_str = "log2"; +- else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) +- flags_str = "usecs"; +- +- return flags_str; +-} +- + static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) + { + const char *field_name = hist_field_name(hist_field, 0); diff --git a/debian/patches/features/all/rt/0024-hrtimer-Split-__hrtimer_get_next_event.patch b/debian/patches/features/all/rt/0024-hrtimer-Split-__hrtimer_get_next_event.patch new file mode 100644 index 000000000..3eb74f769 --- /dev/null +++ b/debian/patches/features/all/rt/0024-hrtimer-Split-__hrtimer_get_next_event.patch @@ -0,0 +1,53 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:02 +0200 +Subject: [PATCH 24/36] hrtimer: Split __hrtimer_get_next_event() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Preparatory patch for softirq based hrtimers to avoid code duplication. No +functional change. 
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -459,13 +459,13 @@ static struct hrtimer_clock_base * + #define for_each_active_base(base, cpu_base, active) \ + while ((base = __next_base((cpu_base), &(active)))) + +-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) ++static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, ++ unsigned int active, ++ ktime_t expires_next) + { + struct hrtimer_clock_base *base; +- unsigned int active = cpu_base->active_bases; +- ktime_t expires, expires_next = KTIME_MAX; ++ ktime_t expires; + +- cpu_base->next_timer = NULL; + for_each_active_base(base, cpu_base, active) { + struct timerqueue_node *next; + struct hrtimer *timer; +@@ -487,6 +487,18 @@ static ktime_t __hrtimer_get_next_event( + expires_next = 0; + return expires_next; + } ++ ++static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) ++{ ++ unsigned int active = cpu_base->active_bases; ++ ktime_t expires_next = KTIME_MAX; ++ ++ cpu_base->next_timer = NULL; ++ ++ expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); ++ ++ return expires_next; ++} + #endif + + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) diff --git a/debian/patches/features/all/rt/0018-tracing-Add-simple-expression-support-to-hist-trigge.patch b/debian/patches/features/all/rt/0024-tracing-Add-simple-expression-support-to-hist-trigge.patch similarity index 77% rename from debian/patches/features/all/rt/0018-tracing-Add-simple-expression-support-to-hist-trigge.patch rename to debian/patches/features/all/rt/0024-tracing-Add-simple-expression-support-to-hist-trigge.patch index 9f7ff6dbe..1aa5aaee7 100644 --- a/debian/patches/features/all/rt/0018-tracing-Add-simple-expression-support-to-hist-trigge.patch +++ b/debian/patches/features/all/rt/0024-tracing-Add-simple-expression-support-to-hist-trigge.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:19 -0500 -Subject: [PATCH 18/32] tracing: Add simple expression support to hist triggers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 14:59:55 -0500 +Subject: [PATCH 24/42] tracing: Add simple expression support to hist triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add support for simple addition, subtraction, and unary expressions (-(expr) and expr, where expr = b-a, a+b, a+b+c) to hist triggers, in @@ -17,8 +17,8 @@ parsing. 
Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 457 +++++++++++++++++++++++++++++++++------ - 1 file changed, 390 insertions(+), 67 deletions(-) + kernel/trace/trace_events_hist.c | 514 +++++++++++++++++++++++++++++++++------ + 1 file changed, 440 insertions(+), 74 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -88,14 +88,14 @@ Signed-off-by: Sebastian Andrzej Siewior static u64 hist_field_##type(struct hist_field *hist_field, \ void *event, \ @@ -148,6 +192,7 @@ enum hist_field_flags { - HIST_FIELD_FL_TIMESTAMP_USECS = 2048, - HIST_FIELD_FL_VAR = 4096, - HIST_FIELD_FL_VAR_ONLY = 8192, -+ HIST_FIELD_FL_EXPR = 16384, + HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, + HIST_FIELD_FL_VAR = 1 << 12, + HIST_FIELD_FL_VAR_ONLY = 1 << 13, ++ HIST_FIELD_FL_EXPR = 1 << 14, }; - struct hist_trigger_attrs { -@@ -210,6 +255,8 @@ static const char *hist_field_name(struc + struct var_defs { +@@ -218,6 +263,8 @@ static const char *hist_field_name(struc field_name = hist_field_name(field->operands[0], ++level); else if (field->flags & HIST_FIELD_FL_TIMESTAMP) field_name = "$common_timestamp"; @@ -104,15 +104,19 @@ Signed-off-by: Sebastian Andrzej Siewior if (field_name == NULL) field_name = ""; -@@ -444,6 +491,73 @@ static const struct tracing_map_ops hist - .elt_init = hist_trigger_elt_comm_init, - }; +@@ -479,6 +526,93 @@ static const char *get_hist_field_flags( + return flags_str; + } +static char *expr_str(struct hist_field *field, unsigned int level) +{ -+ char *expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); ++ char *expr; + -+ if (!expr || level > 1) ++ if (level > 1) ++ return NULL; ++ ++ expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); ++ if (!expr) + return NULL; + + if (field->operator == FIELD_OP_UNARY_MINUS) { @@ -131,6 +135,14 @@ Signed-off-by: Sebastian Andrzej Siewior + } + + strcat(expr, hist_field_name(field->operands[0], 0)); ++ if (field->operands[0]->flags) { ++ const char *flags_str = get_hist_field_flags(field->operands[0]); ++ ++ if (flags_str) { ++ strcat(expr, "."); ++ strcat(expr, flags_str); ++ } ++ } + + switch (field->operator) { + case FIELD_OP_MINUS: @@ -145,6 +157,14 @@ Signed-off-by: Sebastian Andrzej Siewior + } + + strcat(expr, hist_field_name(field->operands[1], 0)); ++ if (field->operands[1]->flags) { ++ const char *flags_str = get_hist_field_flags(field->operands[1]); ++ ++ if (flags_str) { ++ strcat(expr, "."); ++ strcat(expr, flags_str); ++ } ++ } + + return expr; +} @@ -178,15 +198,15 @@ Signed-off-by: Sebastian Andrzej Siewior static void destroy_hist_field(struct hist_field *hist_field, unsigned int level) { -@@ -459,6 +573,7 @@ static void destroy_hist_field(struct hi - destroy_hist_field(hist_field->operands[i], ++level); +@@ -494,6 +628,7 @@ static void destroy_hist_field(struct hi + destroy_hist_field(hist_field->operands[i], level + 1); kfree(hist_field->var.name); + kfree(hist_field->name); kfree(hist_field); } -@@ -479,6 +594,9 @@ static struct hist_field *create_hist_fi +@@ -514,6 +649,9 @@ static struct hist_field *create_hist_fi hist_field->hist_data = hist_data; @@ -196,36 +216,70 @@ Signed-off-by: Sebastian Andrzej Siewior if (flags & HIST_FIELD_FL_HITCOUNT) { hist_field->fn = hist_field_counter; goto out; -@@ -551,6 +669,247 @@ static void destroy_hist_fields(struct h +@@ -586,6 +724,289 @@ static void destroy_hist_fields(struct h } } ++static char *field_name_from_var(struct hist_trigger_data *hist_data, ++ char *var_name) ++{ ++ char *name, *field; 
++ unsigned int i; ++ ++ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { ++ name = hist_data->attrs->var_defs.name[i]; ++ ++ if (strcmp(var_name, name) == 0) { ++ field = hist_data->attrs->var_defs.expr[i]; ++ if (contains_operator(field)) ++ continue; ++ return field; ++ } ++ } ++ ++ return NULL; ++} ++ ++static char *local_field_var_ref(struct hist_trigger_data *hist_data, ++ char *var_name) ++{ ++ var_name++; ++ ++ return field_name_from_var(hist_data, var_name); ++} ++ +static struct ftrace_event_field * +parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, + char *field_str, unsigned long *flags) +{ + struct ftrace_event_field *field = NULL; -+ char *field_name; ++ char *field_name, *modifier, *str; + -+ field_name = strsep(&field_str, "."); -+ if (field_str) { -+ if (strcmp(field_str, "hex") == 0) ++ modifier = str = kstrdup(field_str, GFP_KERNEL); ++ if (!modifier) ++ return ERR_PTR(-ENOMEM); ++ ++ field_name = strsep(&modifier, "."); ++ if (modifier) { ++ if (strcmp(modifier, "hex") == 0) + *flags |= HIST_FIELD_FL_HEX; -+ else if (strcmp(field_str, "sym") == 0) ++ else if (strcmp(modifier, "sym") == 0) + *flags |= HIST_FIELD_FL_SYM; -+ else if (strcmp(field_str, "sym-offset") == 0) ++ else if (strcmp(modifier, "sym-offset") == 0) + *flags |= HIST_FIELD_FL_SYM_OFFSET; -+ else if ((strcmp(field_str, "execname") == 0) && ++ else if ((strcmp(modifier, "execname") == 0) && + (strcmp(field_name, "common_pid") == 0)) + *flags |= HIST_FIELD_FL_EXECNAME; -+ else if (strcmp(field_str, "syscall") == 0) ++ else if (strcmp(modifier, "syscall") == 0) + *flags |= HIST_FIELD_FL_SYSCALL; -+ else if (strcmp(field_str, "log2") == 0) ++ else if (strcmp(modifier, "log2") == 0) + *flags |= HIST_FIELD_FL_LOG2; -+ else if (strcmp(field_str, "usecs") == 0) ++ else if (strcmp(modifier, "usecs") == 0) + *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; -+ else -+ return ERR_PTR(-EINVAL); ++ else { ++ field = ERR_PTR(-EINVAL); ++ goto out; ++ } + } + + if (strcmp(field_name, "$common_timestamp") == 0) { @@ -235,9 +289,13 @@ Signed-off-by: Sebastian Andrzej Siewior + hist_data->attrs->ts_in_usecs = true; + } else { + field = trace_find_event_field(file->event_call, field_name); -+ if (!field) -+ return ERR_PTR(-EINVAL); ++ if (!field || !field->size) { ++ field = ERR_PTR(-EINVAL); ++ goto out; ++ } + } ++ out: ++ kfree(str); + + return field; +} @@ -246,10 +304,15 @@ Signed-off-by: Sebastian Andrzej Siewior + struct trace_event_file *file, char *str, + unsigned long *flags, char *var_name) +{ ++ char *s; + struct ftrace_event_field *field = NULL; + struct hist_field *hist_field = NULL; + int ret = 0; + ++ s = local_field_var_ref(hist_data, str); ++ if (s) ++ str = s; ++ + field = parse_field(hist_data, file, str, flags); + if (IS_ERR(field)) { + ret = PTR_ERR(field); @@ -279,7 +342,6 @@ Signed-off-by: Sebastian Andrzej Siewior +{ + struct hist_field *operand1, *expr = NULL; + unsigned long operand_flags; -+ char *operand1_str; + int ret = 0; + char *s; + @@ -308,9 +370,11 @@ Signed-off-by: Sebastian Andrzej Siewior + goto free; + } + -+ operand1_str = strsep(&str, "("); -+ if (!operand1_str) ++ strsep(&str, "("); ++ if (!str) { ++ ret = -EINVAL; + goto free; ++ } + + flags |= HIST_FIELD_FL_EXPR; + expr = create_hist_field(hist_data, NULL, flags, var_name); @@ -326,16 +390,6 @@ Signed-off-by: Sebastian Andrzej Siewior + goto free; + } + -+ if (operand1 == NULL) { -+ operand_flags = 0; -+ operand1 = parse_atom(hist_data, file, operand1_str, -+ &operand_flags, NULL); -+ if 
(IS_ERR(operand1)) { -+ ret = PTR_ERR(operand1); -+ goto free; -+ } -+ } -+ + expr->fn = hist_field_unary_minus; + expr->operands[0] = operand1; + expr->operator = FIELD_OP_UNARY_MINUS; @@ -346,6 +400,19 @@ Signed-off-by: Sebastian Andrzej Siewior + return ERR_PTR(ret); +} + ++static int check_expr_operands(struct hist_field *operand1, ++ struct hist_field *operand2) ++{ ++ unsigned long operand1_flags = operand1->flags; ++ unsigned long operand2_flags = operand2->flags; ++ ++ if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != ++ (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) ++ return -EINVAL; ++ ++ return 0; ++} ++ +static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, + struct trace_event_file *file, + char *str, unsigned long flags, @@ -357,11 +424,12 @@ Signed-off-by: Sebastian Andrzej Siewior + char *sep, *operand1_str; + + if (level > 2) -+ return NULL; ++ return ERR_PTR(-EINVAL); + + field_op = contains_operator(str); ++ + if (field_op == FIELD_OP_NONE) -+ return NULL; ++ return parse_atom(hist_data, file, str, &flags, var_name); + + if (field_op == FIELD_OP_UNARY_MINUS) + return parse_unary(hist_data, file, str, flags, var_name, ++level); @@ -398,16 +466,10 @@ Signed-off-by: Sebastian Andrzej Siewior + operand2 = NULL; + goto free; + } -+ if (!operand2) { -+ operand_flags = 0; -+ operand2 = parse_atom(hist_data, file, str, -+ &operand_flags, NULL); -+ if (IS_ERR(operand2)) { -+ ret = PTR_ERR(operand2); -+ operand2 = NULL; -+ goto free; -+ } -+ } ++ ++ ret = check_expr_operands(operand1, operand2); ++ if (ret) ++ goto free; + + flags |= HIST_FIELD_FL_EXPR; + expr = create_hist_field(hist_data, NULL, flags, var_name); @@ -444,22 +506,15 @@ Signed-off-by: Sebastian Andrzej Siewior static int create_hitcount_val(struct hist_trigger_data *hist_data) { hist_data->fields[HITCOUNT_IDX] = -@@ -609,9 +968,9 @@ static int create_val_field(struct hist_ - struct trace_event_file *file, - char *field_str, bool var_only) +@@ -645,41 +1066,21 @@ static int __create_val_field(struct his + char *var_name, char *field_str, + unsigned long flags) { - struct ftrace_event_field *field = NULL; -- char *field_name, *var_name; +- char *field_name; + struct hist_field *hist_field; - unsigned long flags = 0; -+ char *var_name; int ret = 0; - if (WARN_ON(!var_only && val_idx >= TRACING_MAP_VALS_MAX)) -@@ -642,37 +1001,27 @@ static int create_val_field(struct hist_ - goto out; - } - - field_name = strsep(&field_str, "."); - if (field_str) { - if (strcmp(field_str, "hex") == 0) @@ -468,35 +523,30 @@ Signed-off-by: Sebastian Andrzej Siewior - ret = -EINVAL; - goto out; - } -+ hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); -+ if (IS_ERR(hist_field)) { -+ ret = PTR_ERR(hist_field); -+ goto out; - } - +- } +- - if (strcmp(field_name, "$common_timestamp") == 0) { - flags |= HIST_FIELD_FL_TIMESTAMP; - hist_data->enable_timestamps = true; - } else { - field = trace_find_event_field(file->event_call, field_name); -- if (!field) { +- if (!field || !field->size) { - ret = -EINVAL; -+ if (!hist_field) { -+ hist_field = parse_atom(hist_data, file, field_str, -+ &flags, var_name); -+ if (IS_ERR(hist_field)) { -+ ret = PTR_ERR(hist_field); - goto out; - } - } - +- goto out; +- } +- } +- - hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags, var_name); - if (!hist_data->fields[val_idx]) { - ret = -ENOMEM; -- goto out; -- } -+ hist_data->fields[val_idx] = hist_field; ++ hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); ++ if 
(IS_ERR(hist_field)) { ++ ret = PTR_ERR(hist_field); + goto out; + } ++ hist_data->fields[val_idx] = hist_field; ++ ++hist_data->n_vals; ++hist_data->n_fields; @@ -505,7 +555,7 @@ Signed-off-by: Sebastian Andrzej Siewior hist_data->n_var_only++; if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) -@@ -726,8 +1075,8 @@ static int create_key_field(struct hist_ +@@ -769,8 +1170,8 @@ static int create_key_field(struct hist_ struct trace_event_file *file, char *field_str) { @@ -514,10 +564,10 @@ Signed-off-by: Sebastian Andrzej Siewior + unsigned long flags = 0; unsigned int key_size; - char *var_name; -@@ -754,60 +1103,33 @@ static int create_key_field(struct hist_ + int ret = 0; +@@ -785,60 +1186,24 @@ static int create_key_field(struct hist_ key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH; - hist_field = create_hist_field(hist_data, NULL, flags, var_name); + hist_field = create_hist_field(hist_data, NULL, flags, NULL); } else { - char *field_name = strsep(&field_str, "."); - @@ -542,7 +592,7 @@ Signed-off-by: Sebastian Andrzej Siewior - goto out; - } + hist_field = parse_expr(hist_data, file, field_str, flags, -+ var_name, 0); ++ NULL, 0); + if (IS_ERR(hist_field)) { + ret = PTR_ERR(hist_field); + goto out; @@ -556,32 +606,26 @@ Signed-off-by: Sebastian Andrzej Siewior - key_size = sizeof(u64); - } else { - field = trace_find_event_field(file->event_call, field_name); -- if (!field) { +- if (!field || !field->size) { - ret = -EINVAL; -+ if (!hist_field) { -+ hist_field = parse_atom(hist_data, file, field_str, -+ &flags, var_name); -+ if (IS_ERR(hist_field)) { -+ ret = PTR_ERR(hist_field); - goto out; - } +- goto out; +- } - - if (is_string_field(field)) - key_size = MAX_FILTER_STR_VAL; - else - key_size = field->size; - } -- } - -- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, var_name); -- if (!hist_data->fields[key_idx]) { -- ret = -ENOMEM; -- goto out; +- } + key_size = hist_field->size; } +- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL); +- if (!hist_data->fields[key_idx]) { +- ret = -ENOMEM; +- goto out; +- } + hist_data->fields[key_idx] = hist_field; -+ + key_size = ALIGN(key_size, sizeof(u64)); hist_data->fields[key_idx]->size = key_size; hist_data->fields[key_idx]->offset = key_offset; @@ -591,7 +635,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (hist_data->key_size > HIST_KEY_SIZE_MAX) { ret = -EINVAL; goto out; -@@ -1330,7 +1652,8 @@ hist_trigger_entry_print(struct seq_file +@@ -1421,7 +1786,8 @@ hist_trigger_entry_print(struct seq_file for (i = 1; i < hist_data->n_vals; i++) { field_name = hist_field_name(hist_data->fields[i], 0); diff --git a/debian/patches/features/all/rt/0025-hrtimer-Use-irqsave-irqrestore-around-__run_hrtimer.patch b/debian/patches/features/all/rt/0025-hrtimer-Use-irqsave-irqrestore-around-__run_hrtimer.patch new file mode 100644 index 000000000..487bbff26 --- /dev/null +++ b/debian/patches/features/all/rt/0025-hrtimer-Use-irqsave-irqrestore-around-__run_hrtimer.patch @@ -0,0 +1,145 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:03 +0200 +Subject: [PATCH 25/36] hrtimer: Use irqsave/irqrestore around __run_hrtimer() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +__run_hrtimer() is called with the hrtimer_cpu_base.lock held and +interrupts disabled. Before invoking the timer callback the base lock is +dropped, but interrupts stay disabled. 
+ +The upcoming support for softirq based hrtimers requires that interrupts +are enabled before the timer callback is invoked. + +To avoid code duplication, take hrtimer_cpu_base.lock with +raw_spin_lock_irqsave(flags) at the call site and hand in the flags as +argument. So raw_spin_unlock_irqrestore() before the callback invocation +will either keep interrupts disabled in interrupt context or restore to +interrupt enabled state when called from softirq context. + +Suggested-by: Peter Zijlstra +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 31 ++++++++++++++++++------------- + 1 file changed, 18 insertions(+), 13 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1163,7 +1163,8 @@ EXPORT_SYMBOL_GPL(hrtimer_active); + + static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, + struct hrtimer_clock_base *base, +- struct hrtimer *timer, ktime_t *now) ++ struct hrtimer *timer, ktime_t *now, ++ unsigned long flags) + { + enum hrtimer_restart (*fn)(struct hrtimer *); + int restart; +@@ -1198,11 +1199,11 @@ static void __run_hrtimer(struct hrtimer + * protected against migration to a different CPU even if the lock + * is dropped. + */ +- raw_spin_unlock(&cpu_base->lock); ++ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + trace_hrtimer_expire_entry(timer, now); + restart = fn(timer); + trace_hrtimer_expire_exit(timer); +- raw_spin_lock(&cpu_base->lock); ++ raw_spin_lock_irq(&cpu_base->lock); + + /* + * Note: We clear the running state after enqueue_hrtimer and +@@ -1230,7 +1231,8 @@ static void __run_hrtimer(struct hrtimer + base->running = NULL; + } + +-static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) ++static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, ++ unsigned long flags) + { + struct hrtimer_clock_base *base; + unsigned int active = cpu_base->active_bases; +@@ -1261,7 +1263,7 @@ static void __hrtimer_run_queues(struct + if (basenow < hrtimer_get_softexpires_tv64(timer)) + break; + +- __run_hrtimer(cpu_base, base, timer, &basenow); ++ __run_hrtimer(cpu_base, base, timer, &basenow, flags); + } + } + } +@@ -1276,13 +1278,14 @@ void hrtimer_interrupt(struct clock_even + { + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + ktime_t expires_next, now, entry_time, delta; ++ unsigned long flags; + int retries = 0; + + BUG_ON(!cpu_base->hres_active); + cpu_base->nr_events++; + dev->next_event = KTIME_MAX; + +- raw_spin_lock(&cpu_base->lock); ++ raw_spin_lock_irqsave(&cpu_base->lock, flags); + entry_time = now = hrtimer_update_base(cpu_base); + retry: + cpu_base->in_hrtirq = 1; +@@ -1295,7 +1298,7 @@ void hrtimer_interrupt(struct clock_even + */ + cpu_base->expires_next = KTIME_MAX; + +- __hrtimer_run_queues(cpu_base, now); ++ __hrtimer_run_queues(cpu_base, now, flags); + + /* Reevaluate the clock bases for the next expiry */ + expires_next = __hrtimer_get_next_event(cpu_base); +@@ -1305,7 +1308,7 @@ void hrtimer_interrupt(struct clock_even + */ + cpu_base->expires_next = expires_next; + cpu_base->in_hrtirq = 0; +- raw_spin_unlock(&cpu_base->lock); ++ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + + /* Reprogramming necessary ? */ + if (!tick_program_event(expires_next, 0)) { +@@ -1326,7 +1329,7 @@ void hrtimer_interrupt(struct clock_even + * Acquire base lock for updating the offsets and retrieving + * the current time. 
+ */ +- raw_spin_lock(&cpu_base->lock); ++ raw_spin_lock_irqsave(&cpu_base->lock, flags); + now = hrtimer_update_base(cpu_base); + cpu_base->nr_retries++; + if (++retries < 3) +@@ -1339,7 +1342,8 @@ void hrtimer_interrupt(struct clock_even + */ + cpu_base->nr_hangs++; + cpu_base->hang_detected = 1; +- raw_spin_unlock(&cpu_base->lock); ++ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); ++ + delta = ktime_sub(now, entry_time); + if ((unsigned int)delta > cpu_base->max_hang_time) + cpu_base->max_hang_time = (unsigned int) delta; +@@ -1381,6 +1385,7 @@ static inline void __hrtimer_peek_ahead_ + void hrtimer_run_queues(void) + { + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ++ unsigned long flags; + ktime_t now; + + if (__hrtimer_hres_active(cpu_base)) +@@ -1398,10 +1403,10 @@ void hrtimer_run_queues(void) + return; + } + +- raw_spin_lock(&cpu_base->lock); ++ raw_spin_lock_irqsave(&cpu_base->lock, flags); + now = hrtimer_update_base(cpu_base); +- __hrtimer_run_queues(cpu_base, now); +- raw_spin_unlock(&cpu_base->lock); ++ __hrtimer_run_queues(cpu_base, now, flags); ++ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + } + + /* diff --git a/debian/patches/features/all/rt/0025-tracing-Generalize-per-element-hist-trigger-data.patch b/debian/patches/features/all/rt/0025-tracing-Generalize-per-element-hist-trigger-data.patch new file mode 100644 index 000000000..64fbc192f --- /dev/null +++ b/debian/patches/features/all/rt/0025-tracing-Generalize-per-element-hist-trigger-data.patch @@ -0,0 +1,150 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 14:59:56 -0500 +Subject: [PATCH 25/42] tracing: Generalize per-element hist trigger data +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Up until now, hist triggers only needed per-element support for saving +'comm' data, which was saved directly as a private data pointer. + +In anticipation of the need to save other data besides 'comm', add a +new hist_elt_data struct for the purpose, and switch the current +'comm'-related code over to that. 
+ +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 71 ++++++++++++++++++++------------------- + 1 file changed, 38 insertions(+), 33 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -249,6 +249,10 @@ static u64 hist_field_timestamp(struct h + return ts; + } + ++struct hist_elt_data { ++ char *comm; ++}; ++ + static const char *hist_field_name(struct hist_field *field, + unsigned int level) + { +@@ -463,45 +467,61 @@ static inline void save_comm(char *comm, + memcpy(comm, task->comm, TASK_COMM_LEN); + } + +-static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt) ++static void hist_elt_data_free(struct hist_elt_data *elt_data) ++{ ++ kfree(elt_data->comm); ++ kfree(elt_data); ++} ++ ++static void hist_trigger_elt_data_free(struct tracing_map_elt *elt) + { +- kfree((char *)elt->private_data); ++ struct hist_elt_data *elt_data = elt->private_data; ++ ++ hist_elt_data_free(elt_data); + } + +-static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt) ++static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) + { + struct hist_trigger_data *hist_data = elt->map->private_data; ++ unsigned int size = TASK_COMM_LEN + 1; ++ struct hist_elt_data *elt_data; + struct hist_field *key_field; + unsigned int i; + ++ elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); ++ if (!elt_data) ++ return -ENOMEM; ++ + for_each_hist_key_field(i, hist_data) { + key_field = hist_data->fields[i]; + + if (key_field->flags & HIST_FIELD_FL_EXECNAME) { +- unsigned int size = TASK_COMM_LEN + 1; +- +- elt->private_data = kzalloc(size, GFP_KERNEL); +- if (!elt->private_data) ++ elt_data->comm = kzalloc(size, GFP_KERNEL); ++ if (!elt_data->comm) { ++ kfree(elt_data); + return -ENOMEM; ++ } + break; + } + } + ++ elt->private_data = elt_data; ++ + return 0; + } + +-static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt) ++static void hist_trigger_elt_data_init(struct tracing_map_elt *elt) + { +- char *comm = elt->private_data; ++ struct hist_elt_data *elt_data = elt->private_data; + +- if (comm) +- save_comm(comm, current); ++ if (elt_data->comm) ++ save_comm(elt_data->comm, current); + } + +-static const struct tracing_map_ops hist_trigger_elt_comm_ops = { +- .elt_alloc = hist_trigger_elt_comm_alloc, +- .elt_free = hist_trigger_elt_comm_free, +- .elt_init = hist_trigger_elt_comm_init, ++static const struct tracing_map_ops hist_trigger_elt_data_ops = { ++ .elt_alloc = hist_trigger_elt_data_alloc, ++ .elt_free = hist_trigger_elt_data_free, ++ .elt_init = hist_trigger_elt_data_init, + }; + + static const char *get_hist_field_flags(struct hist_field *hist_field) +@@ -1512,21 +1532,6 @@ static int create_tracing_map_fields(str + return 0; + } + +-static bool need_tracing_map_ops(struct hist_trigger_data *hist_data) +-{ +- struct hist_field *key_field; +- unsigned int i; +- +- for_each_hist_key_field(i, hist_data) { +- key_field = hist_data->fields[i]; +- +- if (key_field->flags & HIST_FIELD_FL_EXECNAME) +- return true; +- } +- +- return false; +-} +- + static struct hist_trigger_data * + create_hist_data(unsigned int map_bits, + struct hist_trigger_attrs *attrs, +@@ -1552,8 +1557,7 @@ create_hist_data(unsigned int map_bits, + if (ret) + goto free; + +- if (need_tracing_map_ops(hist_data)) +- map_ops = &hist_trigger_elt_comm_ops; ++ map_ops = &hist_trigger_elt_data_ops; + + hist_data->map = tracing_map_create(map_bits, hist_data->key_size, + map_ops, hist_data); +@@ -1742,7 +1746,8 
@@ hist_trigger_entry_print(struct seq_file + seq_printf(m, "%s: [%llx] %-55s", field_name, + uval, str); + } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { +- char *comm = elt->private_data; ++ struct hist_elt_data *elt_data = elt->private_data; ++ char *comm = elt_data->comm; + + uval = *(u64 *)(key + key_field->offset); + seq_printf(m, "%s: %-16s[%10llu]", field_name, diff --git a/debian/patches/features/all/rt/0026-hrtimer-Add-clock-bases-and-hrtimer-mode-for-soft-ir.patch b/debian/patches/features/all/rt/0026-hrtimer-Add-clock-bases-and-hrtimer-mode-for-soft-ir.patch new file mode 100644 index 000000000..1d30d3ddd --- /dev/null +++ b/debian/patches/features/all/rt/0026-hrtimer-Add-clock-bases-and-hrtimer-mode-for-soft-ir.patch @@ -0,0 +1,109 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:04 +0200 +Subject: [PATCH 26/36] hrtimer: Add clock bases and hrtimer mode for soft irq + context +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer callback functions are always executed in hard interrupt +context. Users of hrtimer which need their timer function to be executed +in soft interrupt context, make use of tasklets to get the proper context. + +Add additional hrtimer clock bases for timers which must expire in softirq +context, so the detour via the tasklet can be avoided. This is also +required for RT, where the majority of hrtimer is moved into softirq +hrtimer context. + +The selection of the expiry mode happens via a mode bit. Introduce +HRTIMER_MODE_SOFT and the matching combinations with the ABS/REL/PINNED +bits and update the decoding of hrtimer_mode in tracepoints. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 14 ++++++++++++++ + include/trace/events/timer.h | 6 +++++- + kernel/time/hrtimer.c | 20 ++++++++++++++++++++ + 3 files changed, 39 insertions(+), 1 deletion(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -33,14 +33,24 @@ struct hrtimer_cpu_base; + * HRTIMER_MODE_REL - Time value is relative to now + * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered + * when starting the timer) ++ * HRTIMER_MODE_SOFT - Timer callback function will be executed in ++ * soft irq context + */ + enum hrtimer_mode { + HRTIMER_MODE_ABS = 0x00, + HRTIMER_MODE_REL = 0x01, + HRTIMER_MODE_PINNED = 0x02, ++ HRTIMER_MODE_SOFT = 0x04, + + HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, + HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, ++ ++ HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, ++ HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, ++ ++ HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, ++ HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, ++ + }; + + /* +@@ -151,6 +161,10 @@ enum hrtimer_base_type { + HRTIMER_BASE_REALTIME, + HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_TAI, ++ HRTIMER_BASE_MONOTONIC_SOFT, ++ HRTIMER_BASE_REALTIME_SOFT, ++ HRTIMER_BASE_BOOTTIME_SOFT, ++ HRTIMER_BASE_TAI_SOFT, + HRTIMER_MAX_CLOCK_BASES, + }; + +--- a/include/trace/events/timer.h ++++ b/include/trace/events/timer.h +@@ -148,7 +148,11 @@ DEFINE_EVENT(timer_class, timer_cancel, + { HRTIMER_MODE_ABS, "ABS" }, \ + { HRTIMER_MODE_REL, "REL" }, \ + { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ +- { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) ++ { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }, \ ++ { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ ++ { 
HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ ++ { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ ++ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) + + /** + * hrtimer_init - called when the hrtimer is initialized +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -92,6 +92,26 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, + .clockid = CLOCK_TAI, + .get_time = &ktime_get_clocktai, + }, ++ { ++ .index = HRTIMER_BASE_MONOTONIC_SOFT, ++ .clockid = CLOCK_MONOTONIC, ++ .get_time = &ktime_get, ++ }, ++ { ++ .index = HRTIMER_BASE_REALTIME_SOFT, ++ .clockid = CLOCK_REALTIME, ++ .get_time = &ktime_get_real, ++ }, ++ { ++ .index = HRTIMER_BASE_BOOTTIME_SOFT, ++ .clockid = CLOCK_BOOTTIME, ++ .get_time = &ktime_get_boottime, ++ }, ++ { ++ .index = HRTIMER_BASE_TAI_SOFT, ++ .clockid = CLOCK_TAI, ++ .get_time = &ktime_get_clocktai, ++ }, + } + }; + diff --git a/debian/patches/features/all/rt/0026-tracing-Make-duplicate-count-from-tracing_map-availa.patch b/debian/patches/features/all/rt/0026-tracing-Make-duplicate-count-from-tracing_map-availa.patch deleted file mode 100644 index 9e9c22761..000000000 --- a/debian/patches/features/all/rt/0026-tracing-Make-duplicate-count-from-tracing_map-availa.patch +++ /dev/null @@ -1,125 +0,0 @@ -From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:27 -0500 -Subject: [PATCH 26/32] tracing: Make duplicate count from tracing_map - available -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Though extremely rare, there can be duplicate entries in the tracing -map. This isn't normally a problem, as the sorting code makes this -transparent by merging them during the sort. - -It's useful to know however, as a check on that assumption - if a -non-zero duplicate count is seen more than rarely, it might indicate -an unexpected change to the algorithm, or a pathological data set. - -Add an extra param to tracing_map_sort_entries() and use it to display -the value in the hist trigger output. 
- -Signed-off-by: Tom Zanussi -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/trace/trace_events_hist.c | 14 ++++++++------ - kernel/trace/tracing_map.c | 12 +++++++++--- - kernel/trace/tracing_map.h | 3 ++- - 3 files changed, 19 insertions(+), 10 deletions(-) - ---- a/kernel/trace/trace_events_hist.c -+++ b/kernel/trace/trace_events_hist.c -@@ -4011,7 +4011,8 @@ hist_trigger_entry_print(struct seq_file - } - - static int print_entries(struct seq_file *m, -- struct hist_trigger_data *hist_data) -+ struct hist_trigger_data *hist_data, -+ unsigned int *n_dups) - { - struct tracing_map_sort_entry **sort_entries = NULL; - struct tracing_map *map = hist_data->map; -@@ -4019,7 +4020,7 @@ static int print_entries(struct seq_file - - n_entries = tracing_map_sort_entries(map, hist_data->sort_keys, - hist_data->n_sort_keys, -- &sort_entries); -+ &sort_entries, n_dups); - if (n_entries < 0) - return n_entries; - -@@ -4038,6 +4039,7 @@ static void hist_trigger_show(struct seq - { - struct hist_trigger_data *hist_data; - int n_entries, ret = 0; -+ unsigned int n_dups; - - if (n > 0) - seq_puts(m, "\n\n"); -@@ -4047,15 +4049,15 @@ static void hist_trigger_show(struct seq - seq_puts(m, "#\n\n"); - - hist_data = data->private_data; -- n_entries = print_entries(m, hist_data); -+ n_entries = print_entries(m, hist_data, &n_dups); - if (n_entries < 0) { - ret = n_entries; - n_entries = 0; - } - -- seq_printf(m, "\nTotals:\n Hits: %llu\n Entries: %u\n Dropped: %llu\n", -- (u64)atomic64_read(&hist_data->map->hits), -- n_entries, (u64)atomic64_read(&hist_data->map->drops)); -+ seq_printf(m, "\nTotals:\n Hits: %llu\n Entries: %u\n Dropped: %llu\n Duplicates: %u\n", -+ (u64)atomic64_read(&hist_data->map->hits), n_entries, -+ (u64)atomic64_read(&hist_data->map->drops), n_dups); - } - - static int hist_show(struct seq_file *m, void *v) ---- a/kernel/trace/tracing_map.c -+++ b/kernel/trace/tracing_map.c -@@ -1084,6 +1084,7 @@ static void sort_secondary(struct tracin - * @map: The tracing_map - * @sort_key: The sort key to use for sorting - * @sort_entries: outval: pointer to allocated and sorted array of entries -+ * @n_dups: outval: pointer to variable receiving a count of duplicates found - * - * tracing_map_sort_entries() sorts the current set of entries in the - * map and returns the list of tracing_map_sort_entries containing -@@ -1100,13 +1101,16 @@ static void sort_secondary(struct tracin - * The client should not hold on to the returned array but should use - * it and call tracing_map_destroy_sort_entries() when done. - * -- * Return: the number of sort_entries in the struct tracing_map_sort_entry -- * array, negative on error -+ * Return: the number of sort_entries in the struct -+ * tracing_map_sort_entry array, negative on error. If n_dups is -+ * non-NULL, it will receive the number of duplicate entries found -+ * (and merged) during the sort. 
- */ - int tracing_map_sort_entries(struct tracing_map *map, - struct tracing_map_sort_key *sort_keys, - unsigned int n_sort_keys, -- struct tracing_map_sort_entry ***sort_entries) -+ struct tracing_map_sort_entry ***sort_entries, -+ unsigned int *n_dups) - { - int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); -@@ -1147,6 +1151,8 @@ int tracing_map_sort_entries(struct trac - if (ret < 0) - goto free; - n_entries -= ret; -+ if (n_dups) -+ *n_dups = ret; - - if (is_key(map, sort_keys[0].field_idx)) - cmp_entries_fn = cmp_entries_key; ---- a/kernel/trace/tracing_map.h -+++ b/kernel/trace/tracing_map.h -@@ -286,7 +286,8 @@ extern int - tracing_map_sort_entries(struct tracing_map *map, - struct tracing_map_sort_key *sort_keys, - unsigned int n_sort_keys, -- struct tracing_map_sort_entry ***sort_entries); -+ struct tracing_map_sort_entry ***sort_entries, -+ unsigned int *n_dups); - - extern void - tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries, diff --git a/debian/patches/features/all/rt/0026-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch b/debian/patches/features/all/rt/0026-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch new file mode 100644 index 000000000..80d9e9983 --- /dev/null +++ b/debian/patches/features/all/rt/0026-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch @@ -0,0 +1,222 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 14:59:57 -0500 +Subject: [PATCH 26/42] tracing: Pass tracing_map_elt to hist_field accessor + functions +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Some accessor functions, such as for variable references, require +access to a corrsponding tracing_map_elt. + +Add a tracing_map_elt param to the function signature and update the +accessor functions accordingly. 
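For illustration, a hypothetical accessor written against the updated hist_field_fn_t signature from the hunk below would look roughly like this (hist_field_example is a made-up name; simple accessors can ignore the new parameter):

static u64 hist_field_example(struct hist_field *field,
			      struct tracing_map_elt *elt,
			      struct ring_buffer_event *rbe,
			      void *event)
{
	/* elt gives accessors that need it access to per-element data,
	 * e.g. variable references stored in the tracing_map element.
	 */
	return 0;
}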
+ +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 91 ++++++++++++++++++++++++--------------- + 1 file changed, 57 insertions(+), 34 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -26,8 +26,10 @@ + + struct hist_field; + +-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event, +- struct ring_buffer_event *rbe); ++typedef u64 (*hist_field_fn_t) (struct hist_field *field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event); + + #define HIST_FIELD_OPERANDS_MAX 2 + #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) +@@ -59,28 +61,36 @@ struct hist_field { + char *name; + }; + +-static u64 hist_field_none(struct hist_field *field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_none(struct hist_field *field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + return 0; + } + +-static u64 hist_field_counter(struct hist_field *field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_counter(struct hist_field *field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + return 1; + } + +-static u64 hist_field_string(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_string(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + char *addr = (char *)(event + hist_field->field->offset); + + return (u64)(unsigned long)addr; + } + +-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_dynstring(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + u32 str_item = *(u32 *)(event + hist_field->field->offset); + int str_loc = str_item & 0xffff; +@@ -89,54 +99,64 @@ static u64 hist_field_dynstring(struct h + return (u64)(unsigned long)addr; + } + +-static u64 hist_field_pstring(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_pstring(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + char **addr = (char **)(event + hist_field->field->offset); + + return (u64)(unsigned long)*addr; + } + +-static u64 hist_field_log2(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_log2(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand = hist_field->operands[0]; + +- u64 val = operand->fn(operand, event, rbe); ++ u64 val = operand->fn(operand, elt, rbe, event); + + return (u64) ilog2(roundup_pow_of_two(val)); + } + +-static u64 hist_field_plus(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_plus(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + +- u64 val1 = operand1->fn(operand1, event, rbe); +- u64 val2 = operand2->fn(operand2, event, rbe); ++ u64 val1 = operand1->fn(operand1, elt, rbe, event); ++ u64 val2 = operand2->fn(operand2, elt, rbe, event); + + return val1 + val2; + } + 
+-static u64 hist_field_minus(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_minus(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + +- u64 val1 = operand1->fn(operand1, event, rbe); +- u64 val2 = operand2->fn(operand2, event, rbe); ++ u64 val1 = operand1->fn(operand1, elt, rbe, event); ++ u64 val2 = operand2->fn(operand2, elt, rbe, event); + + return val1 - val2; + } + +-static u64 hist_field_unary_minus(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_unary_minus(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand = hist_field->operands[0]; + +- s64 sval = (s64)operand->fn(operand, event, rbe); ++ s64 sval = (s64)operand->fn(operand, elt, rbe, event); + u64 val = (u64)-sval; + + return val; +@@ -144,8 +164,9 @@ static u64 hist_field_unary_minus(struct + + #define DEFINE_HIST_FIELD_FN(type) \ + static u64 hist_field_##type(struct hist_field *hist_field, \ +- void *event, \ +- struct ring_buffer_event *rbe) \ ++ struct tracing_map_elt *elt, \ ++ struct ring_buffer_event *rbe, \ ++ void *event) \ + { \ + type *addr = (type *)(event + hist_field->field->offset); \ + \ +@@ -235,8 +256,10 @@ struct hist_trigger_data { + bool remove; + }; + +-static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_timestamp(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_trigger_data *hist_data = hist_field->hist_data; + struct trace_array *tr = hist_data->event_file->tr; +@@ -1598,7 +1621,7 @@ static void hist_trigger_elt_update(stru + + for_each_hist_val_field(i, hist_data) { + hist_field = hist_data->fields[i]; +- hist_val = hist_field->fn(hist_field, rbe, rec); ++ hist_val = hist_field->fn(hist_field, elt, rbe, rec); + if (hist_field->flags & HIST_FIELD_FL_VAR) { + var_idx = hist_field->var.idx; + tracing_map_set_var(elt, var_idx, hist_val); +@@ -1611,7 +1634,7 @@ static void hist_trigger_elt_update(stru + for_each_hist_key_field(i, hist_data) { + hist_field = hist_data->fields[i]; + if (hist_field->flags & HIST_FIELD_FL_VAR) { +- hist_val = hist_field->fn(hist_field, rbe, rec); ++ hist_val = hist_field->fn(hist_field, elt, rbe, rec); + var_idx = hist_field->var.idx; + tracing_map_set_var(elt, var_idx, hist_val); + } +@@ -1649,9 +1672,9 @@ static void event_hist_trigger(struct ev + bool use_compound_key = (hist_data->n_keys > 1); + unsigned long entries[HIST_STACKTRACE_DEPTH]; + char compound_key[HIST_KEY_SIZE_MAX]; ++ struct tracing_map_elt *elt = NULL; + struct stack_trace stacktrace; + struct hist_field *key_field; +- struct tracing_map_elt *elt; + u64 field_contents; + void *key = NULL; + unsigned int i; +@@ -1672,7 +1695,7 @@ static void event_hist_trigger(struct ev + + key = entries; + } else { +- field_contents = key_field->fn(key_field, rec, rbe); ++ field_contents = key_field->fn(key_field, elt, rbe, rec); + if (key_field->flags & HIST_FIELD_FL_STRING) { + key = (void *)(unsigned long)field_contents; + use_compound_key = true; diff --git a/debian/patches/features/all/rt/0027-hrtimer-Prepare-handling-of-hard-and-softirq-based-h.patch 
b/debian/patches/features/all/rt/0027-hrtimer-Prepare-handling-of-hard-and-softirq-based-h.patch new file mode 100644 index 000000000..6e9d89696 --- /dev/null +++ b/debian/patches/features/all/rt/0027-hrtimer-Prepare-handling-of-hard-and-softirq-based-h.patch @@ -0,0 +1,117 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:05 +0200 +Subject: [PATCH 27/36] hrtimer: Prepare handling of hard and softirq based + hrtimers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The softirq based hrtimer can utilize most of the existing hrtimers +functions, but need to operate on a different data set. + +Add an active_mask argument to various functions so the hard and soft bases +can be selected. Fixup the existing callers and hand in the ACTIVE_HARD +mask. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 38 +++++++++++++++++++++++++++++--------- + 1 file changed, 29 insertions(+), 9 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -60,6 +60,15 @@ + #include "tick-internal.h" + + /* ++ * Masks for selecting the soft and hard context timers from ++ * cpu_base->active ++ */ ++#define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) ++#define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) ++#define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) ++#define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) ++ ++/* + * The timer bases: + * + * There are more clockids than hrtimer bases. Thus, we index +@@ -508,13 +517,24 @@ static ktime_t __hrtimer_next_event_base + return expires_next; + } + +-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) ++/* ++ * Recomputes cpu_base::*next_timer and returns the earliest expires_next but ++ * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. ++ * ++ * @active_mask must be one of: ++ * - HRTIMER_ACTIVE, ++ * - HRTIMER_ACTIVE_SOFT, or ++ * - HRTIMER_ACTIVE_HARD. 
++ */ ++static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, ++ unsigned int active_mask) + { +- unsigned int active = cpu_base->active_bases; ++ unsigned int active; + ktime_t expires_next = KTIME_MAX; + + cpu_base->next_timer = NULL; + ++ active = cpu_base->active_bases & active_mask; + expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + + return expires_next; +@@ -555,7 +575,7 @@ hrtimer_force_reprogram(struct hrtimer_c + { + ktime_t expires_next; + +- expires_next = __hrtimer_get_next_event(cpu_base); ++ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + + if (skip_equal && expires_next == cpu_base->expires_next) + return; +@@ -1078,7 +1098,7 @@ u64 hrtimer_get_next_event(void) + raw_spin_lock_irqsave(&cpu_base->lock, flags); + + if (!__hrtimer_hres_active(cpu_base)) +- expires = __hrtimer_get_next_event(cpu_base); ++ expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + +@@ -1252,10 +1272,10 @@ static void __run_hrtimer(struct hrtimer + } + + static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, +- unsigned long flags) ++ unsigned long flags, unsigned int active_mask) + { + struct hrtimer_clock_base *base; +- unsigned int active = cpu_base->active_bases; ++ unsigned int active = cpu_base->active_bases & active_mask; + + for_each_active_base(base, cpu_base, active) { + struct timerqueue_node *node; +@@ -1318,10 +1338,10 @@ void hrtimer_interrupt(struct clock_even + */ + cpu_base->expires_next = KTIME_MAX; + +- __hrtimer_run_queues(cpu_base, now, flags); ++ __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); + + /* Reevaluate the clock bases for the next expiry */ +- expires_next = __hrtimer_get_next_event(cpu_base); ++ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + /* + * Store the new expiry value so the migration code can verify + * against it. +@@ -1425,7 +1445,7 @@ void hrtimer_run_queues(void) + + raw_spin_lock_irqsave(&cpu_base->lock, flags); + now = hrtimer_update_base(cpu_base); +- __hrtimer_run_queues(cpu_base, now, flags); ++ __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + } + diff --git a/debian/patches/features/all/rt/0027-tracing-Add-hist_field-type-field.patch b/debian/patches/features/all/rt/0027-tracing-Add-hist_field-type-field.patch new file mode 100644 index 000000000..6110ffc56 --- /dev/null +++ b/debian/patches/features/all/rt/0027-tracing-Add-hist_field-type-field.patch @@ -0,0 +1,114 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 14:59:58 -0500 +Subject: [PATCH 27/42] tracing: Add hist_field 'type' field +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Future support for synthetic events requires hist_field 'type' +information, so add a field for that. + +Also, make other hist_field attribute usage consistent (size, +is_signed, etc). 
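Sketched from the hunks below (example_fill is a hypothetical wrapper, the member assignments mirror the patch), the population pattern for an ordinary ftrace field becomes:

static struct hist_field *example_fill(struct hist_field *hist_field,
				       struct ftrace_event_field *field)
{
	hist_field->size = field->size;
	hist_field->is_signed = field->is_signed;
	/* The new 'type' member records the field's C type string so later
	 * consumers (e.g. the planned synthetic events) can reconstruct
	 * the field definition; destroy_hist_field() kfree()s it.
	 */
	hist_field->type = kstrdup(field->type, GFP_KERNEL);
	if (!hist_field->type)
		return NULL;

	return hist_field;
}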
+ +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 33 +++++++++++++++++++++++++++++++++ + 1 file changed, 33 insertions(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -54,6 +54,7 @@ struct hist_field { + unsigned int size; + unsigned int offset; + unsigned int is_signed; ++ const char *type; + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; + struct hist_trigger_data *hist_data; + struct hist_var var; +@@ -672,6 +673,7 @@ static void destroy_hist_field(struct hi + + kfree(hist_field->var.name); + kfree(hist_field->name); ++ kfree(hist_field->type); + + kfree(hist_field); + } +@@ -697,6 +699,10 @@ static struct hist_field *create_hist_fi + + if (flags & HIST_FIELD_FL_HITCOUNT) { + hist_field->fn = hist_field_counter; ++ hist_field->size = sizeof(u64); ++ hist_field->type = kstrdup("u64", GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; + goto out; + } + +@@ -710,12 +716,18 @@ static struct hist_field *create_hist_fi + hist_field->fn = hist_field_log2; + hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); + hist_field->size = hist_field->operands[0]->size; ++ hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; + goto out; + } + + if (flags & HIST_FIELD_FL_TIMESTAMP) { + hist_field->fn = hist_field_timestamp; + hist_field->size = sizeof(u64); ++ hist_field->type = kstrdup("u64", GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; + goto out; + } + +@@ -725,6 +737,11 @@ static struct hist_field *create_hist_fi + if (is_string_field(field)) { + flags |= HIST_FIELD_FL_STRING; + ++ hist_field->size = MAX_FILTER_STR_VAL; ++ hist_field->type = kstrdup(field->type, GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; ++ + if (field->filter_type == FILTER_STATIC_STRING) + hist_field->fn = hist_field_string; + else if (field->filter_type == FILTER_DYN_STRING) +@@ -732,6 +749,12 @@ static struct hist_field *create_hist_fi + else + hist_field->fn = hist_field_pstring; + } else { ++ hist_field->size = field->size; ++ hist_field->is_signed = field->is_signed; ++ hist_field->type = kstrdup(field->type, GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; ++ + hist_field->fn = select_value_fn(field->size, + field->is_signed); + if (!hist_field->fn) { +@@ -941,6 +964,11 @@ static struct hist_field *parse_unary(st + expr->operands[0] = operand1; + expr->operator = FIELD_OP_UNARY_MINUS; + expr->name = expr_str(expr, 0); ++ expr->type = kstrdup(operand1->type, GFP_KERNEL); ++ if (!expr->type) { ++ ret = -ENOMEM; ++ goto free; ++ } + + return expr; + free: +@@ -1029,6 +1057,11 @@ static struct hist_field *parse_expr(str + expr->operands[1] = operand2; + expr->operator = field_op; + expr->name = expr_str(expr, 0); ++ expr->type = kstrdup(operand1->type, GFP_KERNEL); ++ if (!expr->type) { ++ ret = -ENOMEM; ++ goto free; ++ } + + switch (field_op) { + case FIELD_OP_MINUS: diff --git a/debian/patches/features/all/rt/0028-hrtimer-Implement-support-for-softirq-based-hrtimers.patch b/debian/patches/features/all/rt/0028-hrtimer-Implement-support-for-softirq-based-hrtimers.patch new file mode 100644 index 000000000..9dab27c4f --- /dev/null +++ b/debian/patches/features/all/rt/0028-hrtimer-Implement-support-for-softirq-based-hrtimers.patch @@ -0,0 +1,509 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:06 +0200 +Subject: [PATCH 28/36] hrtimer: Implement support for softirq based hrtimers +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer callbacks are always invoked in hard interrupt context. Several +users in tree require soft interrupt context for their callbacks and +achieve this by combining a hrtimer with a tasklet. The hrtimer schedules +the tasklet in hard interrupt context and the tasklet callback gets invoked +in softirq context later. + +That's suboptimal and aside of that the real-time patch moves most of the +hrtimers into softirq context. So adding native support for hrtimers +expiring in softirq context is a valuable extension for both mainline and +the RT patch set. + +Each valid hrtimer clock id has two associated hrtimer clock bases: one for +timers expiring in hardirq context and one for timers expiring in softirq +context. + +Implement the functionality to associate a hrtimer with the hard or softirq +related clock bases and update the relevant functions to take them into +account when the next expiry time needs to be evaluated. + +Add a check into the hard interrupt context handler functions to check +whether the first expiring softirq based timer has expired. If it's expired +the softirq is raised and the accounting of softirq based timers to +evaluate the next expiry time for programming the timer hardware is skipped +until the softirq processing has finished. At the end of the softirq +processing the regular processing is resumed. + +Suggested-by: Thomas Gleixner +Suggested-by: Peter Zijlstra +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/hrtimer.h | 20 +++- + kernel/time/hrtimer.c | 201 ++++++++++++++++++++++++++++++++++++++++-------- + 2 files changed, 185 insertions(+), 36 deletions(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -113,6 +113,7 @@ struct hrtimer { + struct hrtimer_clock_base *base; + u8 state; + u8 is_rel; ++ u8 is_soft; + }; + + /** +@@ -178,13 +179,18 @@ enum hrtimer_base_type { + * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang ++ * @softirq_activated: displays, if the softirq is raised - update of softirq ++ * related settings is not required then. + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @expires_next: absolute time of the next event, is required for remote +- * hrtimer enqueue ++ * hrtimer enqueue; it is the total first expiry time (hard ++ * and soft hrtimer are taken into account) + * @next_timer: Pointer to the first expiring timer ++ * @softirq_expires_next: Time to check, if soft queues needs also to be expired ++ * @softirq_next_timer: Pointer to the first expiring softirq based timer + * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). 
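/*
 * Illustrative usage only, not part of the hunks: with this series applied,
 * a timer owner can request softirq expiry simply by picking a *_SOFT mode,
 * and the callback then runs from HRTIMER_SOFTIRQ rather than hard interrupt
 * context. "my_timer"/"my_timeout"/"my_timer_setup" are made-up names.
 */
static enum hrtimer_restart my_timeout(struct hrtimer *t)
{
	/* Runs in softirq context because the timer uses a _SOFT mode. */
	return HRTIMER_NORESTART;
}

static void my_timer_setup(struct hrtimer *my_timer)
{
	hrtimer_init(my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
	my_timer->function = my_timeout;
	hrtimer_start(my_timer, ms_to_ktime(10), HRTIMER_MODE_REL_SOFT);
}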
+@@ -196,9 +202,10 @@ struct hrtimer_cpu_base { + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; +- unsigned int hres_active : 1, +- in_hrtirq : 1, +- hang_detected : 1; ++ unsigned int hres_active : 1, ++ in_hrtirq : 1, ++ hang_detected : 1, ++ softirq_activated : 1; + #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int nr_events; + unsigned short nr_retries; +@@ -207,6 +214,8 @@ struct hrtimer_cpu_base { + #endif + ktime_t expires_next; + struct hrtimer *next_timer; ++ ktime_t softirq_expires_next; ++ struct hrtimer *softirq_next_timer; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + } ____cacheline_aligned; + +@@ -379,7 +388,8 @@ extern void hrtimer_start_range_ns(struc + * @timer: the timer to be added + * @tim: expiry time + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or +- * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) ++ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); ++ * softirq based mode is considered for debug purpose only! + */ + static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -68,6 +68,9 @@ + #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) + #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) + ++/* Define for debug mode check */ ++#define HRTIMER_MODECHECK true ++ + /* + * The timer bases: + * +@@ -411,8 +414,17 @@ static inline void debug_hrtimer_init(st + debug_object_init(timer, &hrtimer_debug_descr); + } + +-static inline void debug_hrtimer_activate(struct hrtimer *timer) ++static inline void debug_hrtimer_activate(struct hrtimer *timer, ++ enum hrtimer_mode mode, ++ bool modecheck) + { ++ /* ++ * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft ++ * match, when a timer is started via__hrtimer_start_range_ns(). 
++ */ ++ if (modecheck) ++ WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); ++ + debug_object_activate(timer, &hrtimer_debug_descr); + } + +@@ -444,8 +456,11 @@ void destroy_hrtimer_on_stack(struct hrt + EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); + + #else ++ + static inline void debug_hrtimer_init(struct hrtimer *timer) { } +-static inline void debug_hrtimer_activate(struct hrtimer *timer) { } ++static inline void debug_hrtimer_activate(struct hrtimer *timer, ++ enum hrtimer_mode mode, ++ bool modecheck) { } + static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } + #endif + +@@ -458,9 +473,10 @@ debug_init(struct hrtimer *timer, clocki + } + + static inline void debug_activate(struct hrtimer *timer, +- enum hrtimer_mode mode) ++ enum hrtimer_mode mode, ++ bool modecheck) + { +- debug_hrtimer_activate(timer); ++ debug_hrtimer_activate(timer, mode, modecheck); + trace_hrtimer_start(timer, mode); + } + +@@ -470,7 +486,6 @@ static inline void debug_deactivate(stru + trace_hrtimer_cancel(timer); + } + +-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) + static struct hrtimer_clock_base * + __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) + { +@@ -504,7 +519,10 @@ static ktime_t __hrtimer_next_event_base + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + if (expires < expires_next) { + expires_next = expires; +- cpu_base->next_timer = timer; ++ if (timer->is_soft) ++ cpu_base->softirq_next_timer = timer; ++ else ++ cpu_base->next_timer = timer; + } + } + /* +@@ -521,25 +539,42 @@ static ktime_t __hrtimer_next_event_base + * Recomputes cpu_base::*next_timer and returns the earliest expires_next but + * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. + * ++ * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, ++ * those timers will get run whenever the softirq gets handled, at the end of ++ * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. ++ * ++ * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. ++ * The !softirq values are the minima across HRTIMER_ACTIVE, unless an actual ++ * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. ++ * + * @active_mask must be one of: + * - HRTIMER_ACTIVE, + * - HRTIMER_ACTIVE_SOFT, or + * - HRTIMER_ACTIVE_HARD. 
+ */ +-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, +- unsigned int active_mask) ++static ktime_t ++__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) + { + unsigned int active; ++ struct hrtimer *next_timer = NULL; + ktime_t expires_next = KTIME_MAX; + +- cpu_base->next_timer = NULL; ++ if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { ++ active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; ++ cpu_base->softirq_next_timer = NULL; ++ expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX); ++ ++ next_timer = cpu_base->softirq_next_timer; ++ } + +- active = cpu_base->active_bases & active_mask; +- expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); ++ if (active_mask & HRTIMER_ACTIVE_HARD) { ++ active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; ++ cpu_base->next_timer = next_timer; ++ expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); ++ } + + return expires_next; + } +-#endif + + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) + { +@@ -547,8 +582,14 @@ static inline ktime_t hrtimer_update_bas + ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; + ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; + +- return ktime_get_update_offsets_now(&base->clock_was_set_seq, ++ ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, + offs_real, offs_boot, offs_tai); ++ ++ base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; ++ base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; ++ base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; ++ ++ return now; + } + + /* +@@ -575,7 +616,23 @@ hrtimer_force_reprogram(struct hrtimer_c + { + ktime_t expires_next; + +- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); ++ /* ++ * Find the current next expiration time. ++ */ ++ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); ++ ++ if (cpu_base->next_timer && cpu_base->next_timer->is_soft) { ++ /* ++ * When the softirq is activated, hrtimer has to be ++ * programmed with the first hard hrtimer because soft ++ * timer interrupt could occur too late. ++ */ ++ if (cpu_base->softirq_activated) ++ expires_next = __hrtimer_get_next_event(cpu_base, ++ HRTIMER_ACTIVE_HARD); ++ else ++ cpu_base->softirq_expires_next = expires_next; ++ } + + if (skip_equal && expires_next == cpu_base->expires_next) + return; +@@ -702,7 +759,7 @@ static inline void retrigger_next_event( + * + * Called with interrupts disabled and base->cpu_base.lock held + */ +-static void hrtimer_reprogram(struct hrtimer *timer) ++static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) + { + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + struct hrtimer_clock_base *base = timer->base; +@@ -711,6 +768,28 @@ static void hrtimer_reprogram(struct hrt + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); + + /* ++ * CLOCK_REALTIME timer might be requested with an absolute ++ * expiry time which is less than base->offset. Set it to 0. 
++ */ ++ if (expires < 0) ++ expires = 0; ++ ++ if (timer->is_soft) { ++ if (cpu_base->softirq_activated) ++ return; ++ ++ if (!ktime_before(expires, cpu_base->softirq_expires_next)) ++ return; ++ ++ cpu_base->softirq_next_timer = timer; ++ cpu_base->softirq_expires_next = expires; ++ ++ if (!ktime_before(expires, cpu_base->expires_next) || ++ !reprogram) ++ return; ++ } ++ ++ /* + * If the timer is not on the current cpu, we cannot reprogram + * the other cpus clock event device. + */ +@@ -727,13 +806,6 @@ static void hrtimer_reprogram(struct hrt + if (cpu_base->in_hrtirq) + return; + +- /* +- * CLOCK_REALTIME timer might be requested with an absolute +- * expiry time which is less than base->offset. Set it to 0. +- */ +- if (expires < 0) +- expires = 0; +- + if (expires >= cpu_base->expires_next) + return; + +@@ -868,9 +940,10 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); + */ + static int enqueue_hrtimer(struct hrtimer *timer, + struct hrtimer_clock_base *base, +- enum hrtimer_mode mode) ++ enum hrtimer_mode mode, ++ bool modecheck) + { +- debug_activate(timer, mode); ++ debug_activate(timer, mode, modecheck); + + base->cpu_base->active_bases |= 1 << base->index; + +@@ -961,6 +1034,31 @@ static inline ktime_t hrtimer_update_low + return tim; + } + ++static void ++hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) ++{ ++ ktime_t expires; ++ ++ /* ++ * Find the next SOFT expiration. ++ */ ++ expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); ++ ++ /* ++ * reprogramming needs to be triggered, even if the next soft ++ * hrtimer expires at the same time than the next hard ++ * hrtimer. cpu_base->softirq_expires_next needs to be updated! ++ */ ++ if (!reprogram || expires == KTIME_MAX) ++ return; ++ ++ /* ++ * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() ++ * cpu_base->*expires_next is only set by hrtimer_reprogram() ++ */ ++ hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); ++} ++ + static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + u64 delta_ns, const enum hrtimer_mode mode, + struct hrtimer_clock_base *base) +@@ -980,7 +1078,7 @@ static int __hrtimer_start_range_ns(stru + /* Switch the timer base, if necessary: */ + new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); + +- return enqueue_hrtimer(timer, new_base, mode); ++ return enqueue_hrtimer(timer, new_base, mode, HRTIMER_MODECHECK); + } + /** + * hrtimer_start_range_ns - (re)start an hrtimer +@@ -988,7 +1086,8 @@ static int __hrtimer_start_range_ns(stru + * @tim: expiry time + * @delta_ns: "slack" range for the timer + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or +- * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) ++ * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); ++ * softirq based mode is considered for debug purpose only! 
+ */ + void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + u64 delta_ns, const enum hrtimer_mode mode) +@@ -999,7 +1098,7 @@ void hrtimer_start_range_ns(struct hrtim + base = lock_hrtimer_base(timer, &flags); + + if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) +- hrtimer_reprogram(timer); ++ hrtimer_reprogram(timer, true); + + unlock_hrtimer_base(timer, &flags); + } +@@ -1098,7 +1197,7 @@ u64 hrtimer_get_next_event(void) + raw_spin_lock_irqsave(&cpu_base->lock, flags); + + if (!__hrtimer_hres_active(cpu_base)) +- expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); ++ expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); + + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + +@@ -1256,7 +1355,8 @@ static void __run_hrtimer(struct hrtimer + */ + if (restart != HRTIMER_NORESTART && + !(timer->state & HRTIMER_STATE_ENQUEUED)) +- enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); ++ enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS, ++ !HRTIMER_MODECHECK); + + /* + * Separate the ->running assignment from the ->state assignment. +@@ -1308,6 +1408,23 @@ static void __hrtimer_run_queues(struct + } + } + ++static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) ++{ ++ struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ++ unsigned long flags; ++ ktime_t now; ++ ++ raw_spin_lock_irqsave(&cpu_base->lock, flags); ++ ++ now = hrtimer_update_base(cpu_base); ++ __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); ++ ++ cpu_base->softirq_activated = 0; ++ hrtimer_update_softirq_timer(cpu_base, true); ++ ++ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); ++} ++ + #ifdef CONFIG_HIGH_RES_TIMERS + + /* +@@ -1338,10 +1455,16 @@ void hrtimer_interrupt(struct clock_even + */ + cpu_base->expires_next = KTIME_MAX; + ++ if (!ktime_before(now, cpu_base->softirq_expires_next)) { ++ cpu_base->softirq_expires_next = KTIME_MAX; ++ cpu_base->softirq_activated = 1; ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ } ++ + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); + + /* Reevaluate the clock bases for the next expiry */ +- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); ++ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); + /* + * Store the new expiry value so the migration code can verify + * against it. +@@ -1445,6 +1568,13 @@ void hrtimer_run_queues(void) + + raw_spin_lock_irqsave(&cpu_base->lock, flags); + now = hrtimer_update_base(cpu_base); ++ ++ if (!ktime_before(now, cpu_base->softirq_expires_next)) { ++ cpu_base->softirq_expires_next = KTIME_MAX; ++ cpu_base->softirq_activated = 1; ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ } ++ + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + } +@@ -1626,6 +1756,7 @@ int hrtimers_prepare_cpu(unsigned int cp + cpu_base->cpu = cpu; + cpu_base->hres_active = 0; + cpu_base->expires_next = KTIME_MAX; ++ cpu_base->softirq_expires_next = KTIME_MAX; + return 0; + } + +@@ -1657,7 +1788,8 @@ static void migrate_hrtimer_list(struct + * sort out already expired timers and reprogram the + * event device. + */ +- enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); ++ enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS, ++ !HRTIMER_MODECHECK); + } + } + +@@ -1684,6 +1816,12 @@ int hrtimers_dead_cpu(unsigned int scpu) + &new_base->clock_base[i]); + } + ++ /* ++ * The migration might have changed the first expiring softirq ++ * timer on this CPU. Update it. 
++ */ ++ hrtimer_update_softirq_timer(new_base, false); ++ + raw_spin_unlock(&old_base->lock); + raw_spin_unlock(&new_base->lock); + +@@ -1698,6 +1836,7 @@ int hrtimers_dead_cpu(unsigned int scpu) + void __init hrtimers_init(void) + { + hrtimers_prepare_cpu(smp_processor_id()); ++ open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); + } + + /** diff --git a/debian/patches/features/all/rt/0028-tracing-Add-hist-trigger-support-for-variable-refere.patch b/debian/patches/features/all/rt/0028-tracing-Add-hist-trigger-support-for-variable-refere.patch deleted file mode 100644 index 741bd52d1..000000000 --- a/debian/patches/features/all/rt/0028-tracing-Add-hist-trigger-support-for-variable-refere.patch +++ /dev/null @@ -1,106 +0,0 @@ -From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:29 -0500 -Subject: [PATCH 28/32] tracing: Add hist trigger support for variable - reference aliases -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Add support for alias=$somevar where alias can be used as -onmatch($alias). - -Signed-off-by: Tom Zanussi -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/trace/trace_events_hist.c | 46 ++++++++++++++++++++++++++++++++++++--- - 1 file changed, 43 insertions(+), 3 deletions(-) - ---- a/kernel/trace/trace_events_hist.c -+++ b/kernel/trace/trace_events_hist.c -@@ -225,6 +225,7 @@ enum hist_field_flags { - HIST_FIELD_FL_EXPR = 16384, - HIST_FIELD_FL_VAR_REF = 32768, - HIST_FIELD_FL_CPU = 65536, -+ HIST_FIELD_FL_ALIAS = 131072, - }; - - struct hist_trigger_attrs { -@@ -1414,7 +1415,8 @@ static const char *hist_field_name(struc - - if (field->field) - field_name = field->field->name; -- else if (field->flags & HIST_FIELD_FL_LOG2) -+ else if (field->flags & HIST_FIELD_FL_LOG2 || -+ field->flags & HIST_FIELD_FL_ALIAS) - field_name = hist_field_name(field->operands[0], ++level); - else if (field->flags & HIST_FIELD_FL_TIMESTAMP) - field_name = "$common_timestamp"; -@@ -1819,7 +1821,7 @@ static struct hist_field *create_hist_fi - - hist_field->hist_data = hist_data; - -- if (flags & HIST_FIELD_FL_EXPR) -+ if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS) - goto out; /* caller will populate */ - - if (flags & HIST_FIELD_FL_VAR_REF) { -@@ -2013,6 +2015,34 @@ parse_field(struct hist_trigger_data *hi - return field; - } - -+static struct hist_field *create_alias(struct hist_trigger_data *hist_data, -+ struct hist_field *var_ref, -+ char *var_name) -+{ -+ struct hist_field *alias = NULL; -+ unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR | -+ HIST_FIELD_FL_VAR_ONLY; -+ -+ alias = create_hist_field(hist_data, NULL, flags, var_name); -+ if (!alias) -+ return NULL; -+ -+ alias->fn = var_ref->fn; -+ alias->operands[0] = var_ref; -+ alias->var.idx = var_ref->var.idx; -+ alias->var.hist_data = var_ref->hist_data; -+ alias->size = var_ref->size; -+ alias->is_signed = var_ref->is_signed; -+ alias->type = kstrdup(var_ref->type, GFP_KERNEL); -+ if (!alias->type) { -+ kfree(alias->type); -+ destroy_hist_field(alias, 0); -+ return NULL; -+ } -+ -+ return alias; -+} -+ - struct hist_field *parse_atom(struct hist_trigger_data *hist_data, - struct trace_event_file *file, char *str, - unsigned long *flags, char *var_name) -@@ -2036,6 +2066,13 @@ struct hist_field *parse_atom(struct his - if (hist_field) { - hist_data->var_refs[hist_data->n_var_refs] = hist_field; - hist_field->var_ref_idx = hist_data->n_var_refs++; -+ if (var_name) { -+ hist_field = create_alias(hist_data, hist_field, var_name); -+ if (!hist_field) { -+ 
ret = -ENOMEM; -+ goto out; -+ } -+ } - return hist_field; - } - -@@ -4152,8 +4189,11 @@ static void hist_field_print(struct seq_ - seq_puts(m, "$common_timestamp"); - else if (hist_field->flags & HIST_FIELD_FL_CPU) - seq_puts(m, "cpu"); -- else if (field_name) -+ else if (field_name) { -+ if (hist_field->flags & HIST_FIELD_FL_ALIAS) -+ seq_putc(m, '$'); - seq_printf(m, "%s", field_name); -+ } - - if (hist_field->flags) { - const char *flags_str = get_hist_field_flags(hist_field); diff --git a/debian/patches/features/all/rt/0019-tracing-Add-variable-reference-handling-to-hist-trig.patch b/debian/patches/features/all/rt/0028-tracing-Add-variable-reference-handling-to-hist-trig.patch similarity index 55% rename from debian/patches/features/all/rt/0019-tracing-Add-variable-reference-handling-to-hist-trig.patch rename to debian/patches/features/all/rt/0028-tracing-Add-variable-reference-handling-to-hist-trig.patch index 8f9b88654..da23fe146 100644 --- a/debian/patches/features/all/rt/0019-tracing-Add-variable-reference-handling-to-hist-trig.patch +++ b/debian/patches/features/all/rt/0028-tracing-Add-variable-reference-handling-to-hist-trig.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:20 -0500 -Subject: [PATCH 19/32] tracing: Add variable reference handling to hist +Date: Fri, 22 Sep 2017 14:59:59 -0500 +Subject: [PATCH 28/42] tracing: Add variable reference handling to hist triggers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add the necessary infrastructure to allow the variables defined on one event to be referenced in another. This allows variables set by a @@ -25,14 +25,41 @@ be displayed in a latency histogram. 
Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace.h | 2 - kernel/trace/trace_events_hist.c | 719 +++++++++++++++++++++++++++++------- + kernel/trace/trace.c | 2 + kernel/trace/trace.h | 3 + kernel/trace/trace_events_hist.c | 613 ++++++++++++++++++++++++++++++++---- kernel/trace/trace_events_trigger.c | 6 - 3 files changed, 604 insertions(+), 123 deletions(-) + 4 files changed, 568 insertions(+), 56 deletions(-) +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7766,6 +7766,7 @@ static int instance_mkdir(const char *na + + INIT_LIST_HEAD(&tr->systems); + INIT_LIST_HEAD(&tr->events); ++ INIT_LIST_HEAD(&tr->hist_vars); + + if (allocate_trace_buffers(tr, trace_buf_size) < 0) + goto out_free_tr; +@@ -8513,6 +8514,7 @@ ssize_t trace_parse_run_command(struct f + + INIT_LIST_HEAD(&global_trace.systems); + INIT_LIST_HEAD(&global_trace.events); ++ INIT_LIST_HEAD(&global_trace.hist_vars); + list_add(&global_trace.list, &ftrace_trace_arrays); + + apply_trace_boot_options(); --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -1448,6 +1448,8 @@ extern void pause_named_trigger(struct e +@@ -274,6 +274,7 @@ struct trace_array { + int function_enabled; + #endif + int time_stamp_abs_ref; ++ struct list_head hist_vars; + }; + + enum { +@@ -1550,6 +1551,8 @@ extern void pause_named_trigger(struct e extern void unpause_named_trigger(struct event_trigger_data *data); extern void set_named_trigger_data(struct event_trigger_data *data, struct event_trigger_data *named_data); @@ -43,20 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern int register_trigger_hist_enable_disable_cmds(void); --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -26,8 +26,10 @@ - - struct hist_field; - --typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event, -- struct ring_buffer_event *rbe); -+typedef u64 (*hist_field_fn_t) (struct hist_field *field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event); - - #define HIST_FIELD_OPERANDS_MAX 2 - #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) -@@ -57,30 +59,41 @@ struct hist_field { +@@ -60,6 +60,9 @@ struct hist_field { struct hist_var var; enum field_op_id operator; char *name; @@ -65,149 +79,16 @@ Signed-off-by: Sebastian Andrzej Siewior + bool read_once; }; --static u64 hist_field_none(struct hist_field *field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_none(struct hist_field *field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - return 0; - } - --static u64 hist_field_counter(struct hist_field *field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_counter(struct hist_field *field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - return 1; - } - --static u64 hist_field_string(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_string(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - char *addr = (char *)(event + hist_field->field->offset); - - return (u64)(unsigned long)addr; - } - --static u64 hist_field_dynstring(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_dynstring(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - u32 str_item = *(u32 *)(event + 
hist_field->field->offset); - int str_loc = str_item & 0xffff; -@@ -89,54 +102,64 @@ static u64 hist_field_dynstring(struct h - return (u64)(unsigned long)addr; - } - --static u64 hist_field_pstring(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_pstring(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - char **addr = (char **)(event + hist_field->field->offset); - - return (u64)(unsigned long)*addr; - } - --static u64 hist_field_log2(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_log2(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - struct hist_field *operand = hist_field->operands[0]; - -- u64 val = operand->fn(operand, event, rbe); -+ u64 val = operand->fn(operand, elt, rbe, event); - - return (u64) ilog2(roundup_pow_of_two(val)); - } - --static u64 hist_field_plus(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_plus(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - struct hist_field *operand1 = hist_field->operands[0]; - struct hist_field *operand2 = hist_field->operands[1]; - -- u64 val1 = operand1->fn(operand1, event, rbe); -- u64 val2 = operand2->fn(operand2, event, rbe); -+ u64 val1 = operand1->fn(operand1, elt, rbe, event); -+ u64 val2 = operand2->fn(operand2, elt, rbe, event); - - return val1 + val2; - } - --static u64 hist_field_minus(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_minus(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - struct hist_field *operand1 = hist_field->operands[0]; - struct hist_field *operand2 = hist_field->operands[1]; - -- u64 val1 = operand1->fn(operand1, event, rbe); -- u64 val2 = operand2->fn(operand2, event, rbe); -+ u64 val1 = operand1->fn(operand1, elt, rbe, event); -+ u64 val2 = operand2->fn(operand2, elt, rbe, event); - - return val1 - val2; - } - --static u64 hist_field_unary_minus(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_unary_minus(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - struct hist_field *operand = hist_field->operands[0]; - -- s64 sval = (s64)operand->fn(operand, event, rbe); -+ s64 sval = (s64)operand->fn(operand, elt, rbe, event); - u64 val = (u64)-sval; - - return val; -@@ -144,8 +167,9 @@ static u64 hist_field_unary_minus(struct - - #define DEFINE_HIST_FIELD_FN(type) \ - static u64 hist_field_##type(struct hist_field *hist_field, \ -- void *event, \ -- struct ring_buffer_event *rbe) \ -+ struct tracing_map_elt *elt, \ -+ struct ring_buffer_event *rbe, \ -+ void *event) \ - { \ - type *addr = (type *)(event + hist_field->field->offset); \ - \ -@@ -193,6 +217,7 @@ enum hist_field_flags { - HIST_FIELD_FL_VAR = 4096, - HIST_FIELD_FL_VAR_ONLY = 8192, - HIST_FIELD_FL_EXPR = 16384, -+ HIST_FIELD_FL_VAR_REF = 32768, + static u64 hist_field_none(struct hist_field *field, +@@ -215,6 +218,7 @@ enum hist_field_flags { + HIST_FIELD_FL_VAR = 1 << 12, + HIST_FIELD_FL_VAR_ONLY = 1 << 13, + HIST_FIELD_FL_EXPR = 1 << 14, ++ HIST_FIELD_FL_VAR_REF = 1 << 15, }; - struct hist_trigger_attrs { -@@ -225,10 +250,14 @@ struct 
hist_trigger_data { + struct var_defs { +@@ -255,6 +259,8 @@ struct hist_trigger_data { struct tracing_map *map; bool enable_timestamps; bool remove; @@ -215,21 +96,11 @@ Signed-off-by: Sebastian Andrzej Siewior + unsigned int n_var_refs; }; --static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, -- struct ring_buffer_event *rbe) -+static u64 hist_field_timestamp(struct hist_field *hist_field, -+ struct tracing_map_elt *elt, -+ struct ring_buffer_event *rbe, -+ void *event) - { - struct hist_trigger_data *hist_data = hist_field->hist_data; - struct trace_array *tr = hist_data->event_file->tr; -@@ -241,6 +270,324 @@ static u64 hist_field_timestamp(struct h + static u64 hist_field_timestamp(struct hist_field *hist_field, +@@ -273,10 +279,344 @@ static u64 hist_field_timestamp(struct h return ts; } -+static LIST_HEAD(hist_var_list); -+ +struct hist_var_data { + struct list_head list; + struct hist_trigger_data *hist_data; @@ -280,10 +151,11 @@ Signed-off-by: Sebastian Andrzej Siewior +static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, + unsigned int var_idx) +{ ++ struct trace_array *tr = hist_data->event_file->tr; + struct hist_field *found = NULL; + struct hist_var_data *var_data; + -+ list_for_each_entry(var_data, &hist_var_list, list) { ++ list_for_each_entry(var_data, &tr->hist_vars, list) { + found = find_var_ref(var_data->hist_data, hist_data, var_idx); + if (found) + break; @@ -313,9 +185,10 @@ Signed-off-by: Sebastian Andrzej Siewior + +static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data) +{ ++ struct trace_array *tr = hist_data->event_file->tr; + struct hist_var_data *var_data, *found = NULL; + -+ list_for_each_entry(var_data, &hist_var_list, list) { ++ list_for_each_entry(var_data, &tr->hist_vars, list) { + if (var_data->hist_data == hist_data) { + found = var_data; + break; @@ -328,40 +201,56 @@ Signed-off-by: Sebastian Andrzej Siewior +static bool has_hist_vars(struct hist_trigger_data *hist_data) +{ + struct hist_field *hist_field; -+ bool found = false; -+ int i; ++ int i, j; + + for_each_hist_field(i, hist_data) { + hist_field = hist_data->fields[i]; -+ if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR) { -+ found = true; -+ break; ++ if (hist_field && ++ (hist_field->flags & HIST_FIELD_FL_VAR || ++ hist_field->flags & HIST_FIELD_FL_VAR_REF)) ++ return true; ++ ++ for (j = 0; j < HIST_FIELD_OPERANDS_MAX; j++) { ++ struct hist_field *operand; ++ ++ operand = hist_field->operands[j]; ++ if (operand && ++ (operand->flags & HIST_FIELD_FL_VAR || ++ operand->flags & HIST_FIELD_FL_VAR_REF)) ++ return true; + } + } + -+ return found; ++ return false; +} + +static int save_hist_vars(struct hist_trigger_data *hist_data) +{ ++ struct trace_array *tr = hist_data->event_file->tr; + struct hist_var_data *var_data; + + var_data = find_hist_vars(hist_data); + if (var_data) + return 0; + ++ if (trace_array_get(tr) < 0) ++ return -ENODEV; ++ + var_data = kzalloc(sizeof(*var_data), GFP_KERNEL); -+ if (!var_data) ++ if (!var_data) { ++ trace_array_put(tr); + return -ENOMEM; ++ } + + var_data->hist_data = hist_data; -+ list_add(&var_data->list, &hist_var_list); ++ list_add(&var_data->list, &tr->hist_vars); + + return 0; +} + +static void remove_hist_vars(struct hist_trigger_data *hist_data) +{ ++ struct trace_array *tr = hist_data->event_file->tr; + struct hist_var_data *var_data; + + var_data = find_hist_vars(hist_data); @@ -374,6 +263,8 @@ Signed-off-by: Sebastian Andrzej Siewior + list_del(&var_data->list); + + 
kfree(var_data); ++ ++ trace_array_put(tr); +} + +static struct hist_field *find_var_field(struct hist_trigger_data *hist_data, @@ -413,7 +304,8 @@ Signed-off-by: Sebastian Andrzej Siewior + return NULL; +} + -+static struct trace_event_file *find_var_file(const char *system, ++static struct trace_event_file *find_var_file(struct trace_array *tr, ++ const char *system, + const char *event_name, + const char *var_name) +{ @@ -423,7 +315,7 @@ Signed-off-by: Sebastian Andrzej Siewior + struct trace_event_file *file; + const char *name; + -+ list_for_each_entry(var_data, &hist_var_list, list) { ++ list_for_each_entry(var_data, &tr->hist_vars, list) { + var_hist_data = var_data->hist_data; + file = var_hist_data->event_file; + call = file->event_call; @@ -465,14 +357,15 @@ Signed-off-by: Sebastian Andrzej Siewior + return NULL; +} + -+static struct hist_field *find_event_var(const char *system, ++static struct hist_field *find_event_var(struct trace_array *tr, ++ const char *system, + const char *event_name, + const char *var_name) +{ + struct hist_field *hist_field = NULL; + struct trace_event_file *file; + -+ file = find_var_file(system, event_name, var_name); ++ file = find_var_file(tr, system, event_name, var_name); + if (!file) + return NULL; + @@ -481,11 +374,11 @@ Signed-off-by: Sebastian Andrzej Siewior + return hist_field; +} + -+struct hist_elt_data { -+ char *comm; + struct hist_elt_data { + char *comm; + u64 *var_ref_vals; -+}; -+ + }; + +static u64 hist_field_var_ref(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, @@ -549,7 +442,7 @@ Signed-off-by: Sebastian Andrzej Siewior static const char *hist_field_name(struct hist_field *field, unsigned int level) { -@@ -255,7 +602,8 @@ static const char *hist_field_name(struc +@@ -291,7 +631,8 @@ static const char *hist_field_name(struc field_name = hist_field_name(field->operands[0], ++level); else if (field->flags & HIST_FIELD_FL_TIMESTAMP) field_name = "$common_timestamp"; @@ -559,115 +452,25 @@ Signed-off-by: Sebastian Andrzej Siewior field_name = field->name; if (field_name == NULL) -@@ -439,26 +787,36 @@ static inline void save_comm(char *comm, - memcpy(comm, task->comm, TASK_COMM_LEN); - } - --static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt) -+static void hist_trigger_elt_data_free(struct tracing_map_elt *elt) - { -- kfree((char *)elt->private_data); -+ struct hist_elt_data *private_data = elt->private_data; -+ -+ kfree(private_data->comm); -+ kfree(private_data); - } - --static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt) -+static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) - { - struct hist_trigger_data *hist_data = elt->map->private_data; -+ unsigned int size = TASK_COMM_LEN + 1; -+ struct hist_elt_data *elt_data; - struct hist_field *key_field; - unsigned int i; - -+ elt->private_data = elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); -+ if (!elt_data) -+ return -ENOMEM; -+ - for_each_hist_key_field(i, hist_data) { - key_field = hist_data->fields[i]; - - if (key_field->flags & HIST_FIELD_FL_EXECNAME) { -- unsigned int size = TASK_COMM_LEN + 1; -- -- elt->private_data = kzalloc(size, GFP_KERNEL); -- if (!elt->private_data) -+ elt_data->comm = kzalloc(size, GFP_KERNEL); -+ if (!elt_data->comm) { -+ kfree(elt_data); -+ elt->private_data = NULL; - return -ENOMEM; -+ } - break; - } - } -@@ -466,29 +824,31 @@ static int hist_trigger_elt_comm_alloc(s - return 0; - } - --static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to, 
-+static void hist_trigger_elt_data_copy(struct tracing_map_elt *to, - struct tracing_map_elt *from) - { -- char *comm_from = from->private_data; -- char *comm_to = to->private_data; -+ struct hist_elt_data *from_data = from->private_data; -+ struct hist_elt_data *to_data = to->private_data; -+ -+ memcpy(to_data, from_data, sizeof(*to)); - -- if (comm_from) -- memcpy(comm_to, comm_from, TASK_COMM_LEN + 1); -+ if (from_data->comm) -+ memcpy(to_data->comm, from_data->comm, TASK_COMM_LEN + 1); - } - --static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt) -+static void hist_trigger_elt_data_init(struct tracing_map_elt *elt) - { -- char *comm = elt->private_data; -+ struct hist_elt_data *private_data = elt->private_data; - -- if (comm) -- save_comm(comm, current); -+ if (private_data->comm) -+ save_comm(private_data->comm, current); - } - --static const struct tracing_map_ops hist_trigger_elt_comm_ops = { -- .elt_alloc = hist_trigger_elt_comm_alloc, -- .elt_copy = hist_trigger_elt_comm_copy, -- .elt_free = hist_trigger_elt_comm_free, -- .elt_init = hist_trigger_elt_comm_init, -+static const struct tracing_map_ops hist_trigger_elt_data_ops = { -+ .elt_alloc = hist_trigger_elt_data_alloc, -+ .elt_copy = hist_trigger_elt_data_copy, -+ .elt_free = hist_trigger_elt_data_free, -+ .elt_init = hist_trigger_elt_data_init, - }; - - static char *expr_str(struct hist_field *field, unsigned int level) -@@ -513,6 +873,8 @@ static char *expr_str(struct hist_field +@@ -596,6 +937,8 @@ static char *expr_str(struct hist_field return expr; } + if (field->operands[0]->flags & HIST_FIELD_FL_VAR_REF) + strcat(expr, "$"); strcat(expr, hist_field_name(field->operands[0], 0)); - - switch (field->operator) { -@@ -527,6 +889,8 @@ static char *expr_str(struct hist_field + if (field->operands[0]->flags) { + const char *flags_str = get_hist_field_flags(field->operands[0]); +@@ -618,6 +961,8 @@ static char *expr_str(struct hist_field return NULL; } + if (field->operands[1]->flags & HIST_FIELD_FL_VAR_REF) + strcat(expr, "$"); strcat(expr, hist_field_name(field->operands[1], 0)); - - return expr; -@@ -597,6 +961,11 @@ static struct hist_field *create_hist_fi + if (field->operands[1]->flags) { + const char *flags_str = get_hist_field_flags(field->operands[1]); +@@ -697,6 +1042,11 @@ static struct hist_field *create_hist_fi if (flags & HIST_FIELD_FL_EXPR) goto out; /* caller will populate */ @@ -678,11 +481,32 @@ Signed-off-by: Sebastian Andrzej Siewior + if (flags & HIST_FIELD_FL_HITCOUNT) { hist_field->fn = hist_field_counter; - goto out; -@@ -669,6 +1038,44 @@ static void destroy_hist_fields(struct h + hist_field->size = sizeof(u64); +@@ -790,6 +1140,51 @@ static void destroy_hist_fields(struct h } } ++static int init_var_ref(struct hist_field *ref_field, ++ struct hist_field *var_field) ++{ ++ ref_field->var.idx = var_field->var.idx; ++ ref_field->var.hist_data = var_field->hist_data; ++ ref_field->size = var_field->size; ++ ref_field->is_signed = var_field->is_signed; ++ ++ ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); ++ if (!ref_field->name) ++ return -ENOMEM; ++ ++ ref_field->type = kstrdup(var_field->type, GFP_KERNEL); ++ if (!ref_field->type) { ++ kfree(ref_field->name); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ +static struct hist_field *create_var_ref(struct hist_field *var_field) +{ + unsigned long flags = HIST_FIELD_FL_VAR_REF; @@ -690,12 +514,7 @@ Signed-off-by: Sebastian Andrzej Siewior + + ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); + if (ref_field) { 
-+ ref_field->var.idx = var_field->var.idx; -+ ref_field->var.hist_data = var_field->hist_data; -+ ref_field->size = var_field->size; -+ ref_field->is_signed = var_field->is_signed; -+ ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); -+ if (!ref_field->name) { ++ if (init_var_ref(ref_field, var_field)) { + destroy_hist_field(ref_field, 0); + return NULL; + } @@ -704,17 +523,50 @@ Signed-off-by: Sebastian Andrzej Siewior + return ref_field; +} + -+static struct hist_field *parse_var_ref(char *system, char *event_name, ++static bool is_var_ref(char *var_name) ++{ ++ if (!var_name || strlen(var_name) < 2 || var_name[0] != '$') ++ return false; ++ ++ return true; ++} ++ + static char *field_name_from_var(struct hist_trigger_data *hist_data, + char *var_name) + { +@@ -801,7 +1196,7 @@ static char *field_name_from_var(struct + + if (strcmp(var_name, name) == 0) { + field = hist_data->attrs->var_defs.expr[i]; +- if (contains_operator(field)) ++ if (contains_operator(field) || is_var_ref(field)) + continue; + return field; + } +@@ -813,11 +1208,32 @@ static char *field_name_from_var(struct + static char *local_field_var_ref(struct hist_trigger_data *hist_data, + char *var_name) + { ++ if (!is_var_ref(var_name)) ++ return NULL; ++ + var_name++; + + return field_name_from_var(hist_data, var_name); + } + ++static struct hist_field *parse_var_ref(struct trace_array *tr, ++ char *system, char *event_name, + char *var_name) +{ + struct hist_field *var_field = NULL, *ref_field = NULL; + -+ if (!var_name || strlen(var_name) < 2 || var_name[0] != '$') ++ if (!is_var_ref(var_name)) + return NULL; + + var_name++; + -+ var_field = find_event_var(system, event_name, var_name); ++ var_field = find_event_var(tr, system, event_name, var_name); + if (var_field) + ref_field = create_var_ref(var_field); + @@ -724,36 +576,50 @@ Signed-off-by: Sebastian Andrzej Siewior static struct ftrace_event_field * parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *field_str, unsigned long *flags) -@@ -715,10 +1122,28 @@ struct hist_field *parse_atom(struct his +@@ -874,13 +1290,39 @@ struct hist_field *parse_atom(struct his struct trace_event_file *file, char *str, unsigned long *flags, char *var_name) { +- char *s; + char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str; ++ struct trace_array *tr = hist_data->event_file->tr; struct ftrace_event_field *field = NULL; struct hist_field *hist_field = NULL; int ret = 0; +- s = local_field_var_ref(hist_data, str); +- if (s) + s = strchr(str, '.'); + if (s) { + s = strchr(++s, '.'); + if (s) { + ref_system = strsep(&str, "."); ++ if (!str) { ++ ret = -EINVAL; ++ goto out; ++ } + ref_event = strsep(&str, "."); ++ if (!str) { ++ ret = -EINVAL; ++ goto out; ++ } + ref_var = str; + } + } + -+ hist_field = parse_var_ref(ref_system, ref_event, ref_var); -+ if (hist_field) { -+ hist_data->var_refs[hist_data->n_var_refs] = hist_field; -+ hist_field->var_ref_idx = hist_data->n_var_refs++; -+ return hist_field; -+ } -+ ++ s = local_field_var_ref(hist_data, ref_var); ++ if (!s) { ++ hist_field = parse_var_ref(tr, ref_system, ref_event, ref_var); ++ if (hist_field) { ++ hist_data->var_refs[hist_data->n_var_refs] = hist_field; ++ hist_field->var_ref_idx = hist_data->n_var_refs++; ++ return hist_field; ++ } ++ } else + str = s; + field = parse_field(hist_data, file, str, flags); - if (IS_ERR(field)) { - ret = PTR_ERR(field); -@@ -885,6 +1310,9 @@ static struct hist_field *parse_expr(str +@@ -1053,6 +1495,9 @@ static struct hist_field 
*parse_expr(str goto free; } @@ -763,7 +629,7 @@ Signed-off-by: Sebastian Andrzej Siewior expr->operands[0] = operand1; expr->operands[1] = operand2; expr->operator = field_op; -@@ -926,43 +1354,6 @@ static int create_hitcount_val(struct hi +@@ -1099,43 +1544,6 @@ static int create_hitcount_val(struct hi return 0; } @@ -804,11 +670,11 @@ Signed-off-by: Sebastian Andrzej Siewior - return NULL; -} - - static int create_val_field(struct hist_trigger_data *hist_data, - unsigned int val_idx, - struct trace_event_file *file, -@@ -1119,6 +1510,12 @@ static int create_key_field(struct hist_ - } + static int __create_val_field(struct hist_trigger_data *hist_data, + unsigned int val_idx, + struct trace_event_file *file, +@@ -1269,6 +1677,12 @@ static int create_key_field(struct hist_ + goto out; } + if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { @@ -820,50 +686,28 @@ Signed-off-by: Sebastian Andrzej Siewior key_size = hist_field->size; } -@@ -1378,21 +1775,6 @@ static int create_tracing_map_fields(str - return 0; - } +@@ -1604,6 +2018,7 @@ create_hist_data(unsigned int map_bits, --static bool need_tracing_map_ops(struct hist_trigger_data *hist_data) --{ -- struct hist_field *key_field; -- unsigned int i; -- -- for_each_hist_key_field(i, hist_data) { -- key_field = hist_data->fields[i]; -- -- if (key_field->flags & HIST_FIELD_FL_EXECNAME) -- return true; -- } -- -- return false; --} -- - static struct hist_trigger_data * - create_hist_data(unsigned int map_bits, - struct hist_trigger_attrs *attrs, -@@ -1418,8 +1800,7 @@ create_hist_data(unsigned int map_bits, + hist_data->attrs = attrs; + hist_data->remove = remove; ++ hist_data->event_file = file; + + ret = create_hist_fields(hist_data, file); + if (ret) +@@ -1626,12 +2041,6 @@ create_hist_data(unsigned int map_bits, + ret = create_tracing_map_fields(hist_data); if (ret) goto free; - -- if (need_tracing_map_ops(hist_data)) -- map_ops = &hist_trigger_elt_comm_ops; -+ map_ops = &hist_trigger_elt_data_ops; - - hist_data->map = tracing_map_create(map_bits, hist_data->key_size, - map_ops, hist_data); -@@ -1433,10 +1814,6 @@ create_hist_data(unsigned int map_bits, - if (ret) - goto free; - +- - ret = tracing_map_init(hist_data->map); - if (ret) - goto free; - - hist_data->event_file = file; +- hist_data->event_file = file; out: return hist_data; -@@ -1452,15 +1829,20 @@ create_hist_data(unsigned int map_bits, + free: +@@ -1646,12 +2055,17 @@ create_hist_data(unsigned int map_bits, static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, struct tracing_map_elt *elt, void *rec, @@ -881,43 +725,16 @@ Signed-off-by: Sebastian Andrzej Siewior + for_each_hist_val_field(i, hist_data) { hist_field = hist_data->fields[i]; -- hist_val = hist_field->fn(hist_field, rbe, rec); -+ hist_val = hist_field->fn(hist_field, elt, rbe, rec); - if (hist_field->flags & HIST_FIELD_FL_VAR) { - var_idx = hist_field->var.idx; - tracing_map_set_var(elt, var_idx, hist_val); -@@ -1473,7 +1855,7 @@ static void hist_trigger_elt_update(stru - for_each_hist_key_field(i, hist_data) { - hist_field = hist_data->fields[i]; - if (hist_field->flags & HIST_FIELD_FL_VAR) { -- hist_val = hist_field->fn(hist_field, rbe, rec); -+ hist_val = hist_field->fn(hist_field, elt, rbe, rec); - var_idx = hist_field->var.idx; - tracing_map_set_var(elt, var_idx, hist_val); - } -@@ -1510,10 +1892,11 @@ static void event_hist_trigger(struct ev + hist_val = hist_field->fn(hist_field, elt, rbe, rec); +@@ -1704,6 +2118,7 @@ static void event_hist_trigger(struct ev struct hist_trigger_data *hist_data 
= data->private_data; bool use_compound_key = (hist_data->n_keys > 1); unsigned long entries[HIST_STACKTRACE_DEPTH]; + u64 var_ref_vals[TRACING_MAP_VARS_MAX]; char compound_key[HIST_KEY_SIZE_MAX]; -+ struct tracing_map_elt *elt = NULL; + struct tracing_map_elt *elt = NULL; struct stack_trace stacktrace; - struct hist_field *key_field; -- struct tracing_map_elt *elt; - u64 field_contents; - void *key = NULL; - unsigned int i; -@@ -1534,7 +1917,7 @@ static void event_hist_trigger(struct ev - - key = entries; - } else { -- field_contents = key_field->fn(key_field, rec, rbe); -+ field_contents = key_field->fn(key_field, elt, rbe, rec); - if (key_field->flags & HIST_FIELD_FL_STRING) { - key = (void *)(unsigned long)field_contents; - use_compound_key = true; -@@ -1549,9 +1932,15 @@ static void event_hist_trigger(struct ev +@@ -1743,9 +2158,15 @@ static void event_hist_trigger(struct ev if (use_compound_key) key = compound_key; @@ -935,17 +752,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void hist_trigger_stacktrace_print(struct seq_file *m, -@@ -1608,7 +1997,8 @@ hist_trigger_entry_print(struct seq_file - seq_printf(m, "%s: [%llx] %-55s", field_name, - uval, str); - } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { -- char *comm = elt->private_data; -+ struct hist_elt_data *elt_data = elt->private_data; -+ char *comm = elt_data->comm; - - uval = *(u64 *)(key + key_field->offset); - seq_printf(m, "%s: %-16s[%10llu]", field_name, -@@ -1653,7 +2043,8 @@ hist_trigger_entry_print(struct seq_file +@@ -1848,7 +2269,8 @@ hist_trigger_entry_print(struct seq_file field_name = hist_field_name(hist_data->fields[i], 0); if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR || @@ -955,7 +762,7 @@ Signed-off-by: Sebastian Andrzej Siewior continue; if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { -@@ -1925,7 +2316,11 @@ static void event_hist_trigger_free(stru +@@ -2098,7 +2520,11 @@ static void event_hist_trigger_free(stru if (!data->ref) { if (data->name) del_named_trigger(data); @@ -967,7 +774,7 @@ Signed-off-by: Sebastian Andrzej Siewior destroy_hist_data(hist_data); } } -@@ -2139,23 +2534,55 @@ static int hist_register_trigger(char *g +@@ -2311,23 +2737,55 @@ static int hist_register_trigger(char *g goto out; } @@ -1027,8 +834,8 @@ Signed-off-by: Sebastian Andrzej Siewior static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) -@@ -2186,10 +2613,32 @@ static void hist_unregister_trigger(char - tracing_set_time_stamp_abs(file->tr, false); +@@ -2360,10 +2818,29 @@ static void hist_unregister_trigger(char + } } +static bool hist_file_check_refs(struct trace_event_file *file) @@ -1036,14 +843,11 @@ Signed-off-by: Sebastian Andrzej Siewior + struct hist_trigger_data *hist_data; + struct event_trigger_data *test; + -+ printk("func: %s\n", __func__); -+ + list_for_each_entry_rcu(test, &file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { + hist_data = test->private_data; + if (check_var_refs(hist_data)) + return true; -+ break; + } + } + @@ -1055,12 +859,12 @@ Signed-off-by: Sebastian Andrzej Siewior struct event_trigger_data *test, *n; + if (hist_file_check_refs(file)) -+ return; ++ return; + list_for_each_entry_safe(test, n, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { list_del_rcu(&test->list); -@@ -2262,6 +2711,11 @@ static int event_hist_trigger_func(struc +@@ -2436,6 +2913,11 @@ static int event_hist_trigger_func(struc } if (remove) { @@ 
-1072,7 +876,7 @@ Signed-off-by: Sebastian Andrzej Siewior cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); ret = 0; goto out_free; -@@ -2279,14 +2733,33 @@ static int event_hist_trigger_func(struc +@@ -2453,14 +2935,33 @@ static int event_hist_trigger_func(struc goto out_free; } else if (ret < 0) goto out_free; @@ -1108,7 +912,7 @@ Signed-off-by: Sebastian Andrzej Siewior destroy_hist_data(hist_data); --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c -@@ -919,6 +919,12 @@ void set_named_trigger_data(struct event +@@ -909,6 +909,12 @@ void set_named_trigger_data(struct event data->named_data = named_data; } diff --git a/debian/patches/features/all/rt/0029-hrtimer-Implement-SOFT-HARD-clock-base-selection.patch b/debian/patches/features/all/rt/0029-hrtimer-Implement-SOFT-HARD-clock-base-selection.patch new file mode 100644 index 000000000..138d4367b --- /dev/null +++ b/debian/patches/features/all/rt/0029-hrtimer-Implement-SOFT-HARD-clock-base-selection.patch @@ -0,0 +1,56 @@ +From: Anna-Maria Gleixner +Date: Sun, 22 Oct 2017 23:40:07 +0200 +Subject: [PATCH 29/36] hrtimer: Implement SOFT/HARD clock base selection +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +All prerequisites to handle hrtimers for expiry in either hard or soft +interrupt context are in place. + +Add the missing bit in hrtimer_init() which associates the timer to the +hard or the soft irq clock base. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/hrtimer.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1220,8 +1220,9 @@ static inline int hrtimer_clockid_to_bas + static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) + { ++ bool softtimer = !!(mode & HRTIMER_MODE_SOFT); ++ int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; + struct hrtimer_cpu_base *cpu_base; +- int base; + + memset(timer, 0, sizeof(struct hrtimer)); + +@@ -1235,7 +1236,8 @@ static void __hrtimer_init(struct hrtime + if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) + clock_id = CLOCK_MONOTONIC; + +- base = hrtimer_clockid_to_base(clock_id); ++ base += hrtimer_clockid_to_base(clock_id); ++ timer->is_soft = softtimer; + timer->base = &cpu_base->clock_base[base]; + timerqueue_init(&timer->node); + } +@@ -1244,8 +1246,13 @@ static void __hrtimer_init(struct hrtime + * hrtimer_init - initialize a timer to the given clock + * @timer: the timer to be initialized + * @clock_id: the clock to be used +- * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or +- * relative (HRTIMER_MODE_REL); pinned is not considered here! 
++ * @mode: The modes which are relevant for intitialization: ++ * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, ++ * HRTIMER_MODE_REL_SOFT ++ * ++ * The PINNED variants of the above can be handed in, ++ * but the PINNED bit is ignored as pinning happens ++ * when the hrtimer is started + */ + void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) diff --git a/debian/patches/features/all/rt/0021-tracing-Add-hist-trigger-action-hook.patch b/debian/patches/features/all/rt/0029-tracing-Add-hist-trigger-action-hook.patch similarity index 72% rename from debian/patches/features/all/rt/0021-tracing-Add-hist-trigger-action-hook.patch rename to debian/patches/features/all/rt/0029-tracing-Add-hist-trigger-action-hook.patch index 09d5dfb60..92c91b486 100644 --- a/debian/patches/features/all/rt/0021-tracing-Add-hist-trigger-action-hook.patch +++ b/debian/patches/features/all/rt/0029-tracing-Add-hist-trigger-action-hook.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:22 -0500 -Subject: [PATCH 21/32] tracing: Add hist trigger action hook -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 15:00:00 -0500 +Subject: [PATCH 29/42] tracing: Add hist trigger action hook +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add a hook for executing extra actions whenever a histogram entry is added or updated. @@ -17,8 +17,8 @@ implemented on top of it in later patches. Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 114 +++++++++++++++++++++++++++++++++++++-- - 1 file changed, 111 insertions(+), 3 deletions(-) + kernel/trace/trace_events_hist.c | 91 +++++++++++++++++++++++++++++++++++++-- + 1 file changed, 88 insertions(+), 3 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -30,17 +30,17 @@ Signed-off-by: Sebastian Andrzej Siewior enum field_op_id { FIELD_OP_NONE, -@@ -233,6 +234,9 @@ struct hist_trigger_attrs { - +@@ -241,6 +242,9 @@ struct hist_trigger_attrs { char *assignment_str[TRACING_MAP_VARS_MAX]; unsigned int n_assignments; -+ + + char *action_str[HIST_ACTIONS_MAX]; + unsigned int n_actions; ++ + struct var_defs var_defs; }; - struct hist_trigger_data { -@@ -252,6 +256,21 @@ struct hist_trigger_data { +@@ -261,6 +265,21 @@ struct hist_trigger_data { bool remove; struct hist_field *var_refs[TRACING_MAP_VARS_MAX]; unsigned int n_var_refs; @@ -62,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; static u64 hist_field_timestamp(struct hist_field *hist_field, -@@ -681,6 +700,9 @@ static void destroy_hist_trigger_attrs(s +@@ -710,6 +729,9 @@ static void destroy_hist_trigger_attrs(s for (i = 0; i < attrs->n_assignments; i++) kfree(attrs->assignment_str[i]); @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(attrs->name); kfree(attrs->sort_key_str); kfree(attrs->keys_str); -@@ -688,6 +710,16 @@ static void destroy_hist_trigger_attrs(s +@@ -717,6 +739,16 @@ static void destroy_hist_trigger_attrs(s kfree(attrs); } @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) { int ret = 0; -@@ -755,8 +787,9 @@ static struct hist_trigger_attrs *parse_ +@@ -800,8 +832,9 @@ static struct hist_trigger_attrs *parse_ else if (strcmp(str, "clear") == 0) attrs->clear = true; else { @@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ 
-1722,11 +1755,63 @@ static int create_sort_keys(struct hist_ +@@ -1949,11 +1982,42 @@ static int create_sort_keys(struct hist_ return ret; } @@ -129,27 +129,6 @@ Signed-off-by: Sebastian Andrzej Siewior + + return ret; +} -+ -+static void print_actions(struct seq_file *m, -+ struct hist_trigger_data *hist_data, -+ struct tracing_map_elt *elt) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < hist_data->n_actions; i++) { -+ struct action_data *data = hist_data->actions[i]; -+ } -+} -+ -+static void print_actions_spec(struct seq_file *m, -+ struct hist_trigger_data *hist_data) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < hist_data->n_actions; i++) { -+ struct action_data *data = hist_data->actions[i]; -+ } -+} + static void destroy_hist_data(struct hist_trigger_data *hist_data) { @@ -165,7 +144,7 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(hist_data); } -@@ -1886,6 +1971,20 @@ static inline void add_to_key(char *comp +@@ -2112,6 +2176,20 @@ static inline void add_to_key(char *comp memcpy(compound_key + key_field->offset, key, size); } @@ -186,7 +165,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void event_hist_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *rbe) { -@@ -1941,6 +2040,9 @@ static void event_hist_trigger(struct ev +@@ -2167,6 +2245,9 @@ static void event_hist_trigger(struct ev return; hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals); @@ -196,16 +175,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void hist_trigger_stacktrace_print(struct seq_file *m, -@@ -2278,6 +2380,8 @@ static int event_hist_trigger_print(stru - } - seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); - -+ print_actions_spec(m, hist_data); -+ - if (data->filter_str) - seq_printf(m, " if %s", data->filter_str); - -@@ -2740,6 +2844,10 @@ static int event_hist_trigger_func(struc +@@ -2942,6 +3023,10 @@ static int event_hist_trigger_func(struc if (has_hist_vars(hist_data)) save_hist_vars(hist_data); @@ -216,7 +186,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = tracing_map_init(hist_data->map); if (ret) goto out_unreg; -@@ -2761,8 +2869,8 @@ static int event_hist_trigger_func(struc +@@ -2963,8 +3048,8 @@ static int event_hist_trigger_func(struc remove_hist_vars(hist_data); kfree(trigger_data); diff --git a/debian/patches/features/all/rt/0030-can-bcm-Replace-hrtimer_tasklet-with-softirq-based-h.patch b/debian/patches/features/all/rt/0030-can-bcm-Replace-hrtimer_tasklet-with-softirq-based-h.patch new file mode 100644 index 000000000..e8f1ade79 --- /dev/null +++ b/debian/patches/features/all/rt/0030-can-bcm-Replace-hrtimer_tasklet-with-softirq-based-h.patch @@ -0,0 +1,315 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:40:08 +0200 +Subject: [PATCH 30/36] can/bcm: Replace hrtimer_tasklet with softirq based + hrtimer +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Switch the timer to HRTIMER_MODE_SOFT, which executed the timer +callback in softirq context and remove the hrtimer_tasklet. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Cc: Oliver Hartkopp +Cc: Marc Kleine-Budde +Cc: linux-can@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + net/can/bcm.c | 156 +++++++++++++++++++--------------------------------------- + 1 file changed, 52 insertions(+), 104 deletions(-) + +--- a/net/can/bcm.c ++++ b/net/can/bcm.c +@@ -102,7 +102,6 @@ struct bcm_op { + unsigned long frames_abs, frames_filtered; + struct bcm_timeval ival1, ival2; + struct hrtimer timer, thrtimer; +- struct tasklet_struct tsklet, thrtsklet; + ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; + int rx_ifindex; + int cfsiz; +@@ -364,25 +363,34 @@ static void bcm_send_to_user(struct bcm_ + } + } + +-static void bcm_tx_start_timer(struct bcm_op *op) ++static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) + { ++ ktime_t ival; ++ + if (op->kt_ival1 && op->count) +- hrtimer_start(&op->timer, +- ktime_add(ktime_get(), op->kt_ival1), +- HRTIMER_MODE_ABS); ++ ival = op->kt_ival1; + else if (op->kt_ival2) +- hrtimer_start(&op->timer, +- ktime_add(ktime_get(), op->kt_ival2), +- HRTIMER_MODE_ABS); ++ ival = op->kt_ival2; ++ else ++ return false; ++ ++ hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); ++ return true; + } + +-static void bcm_tx_timeout_tsklet(unsigned long data) ++static void bcm_tx_start_timer(struct bcm_op *op) + { +- struct bcm_op *op = (struct bcm_op *)data; ++ if (bcm_tx_set_expiry(op, &op->timer)) ++ hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); ++} ++ ++/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ ++static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) ++{ ++ struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + struct bcm_msg_head msg_head; + + if (op->kt_ival1 && (op->count > 0)) { +- + op->count--; + if (!op->count && (op->flags & TX_COUNTEVT)) { + +@@ -399,22 +407,12 @@ static void bcm_tx_timeout_tsklet(unsign + } + bcm_can_tx(op); + +- } else if (op->kt_ival2) ++ } else if (op->kt_ival2) { + bcm_can_tx(op); ++ } + +- bcm_tx_start_timer(op); +-} +- +-/* +- * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions +- */ +-static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +-{ +- struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); +- +- tasklet_schedule(&op->tsklet); +- +- return HRTIMER_NORESTART; ++ return bcm_tx_set_expiry(op, &op->timer) ? 
++ HRTIMER_RESTART : HRTIMER_NORESTART; + } + + /* +@@ -480,7 +478,7 @@ static void bcm_rx_update_and_send(struc + /* do not send the saved data - only start throttle timer */ + hrtimer_start(&op->thrtimer, + ktime_add(op->kt_lastmsg, op->kt_ival2), +- HRTIMER_MODE_ABS); ++ HRTIMER_MODE_ABS_SOFT); + return; + } + +@@ -539,14 +537,21 @@ static void bcm_rx_starttimer(struct bcm + return; + + if (op->kt_ival1) +- hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); ++ hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); + } + +-static void bcm_rx_timeout_tsklet(unsigned long data) ++/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ ++static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) + { +- struct bcm_op *op = (struct bcm_op *)data; ++ struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + struct bcm_msg_head msg_head; + ++ /* if user wants to be informed, when cyclic CAN-Messages come back */ ++ if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { ++ /* clear received CAN frames to indicate 'nothing received' */ ++ memset(op->last_frames, 0, op->nframes * op->cfsiz); ++ } ++ + /* create notification to user */ + msg_head.opcode = RX_TIMEOUT; + msg_head.flags = op->flags; +@@ -557,25 +562,6 @@ static void bcm_rx_timeout_tsklet(unsign + msg_head.nframes = 0; + + bcm_send_to_user(op, &msg_head, NULL, 0); +-} +- +-/* +- * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out +- */ +-static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) +-{ +- struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); +- +- /* schedule before NET_RX_SOFTIRQ */ +- tasklet_hi_schedule(&op->tsklet); +- +- /* no restart of the timer is done here! 
*/ +- +- /* if user wants to be informed, when cyclic CAN-Messages come back */ +- if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { +- /* clear received CAN frames to indicate 'nothing received' */ +- memset(op->last_frames, 0, op->nframes * op->cfsiz); +- } + + return HRTIMER_NORESTART; + } +@@ -583,14 +569,12 @@ static enum hrtimer_restart bcm_rx_timeo + /* + * bcm_rx_do_flush - helper for bcm_rx_thr_flush + */ +-static inline int bcm_rx_do_flush(struct bcm_op *op, int update, +- unsigned int index) ++static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) + { + struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; + + if ((op->last_frames) && (lcf->flags & RX_THR)) { +- if (update) +- bcm_rx_changed(op, lcf); ++ bcm_rx_changed(op, lcf); + return 1; + } + return 0; +@@ -598,11 +582,8 @@ static inline int bcm_rx_do_flush(struct + + /* + * bcm_rx_thr_flush - Check for throttled data and send it to the userspace +- * +- * update == 0 : just check if throttled data is available (any irq context) +- * update == 1 : check and send throttled data to userspace (soft_irq context) + */ +-static int bcm_rx_thr_flush(struct bcm_op *op, int update) ++static int bcm_rx_thr_flush(struct bcm_op *op) + { + int updated = 0; + +@@ -611,24 +592,16 @@ static int bcm_rx_thr_flush(struct bcm_o + + /* for MUX filter we start at index 1 */ + for (i = 1; i < op->nframes; i++) +- updated += bcm_rx_do_flush(op, update, i); ++ updated += bcm_rx_do_flush(op, i); + + } else { + /* for RX_FILTER_ID and simple filter */ +- updated += bcm_rx_do_flush(op, update, 0); ++ updated += bcm_rx_do_flush(op, 0); + } + + return updated; + } + +-static void bcm_rx_thr_tsklet(unsigned long data) +-{ +- struct bcm_op *op = (struct bcm_op *)data; +- +- /* push the changed data to the userspace */ +- bcm_rx_thr_flush(op, 1); +-} +- + /* + * bcm_rx_thr_handler - the time for blocked content updates is over now: + * Check for throttled data and send it to the userspace +@@ -637,9 +610,7 @@ static enum hrtimer_restart bcm_rx_thr_h + { + struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); + +- tasklet_schedule(&op->thrtsklet); +- +- if (bcm_rx_thr_flush(op, 0)) { ++ if (bcm_rx_thr_flush(op)) { + hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); + return HRTIMER_RESTART; + } else { +@@ -735,23 +706,8 @@ static struct bcm_op *bcm_find_op(struct + + static void bcm_remove_op(struct bcm_op *op) + { +- if (op->tsklet.func) { +- while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || +- test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || +- hrtimer_active(&op->timer)) { +- hrtimer_cancel(&op->timer); +- tasklet_kill(&op->tsklet); +- } +- } +- +- if (op->thrtsklet.func) { +- while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || +- test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || +- hrtimer_active(&op->thrtimer)) { +- hrtimer_cancel(&op->thrtimer); +- tasklet_kill(&op->thrtsklet); +- } +- } ++ hrtimer_cancel(&op->timer); ++ hrtimer_cancel(&op->thrtimer); + + if ((op->frames) && (op->frames != &op->sframe)) + kfree(op->frames); +@@ -979,15 +935,13 @@ static int bcm_tx_setup(struct bcm_msg_h + op->ifindex = ifindex; + + /* initialize uninitialized (kzalloc) structure */ +- hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&op->timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL_SOFT); + op->timer.function = bcm_tx_timeout_handler; + +- /* initialize tasklet for tx countevent notification */ +- tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, +- (unsigned long) 
op); +- + /* currently unused in tx_ops */ +- hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL_SOFT); + + /* add this bcm_op to the list of the tx_ops */ + list_add(&op->list, &bo->tx_ops); +@@ -1150,20 +1104,14 @@ static int bcm_rx_setup(struct bcm_msg_h + op->rx_ifindex = ifindex; + + /* initialize uninitialized (kzalloc) structure */ +- hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&op->timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL_SOFT); + op->timer.function = bcm_rx_timeout_handler; + +- /* initialize tasklet for rx timeout notification */ +- tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, +- (unsigned long) op); +- +- hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL_SOFT); + op->thrtimer.function = bcm_rx_thr_handler; + +- /* initialize tasklet for rx throttle handling */ +- tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, +- (unsigned long) op); +- + /* add this bcm_op to the list of the rx_ops */ + list_add(&op->list, &bo->rx_ops); + +@@ -1209,12 +1157,12 @@ static int bcm_rx_setup(struct bcm_msg_h + */ + op->kt_lastmsg = 0; + hrtimer_cancel(&op->thrtimer); +- bcm_rx_thr_flush(op, 1); ++ bcm_rx_thr_flush(op); + } + + if ((op->flags & STARTTIMER) && op->kt_ival1) + hrtimer_start(&op->timer, op->kt_ival1, +- HRTIMER_MODE_REL); ++ HRTIMER_MODE_REL_SOFT); + } + + /* now we can register for can_ids, if we added a new bcm_op */ diff --git a/debian/patches/features/all/rt/0022-tracing-Add-support-for-synthetic-events.patch b/debian/patches/features/all/rt/0030-tracing-Add-support-for-synthetic-events.patch similarity index 69% rename from debian/patches/features/all/rt/0022-tracing-Add-support-for-synthetic-events.patch rename to debian/patches/features/all/rt/0030-tracing-Add-support-for-synthetic-events.patch index 579548ffe..07fca5a2d 100644 --- a/debian/patches/features/all/rt/0022-tracing-Add-support-for-synthetic-events.patch +++ b/debian/patches/features/all/rt/0030-tracing-Add-support-for-synthetic-events.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:23 -0500 -Subject: [PATCH 22/32] tracing: Add support for 'synthetic' events -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 15:00:01 -0500 +Subject: [PATCH 30/42] tracing: Add support for 'synthetic' events +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Synthetic events are user-defined events generated from hist trigger variables saved from one or more other events. @@ -48,12 +48,12 @@ discussed in a subsequent patch. 
Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 738 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 738 insertions(+) + kernel/trace/trace_events_hist.c | 865 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 865 insertions(+) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -20,10 +20,14 @@ +@@ -20,10 +20,16 @@ #include #include #include @@ -64,26 +64,31 @@ Signed-off-by: Sebastian Andrzej Siewior +#define SYNTH_SYSTEM "synthetic" +#define SYNTH_FIELDS_MAX 16 ++ ++#define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ + struct hist_field; typedef u64 (*hist_field_fn_t) (struct hist_field *field, -@@ -261,6 +265,23 @@ struct hist_trigger_data { +@@ -270,6 +276,26 @@ struct hist_trigger_data { unsigned int n_actions; }; +struct synth_field { + char *type; + char *name; -+ unsigned int size; ++ size_t size; + bool is_signed; ++ bool is_string; +}; + +struct synth_event { + struct list_head list; ++ int ref; + char *name; + struct synth_field **fields; + unsigned int n_fields; ++ unsigned int n_u64; + struct trace_event_class class; + struct trace_event_call call; + struct tracepoint *tp; @@ -92,7 +97,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct action_data; typedef void (*action_fn_t) (struct hist_trigger_data *hist_data, -@@ -273,6 +294,688 @@ struct action_data { +@@ -282,6 +308,800 @@ struct action_data { unsigned int var_ref_idx; }; @@ -101,7 +106,6 @@ Signed-off-by: Sebastian Andrzej Siewior + +struct synth_trace_event { + struct trace_entry ent; -+ int n_fields; + u64 fields[]; +}; + @@ -110,192 +114,35 @@ Signed-off-by: Sebastian Andrzej Siewior + struct synth_trace_event trace; + int offset = offsetof(typeof(trace), fields); + struct synth_event *event = call->data; -+ unsigned int i, size; ++ unsigned int i, size, n_u64; + char *name, *type; + bool is_signed; + int ret = 0; + -+ for (i = 0; i < event->n_fields; i++) { ++ for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + size = event->fields[i]->size; + is_signed = event->fields[i]->is_signed; + type = event->fields[i]->type; + name = event->fields[i]->name; + ret = trace_define_field(call, type, name, offset, size, + is_signed, FILTER_OTHER); -+ offset += sizeof(u64); ++ if (ret) ++ break; ++ ++ if (event->fields[i]->is_string) { ++ offset += STR_VAR_LEN_MAX; ++ n_u64 += STR_VAR_LEN_MAX / sizeof(u64); ++ } else { ++ offset += sizeof(u64); ++ n_u64++; ++ } + } + ++ event->n_u64 = n_u64; ++ + return ret; +} + -+static enum print_line_t print_synth_event(struct trace_iterator *iter, -+ int flags, -+ struct trace_event *event) -+{ -+ struct trace_array *tr = iter->tr; -+ struct trace_seq *s = &iter->seq; -+ struct synth_trace_event *entry; -+ struct synth_event *se; -+ unsigned int i; -+ -+ entry = (struct synth_trace_event *)iter->ent; -+ se = container_of(event, struct synth_event, call.event); -+ -+ trace_seq_printf(s, "%s: ", se->name); -+ -+ for (i = 0; i < entry->n_fields; i++) { -+ if (trace_seq_has_overflowed(s)) -+ goto end; -+ -+ /* parameter types */ -+ if (tr->trace_flags & TRACE_ITER_VERBOSE) -+ trace_seq_printf(s, "%s ", "u64"); -+ -+ /* parameter values */ -+ trace_seq_printf(s, "%s=%llu%s", se->fields[i]->name, -+ entry->fields[i], -+ i == entry->n_fields - 1 ? 
"" : ", "); -+ } -+end: -+ trace_seq_putc(s, '\n'); -+ -+ return trace_handle_return(s); -+} -+ -+static struct trace_event_functions synth_event_funcs = { -+ .trace = print_synth_event -+}; -+ -+static notrace void trace_event_raw_event_synth(void *__data, -+ u64 *var_ref_vals, -+ unsigned int var_ref_idx) -+{ -+ struct trace_event_file *trace_file = __data; -+ struct synth_trace_event *entry; -+ struct trace_event_buffer fbuffer; -+ int fields_size; -+ unsigned int i; -+ -+ struct synth_event *event; -+ -+ event = trace_file->event_call->data; -+ -+ if (trace_trigger_soft_disabled(trace_file)) -+ return; -+ -+ fields_size = event->n_fields * sizeof(u64); -+ -+ entry = trace_event_buffer_reserve(&fbuffer, trace_file, -+ sizeof(*entry) + fields_size); -+ if (!entry) -+ return; -+ -+ entry->n_fields = event->n_fields; -+ -+ for (i = 0; i < event->n_fields; i++) -+ entry->fields[i] = var_ref_vals[var_ref_idx + i]; -+ -+ trace_event_buffer_commit(&fbuffer); -+} -+ -+static void free_synth_event_print_fmt(struct trace_event_call *call) -+{ -+ if (call) -+ kfree(call->print_fmt); -+} -+ -+static int __set_synth_event_print_fmt(struct synth_event *event, -+ char *buf, int len) -+{ -+ int pos = 0; -+ int i; -+ -+ /* When len=0, we just calculate the needed length */ -+#define LEN_OR_ZERO (len ? len - pos : 0) -+ -+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); -+ for (i = 0; i < event->n_fields; i++) { -+ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", -+ event->fields[i]->name, sizeof(u64), -+ i == event->n_fields - 1 ? "" : ", "); -+ } -+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); -+ -+ for (i = 0; i < event->n_fields; i++) { -+ pos += snprintf(buf + pos, LEN_OR_ZERO, -+ ", ((u64)(REC->%s))", event->fields[i]->name); -+ } -+ -+#undef LEN_OR_ZERO -+ -+ /* return the length of print_fmt */ -+ return pos; -+} -+ -+static int set_synth_event_print_fmt(struct trace_event_call *call) -+{ -+ struct synth_event *event = call->data; -+ char *print_fmt; -+ int len; -+ -+ /* First: called with 0 length to calculate the needed length */ -+ len = __set_synth_event_print_fmt(event, NULL, 0); -+ -+ print_fmt = kmalloc(len + 1, GFP_KERNEL); -+ if (!print_fmt) -+ return -ENOMEM; -+ -+ /* Second: actually write the @print_fmt */ -+ __set_synth_event_print_fmt(event, print_fmt, len + 1); -+ call->print_fmt = print_fmt; -+ -+ return 0; -+} -+ -+int dynamic_trace_event_reg(struct trace_event_call *call, -+ enum trace_reg type, void *data) -+{ -+ struct trace_event_file *file = data; -+ -+ WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); -+ switch (type) { -+ case TRACE_REG_REGISTER: -+ return dynamic_tracepoint_probe_register(call->tp, -+ call->class->probe, -+ file); -+ case TRACE_REG_UNREGISTER: -+ tracepoint_probe_unregister(call->tp, -+ call->class->probe, -+ file, true); -+ return 0; -+ -+#ifdef CONFIG_PERF_EVENTS -+ case TRACE_REG_PERF_REGISTER: -+ return dynamic_tracepoint_probe_register(call->tp, -+ call->class->perf_probe, -+ call); -+ case TRACE_REG_PERF_UNREGISTER: -+ tracepoint_probe_unregister(call->tp, -+ call->class->perf_probe, -+ call, true); -+ return 0; -+ case TRACE_REG_PERF_OPEN: -+ case TRACE_REG_PERF_CLOSE: -+ case TRACE_REG_PERF_ADD: -+ case TRACE_REG_PERF_DEL: -+ return 0; -+#endif -+ } -+ return 0; -+} -+ -+static void free_synth_field(struct synth_field *field) -+{ -+ kfree(field->type); -+ kfree(field->name); -+ kfree(field); -+} -+ +static bool synth_field_signed(char *type) +{ + if (strncmp(type, "u", 1) == 0) @@ -304,9 +151,49 @@ Signed-off-by: Sebastian Andrzej 
Siewior + return true; +} + -+static unsigned int synth_field_size(char *type) ++static int synth_field_is_string(char *type) +{ -+ unsigned int size = 0; ++ if (strstr(type, "char[") != NULL) ++ return true; ++ ++ return false; ++} ++ ++static int synth_field_string_size(char *type) ++{ ++ char buf[4], *end, *start; ++ unsigned int len; ++ int size, err; ++ ++ start = strstr(type, "char["); ++ if (start == NULL) ++ return -EINVAL; ++ start += strlen("char["); ++ ++ end = strchr(type, ']'); ++ if (!end || end < start) ++ return -EINVAL; ++ ++ len = end - start; ++ if (len > 3) ++ return -EINVAL; ++ ++ strncpy(buf, start, len); ++ buf[len] = '\0'; ++ ++ err = kstrtouint(buf, 0, &size); ++ if (err) ++ return err; ++ ++ if (size > STR_VAR_LEN_MAX) ++ return -EINVAL; ++ ++ return size; ++} ++ ++static int synth_field_size(char *type) ++{ ++ int size = 0; + + if (strcmp(type, "s64") == 0) + size = sizeof(s64); @@ -338,12 +225,208 @@ Signed-off-by: Sebastian Andrzej Siewior + size = sizeof(unsigned long); + else if (strcmp(type, "pid_t") == 0) + size = sizeof(pid_t); -+ else if (strstr(type, "[") == 0) -+ size = sizeof(u64); ++ else if (synth_field_is_string(type)) ++ size = synth_field_string_size(type); + + return size; +} + ++static const char *synth_field_fmt(char *type) ++{ ++ const char *fmt = "%llu"; ++ ++ if (strcmp(type, "s64") == 0) ++ fmt = "%lld"; ++ else if (strcmp(type, "u64") == 0) ++ fmt = "%llu"; ++ else if (strcmp(type, "s32") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "u32") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "s16") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "u16") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "s8") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "u8") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "char") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "unsigned char") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "int") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "unsigned int") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "long") == 0) ++ fmt = "%ld"; ++ else if (strcmp(type, "unsigned long") == 0) ++ fmt = "%lu"; ++ else if (strcmp(type, "pid_t") == 0) ++ fmt = "%d"; ++ else if (strstr(type, "[") == 0) ++ fmt = "%s"; ++ ++ return fmt; ++} ++ ++static enum print_line_t print_synth_event(struct trace_iterator *iter, ++ int flags, ++ struct trace_event *event) ++{ ++ struct trace_array *tr = iter->tr; ++ struct trace_seq *s = &iter->seq; ++ struct synth_trace_event *entry; ++ struct synth_event *se; ++ unsigned int i, n_u64; ++ char print_fmt[32]; ++ const char *fmt; ++ ++ entry = (struct synth_trace_event *)iter->ent; ++ se = container_of(event, struct synth_event, call.event); ++ ++ trace_seq_printf(s, "%s: ", se->name); ++ ++ for (i = 0, n_u64 = 0; i < se->n_fields; i++) { ++ if (trace_seq_has_overflowed(s)) ++ goto end; ++ ++ fmt = synth_field_fmt(se->fields[i]->type); ++ ++ /* parameter types */ ++ if (tr->trace_flags & TRACE_ITER_VERBOSE) ++ trace_seq_printf(s, "%s ", fmt); ++ ++ snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt); ++ ++ /* parameter values */ ++ if (se->fields[i]->is_string) { ++ trace_seq_printf(s, print_fmt, se->fields[i]->name, ++ (char *)(long)entry->fields[n_u64], ++ i == se->n_fields - 1 ? "" : " "); ++ n_u64 += STR_VAR_LEN_MAX / sizeof(u64); ++ } else { ++ trace_seq_printf(s, print_fmt, se->fields[i]->name, ++ entry->fields[n_u64], ++ i == se->n_fields - 1 ? 
"" : " "); ++ n_u64++; ++ } ++ } ++end: ++ trace_seq_putc(s, '\n'); ++ ++ return trace_handle_return(s); ++} ++ ++static struct trace_event_functions synth_event_funcs = { ++ .trace = print_synth_event ++}; ++ ++static notrace void trace_event_raw_event_synth(void *__data, ++ u64 *var_ref_vals, ++ unsigned int var_ref_idx) ++{ ++ struct trace_event_file *trace_file = __data; ++ struct synth_trace_event *entry; ++ struct trace_event_buffer fbuffer; ++ struct synth_event *event; ++ unsigned int i, n_u64; ++ int fields_size = 0; ++ ++ event = trace_file->event_call->data; ++ ++ if (trace_trigger_soft_disabled(trace_file)) ++ return; ++ ++ fields_size = event->n_u64 * sizeof(u64); ++ ++ entry = trace_event_buffer_reserve(&fbuffer, trace_file, ++ sizeof(*entry) + fields_size); ++ if (!entry) ++ return; ++ ++ for (i = 0, n_u64 = 0; i < event->n_fields; i++) { ++ if (event->fields[i]->is_string) { ++ char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; ++ char *str_field = (char *)&entry->fields[n_u64]; ++ ++ strncpy(str_field, str_val, STR_VAR_LEN_MAX); ++ n_u64 += STR_VAR_LEN_MAX / sizeof(u64); ++ } else { ++ entry->fields[i] = var_ref_vals[var_ref_idx + i]; ++ n_u64++; ++ } ++ } ++ ++ trace_event_buffer_commit(&fbuffer); ++} ++ ++static void free_synth_event_print_fmt(struct trace_event_call *call) ++{ ++ if (call) { ++ kfree(call->print_fmt); ++ call->print_fmt = NULL; ++ } ++} ++ ++static int __set_synth_event_print_fmt(struct synth_event *event, ++ char *buf, int len) ++{ ++ const char *fmt; ++ int pos = 0; ++ int i; ++ ++ /* When len=0, we just calculate the needed length */ ++#define LEN_OR_ZERO (len ? len - pos : 0) ++ ++ pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); ++ for (i = 0; i < event->n_fields; i++) { ++ fmt = synth_field_fmt(event->fields[i]->type); ++ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s", ++ event->fields[i]->name, fmt, ++ i == event->n_fields - 1 ? 
"" : ", "); ++ } ++ pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); ++ ++ for (i = 0; i < event->n_fields; i++) { ++ pos += snprintf(buf + pos, LEN_OR_ZERO, ++ ", REC->%s", event->fields[i]->name); ++ } ++ ++#undef LEN_OR_ZERO ++ ++ /* return the length of print_fmt */ ++ return pos; ++} ++ ++static int set_synth_event_print_fmt(struct trace_event_call *call) ++{ ++ struct synth_event *event = call->data; ++ char *print_fmt; ++ int len; ++ ++ /* First: called with 0 length to calculate the needed length */ ++ len = __set_synth_event_print_fmt(event, NULL, 0); ++ ++ print_fmt = kmalloc(len + 1, GFP_KERNEL); ++ if (!print_fmt) ++ return -ENOMEM; ++ ++ /* Second: actually write the @print_fmt */ ++ __set_synth_event_print_fmt(event, print_fmt, len + 1); ++ call->print_fmt = print_fmt; ++ ++ return 0; ++} ++ ++static void free_synth_field(struct synth_field *field) ++{ ++ kfree(field->type); ++ kfree(field->name); ++ kfree(field); ++} ++ +static struct synth_field *parse_synth_field(char *field_type, + char *field_name) +{ @@ -372,8 +455,10 @@ Signed-off-by: Sebastian Andrzej Siewior + goto free; + } + strcat(field->type, field_type); -+ if (array) ++ if (array) { + strcat(field->type, array); ++ *array = '\0'; ++ } + + field->size = synth_field_size(field->type); + if (!field->size) { @@ -381,6 +466,9 @@ Signed-off-by: Sebastian Andrzej Siewior + goto free; + } + ++ if (synth_field_is_string(field->type)) ++ field->is_string = true; ++ + field->is_signed = synth_field_signed(field->type); + + field->name = kstrdup(field_name, GFP_KERNEL); @@ -408,47 +496,43 @@ Signed-off-by: Sebastian Andrzej Siewior +static struct tracepoint *alloc_synth_tracepoint(char *name) +{ + struct tracepoint *tp; -+ int ret = 0; + + tp = kzalloc(sizeof(*tp), GFP_KERNEL); -+ if (!tp) { -+ ret = -ENOMEM; -+ goto free; -+ } ++ if (!tp) ++ return ERR_PTR(-ENOMEM); + + tp->name = kstrdup(name, GFP_KERNEL); + if (!tp->name) { -+ ret = -ENOMEM; -+ goto free; ++ kfree(tp); ++ return ERR_PTR(-ENOMEM); + } + + return tp; -+ free: -+ free_synth_tracepoint(tp); -+ -+ return ERR_PTR(ret); +} + ++typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, ++ unsigned int var_ref_idx); ++ +static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, + unsigned int var_ref_idx) +{ + struct tracepoint *tp = event->tp; + + if (unlikely(atomic_read(&tp->key.enabled) > 0)) { -+ struct tracepoint_func *it_func_ptr; -+ void *it_func; ++ struct tracepoint_func *probe_func_ptr; ++ synth_probe_func_t probe_func; + void *__data; + + if (!(cpu_online(raw_smp_processor_id()))) + return; + -+ it_func_ptr = rcu_dereference_sched((tp)->funcs); -+ if (it_func_ptr) { ++ probe_func_ptr = rcu_dereference_sched((tp)->funcs); ++ if (probe_func_ptr) { + do { -+ it_func = (it_func_ptr)->func; -+ __data = (it_func_ptr)->data; -+ ((void(*)(void *__data, u64 *var_ref_vals, unsigned int var_ref_idx))(it_func))(__data, var_ref_vals, var_ref_idx); -+ } while ((++it_func_ptr)->func); ++ probe_func = probe_func_ptr->func; ++ __data = probe_func_ptr->data; ++ probe_func(__data, var_ref_vals, var_ref_idx); ++ } while ((++probe_func_ptr)->func); + } + } +} @@ -465,6 +549,7 @@ Signed-off-by: Sebastian Andrzej Siewior + return NULL; +} + ++/* This function releases synth_event_mutex */ +static int register_synth_event(struct synth_event *event) +{ + struct trace_event_call *call = &event->call; @@ -494,11 +579,19 @@ Signed-off-by: Sebastian Andrzej Siewior + goto out; + } + call->flags = TRACE_EVENT_FL_TRACEPOINT; -+ call->class->reg = 
dynamic_trace_event_reg; ++ call->class->reg = trace_event_reg; + call->class->probe = trace_event_raw_event_synth; + call->data = event; + call->tp = event->tp; ++ ++ /* ++ * trace_add_event_call() grabs event_mutex, but that can ++ * deadlock with a hist trigger cmd already holding it that ++ * can grab synth_event_mutex ++ */ ++ mutex_unlock(&synth_event_mutex); + ret = trace_add_event_call(call); ++ mutex_lock(&synth_event_mutex); + if (ret) { + pr_warn("Failed to register synthetic event: %s\n", + trace_event_name(call)); @@ -507,7 +600,9 @@ Signed-off-by: Sebastian Andrzej Siewior + + ret = set_synth_event_print_fmt(call); + if (ret < 0) { ++ mutex_unlock(&synth_event_mutex); + trace_remove_event_call(call); ++ mutex_lock(&synth_event_mutex); + goto err; + } + out: @@ -517,26 +612,27 @@ Signed-off-by: Sebastian Andrzej Siewior + goto out; +} + ++/* This function releases synth_event_mutex */ +static int unregister_synth_event(struct synth_event *event) +{ + struct trace_event_call *call = &event->call; + int ret; + ++ mutex_unlock(&synth_event_mutex); + ret = trace_remove_event_call(call); -+ if (ret) { -+ pr_warn("Failed to remove synthetic event: %s\n", -+ trace_event_name(call)); -+ free_synth_event_print_fmt(call); -+ unregister_trace_event(&call->event); -+ } ++ mutex_lock(&synth_event_mutex); + + return ret; +} + -+static void remove_synth_event(struct synth_event *event) ++static int remove_synth_event(struct synth_event *event) +{ -+ unregister_synth_event(event); -+ list_del(&event->list); ++ int ret = unregister_synth_event(event); ++ ++ if (!ret) ++ list_del(&event->list); ++ ++ return ret; +} + +static int add_synth_event(struct synth_event *event) @@ -604,9 +700,10 @@ Signed-off-by: Sebastian Andrzej Siewior + return event; +} + ++/* This function releases synth_event_mutex */ +static int create_synth_event(int argc, char **argv) +{ -+ struct synth_field *fields[SYNTH_FIELDS_MAX]; ++ struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; + struct synth_event *event = NULL; + bool delete_event = false; + int i, n_fields = 0, ret = 0; @@ -622,7 +719,7 @@ Signed-off-by: Sebastian Andrzej Siewior + */ + if (argc < 1) { + ret = -EINVAL; -+ goto err; ++ goto out; + } + + name = argv[0]; @@ -634,10 +731,16 @@ Signed-off-by: Sebastian Andrzej Siewior + event = find_synth_event(name); + if (event) { + if (delete_event) { -+ remove_synth_event(event); -+ goto err; -+ } else -+ ret = -EEXIST; ++ if (event->ref) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ret = remove_synth_event(event); ++ if (!ret) ++ free_synth_event(event); ++ goto out; ++ } ++ ret = -EEXIST; + goto out; + } else if (delete_event) { + ret = -EINVAL; @@ -646,7 +749,7 @@ Signed-off-by: Sebastian Andrzej Siewior + + if (argc < 2) { + ret = -EINVAL; -+ goto err; ++ goto out; + } + + for (i = 1; i < argc - 1; i++) { @@ -654,16 +757,21 @@ Signed-off-by: Sebastian Andrzej Siewior + continue; + if (n_fields == SYNTH_FIELDS_MAX) { + ret = -EINVAL; -+ goto out; -+ } -+ fields[n_fields] = parse_synth_field(argv[i], argv[i + 1]); -+ if (!fields[n_fields]) + goto err; ++ } ++ ++ field = parse_synth_field(argv[i], argv[i + 1]); ++ if (IS_ERR(field)) { ++ ret = PTR_ERR(field); ++ goto err; ++ } ++ fields[n_fields] = field; + i++; n_fields++; + } ++ + if (i < argc) { + ret = -EINVAL; -+ goto out; ++ goto err; + } + + event = alloc_synth_event(name, n_fields, fields); @@ -686,6 +794,7 @@ Signed-off-by: Sebastian Andrzej Siewior + goto out; +} + ++/* This function releases synth_event_mutex */ +static int 
release_all_synth_events(void) +{ + struct synth_event *event, *e; @@ -693,11 +802,19 @@ Signed-off-by: Sebastian Andrzej Siewior + + mutex_lock(&synth_event_mutex); + -+ list_for_each_entry_safe(event, e, &synth_event_list, list) { -+ remove_synth_event(event); -+ free_synth_event(event); ++ list_for_each_entry(event, &synth_event_list, list) { ++ if (event->ref) { ++ ret = -EBUSY; ++ goto out; ++ } + } + ++ list_for_each_entry_safe(event, e, &synth_event_list, list) { ++ ret = remove_synth_event(event); ++ if (!ret) ++ free_synth_event(event); ++ } ++ out: + mutex_unlock(&synth_event_mutex); + + return ret; @@ -781,7 +898,45 @@ Signed-off-by: Sebastian Andrzej Siewior static u64 hist_field_timestamp(struct hist_field *hist_field, struct tracing_map_elt *elt, struct ring_buffer_event *rbe, -@@ -3028,3 +3731,38 @@ static __init void unregister_trigger_hi +@@ -2942,6 +3762,8 @@ static int event_hist_trigger_func(struc + struct hist_trigger_attrs *attrs; + struct event_trigger_ops *trigger_ops; + struct hist_trigger_data *hist_data; ++ struct synth_event *se; ++ const char *se_name; + bool remove = false; + char *trigger; + int ret = 0; +@@ -3000,6 +3822,14 @@ static int event_hist_trigger_func(struc + } + + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); ++ ++ mutex_lock(&synth_event_mutex); ++ se_name = trace_event_name(file->event_call); ++ se = find_synth_event(se_name); ++ if (se) ++ se->ref--; ++ mutex_unlock(&synth_event_mutex); ++ + ret = 0; + goto out_free; + } +@@ -3017,6 +3847,13 @@ static int event_hist_trigger_func(struc + } else if (ret < 0) + goto out_free; + ++ mutex_lock(&synth_event_mutex); ++ se_name = trace_event_name(file->event_call); ++ se = find_synth_event(se_name); ++ if (se) ++ se->ref++; ++ mutex_unlock(&synth_event_mutex); ++ + if (get_named_trigger_data(trigger_data)) + goto enable; + +@@ -3207,3 +4044,31 @@ static __init void unregister_trigger_hi return ret; } @@ -789,16 +944,9 @@ Signed-off-by: Sebastian Andrzej Siewior +static __init int trace_events_hist_init(void) +{ + struct dentry *entry = NULL; -+ struct trace_array *tr; + struct dentry *d_tracer; + int err = 0; + -+ tr = top_trace_array(); -+ if (!tr) { -+ err = -ENODEV; -+ goto err; -+ } -+ + d_tracer = tracing_init_dentry(); + if (IS_ERR(d_tracer)) { + err = PTR_ERR(d_tracer); @@ -806,7 +954,7 @@ Signed-off-by: Sebastian Andrzej Siewior + } + + entry = tracefs_create_file("synthetic_events", 0644, d_tracer, -+ tr, &synth_events_fops); ++ NULL, &synth_events_fops); + if (!entry) { + err = -ENODEV; + goto err; diff --git a/debian/patches/features/all/rt/0031-mac80211_hwsim-Replace-hrtimer-tasklet-with-softirq-.patch b/debian/patches/features/all/rt/0031-mac80211_hwsim-Replace-hrtimer-tasklet-with-softirq-.patch new file mode 100644 index 000000000..c05e3ffc4 --- /dev/null +++ b/debian/patches/features/all/rt/0031-mac80211_hwsim-Replace-hrtimer-tasklet-with-softirq-.patch @@ -0,0 +1,136 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:40:09 +0200 +Subject: [PATCH 31/36] mac80211_hwsim: Replace hrtimer tasklet with softirq + hrtimer +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Switch the timer to HRTIMER_MODE_SOFT, which executed the timer +callback in softirq context and remove the hrtimer_tasklet. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Cc: Johannes Berg +Cc: Kalle Valo +Cc: linux-wireless@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/wireless/mac80211_hwsim.c | 44 +++++++++++++++------------------- + 1 file changed, 20 insertions(+), 24 deletions(-) + +--- a/drivers/net/wireless/mac80211_hwsim.c ++++ b/drivers/net/wireless/mac80211_hwsim.c +@@ -537,7 +537,7 @@ struct mac80211_hwsim_data { + unsigned int rx_filter; + bool started, idle, scanning; + struct mutex mutex; +- struct tasklet_hrtimer beacon_timer; ++ struct hrtimer beacon_timer; + enum ps_mode { + PS_DISABLED, PS_ENABLED, PS_AUTO_POLL, PS_MANUAL_POLL + } ps; +@@ -1418,7 +1418,7 @@ static void mac80211_hwsim_stop(struct i + { + struct mac80211_hwsim_data *data = hw->priv; + data->started = false; +- tasklet_hrtimer_cancel(&data->beacon_timer); ++ hrtimer_cancel(&data->beacon_timer); + wiphy_debug(hw->wiphy, "%s\n", __func__); + } + +@@ -1541,14 +1541,12 @@ static enum hrtimer_restart + mac80211_hwsim_beacon(struct hrtimer *timer) + { + struct mac80211_hwsim_data *data = +- container_of(timer, struct mac80211_hwsim_data, +- beacon_timer.timer); ++ container_of(timer, struct mac80211_hwsim_data, beacon_timer); + struct ieee80211_hw *hw = data->hw; + u64 bcn_int = data->beacon_int; +- ktime_t next_bcn; + + if (!data->started) +- goto out; ++ return HRTIMER_NORESTART; + + ieee80211_iterate_active_interfaces_atomic( + hw, IEEE80211_IFACE_ITER_NORMAL, +@@ -1560,11 +1558,9 @@ mac80211_hwsim_beacon(struct hrtimer *ti + data->bcn_delta = 0; + } + +- next_bcn = ktime_add(hrtimer_get_expires(timer), +- ns_to_ktime(bcn_int * 1000)); +- tasklet_hrtimer_start(&data->beacon_timer, next_bcn, HRTIMER_MODE_ABS); +-out: +- return HRTIMER_NORESTART; ++ hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer), ++ ns_to_ktime(bcn_int * NSEC_PER_USEC)); ++ return HRTIMER_RESTART; + } + + static const char * const hwsim_chanwidths[] = { +@@ -1638,15 +1634,15 @@ static int mac80211_hwsim_config(struct + mutex_unlock(&data->mutex); + + if (!data->started || !data->beacon_int) +- tasklet_hrtimer_cancel(&data->beacon_timer); +- else if (!hrtimer_is_queued(&data->beacon_timer.timer)) { ++ hrtimer_cancel(&data->beacon_timer); ++ else if (!hrtimer_is_queued(&data->beacon_timer)) { + u64 tsf = mac80211_hwsim_get_tsf(hw, NULL); + u32 bcn_int = data->beacon_int; + u64 until_tbtt = bcn_int - do_div(tsf, bcn_int); + +- tasklet_hrtimer_start(&data->beacon_timer, +- ns_to_ktime(until_tbtt * 1000), +- HRTIMER_MODE_REL); ++ hrtimer_start(&data->beacon_timer, ++ ns_to_ktime(until_tbtt * 1000), ++ HRTIMER_MODE_REL_SOFT); + } + + return 0; +@@ -1709,7 +1705,7 @@ static void mac80211_hwsim_bss_info_chan + info->enable_beacon, info->beacon_int); + vp->bcn_en = info->enable_beacon; + if (data->started && +- !hrtimer_is_queued(&data->beacon_timer.timer) && ++ !hrtimer_is_queued(&data->beacon_timer) && + info->enable_beacon) { + u64 tsf, until_tbtt; + u32 bcn_int; +@@ -1717,9 +1713,9 @@ static void mac80211_hwsim_bss_info_chan + tsf = mac80211_hwsim_get_tsf(hw, vif); + bcn_int = data->beacon_int; + until_tbtt = bcn_int - do_div(tsf, bcn_int); +- tasklet_hrtimer_start(&data->beacon_timer, +- ns_to_ktime(until_tbtt * 1000), +- HRTIMER_MODE_REL); ++ hrtimer_start(&data->beacon_timer, ++ ns_to_ktime(until_tbtt * 1000), ++ HRTIMER_MODE_REL_SOFT); + } else if (!info->enable_beacon) { + unsigned int count = 0; + ieee80211_iterate_active_interfaces_atomic( +@@ -1728,7 +1724,7 @@ static void 
mac80211_hwsim_bss_info_chan + wiphy_debug(hw->wiphy, " beaconing vifs remaining: %u", + count); + if (count == 0) { +- tasklet_hrtimer_cancel(&data->beacon_timer); ++ hrtimer_cancel(&data->beacon_timer); + data->beacon_int = 0; + } + } +@@ -2720,9 +2716,9 @@ static int mac80211_hwsim_new_radio(stru + data->debugfs, + data, &hwsim_simulate_radar); + +- tasklet_hrtimer_init(&data->beacon_timer, +- mac80211_hwsim_beacon, +- CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ hrtimer_init(&data->beacon_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_ABS_SOFT); ++ data->beacon_timer.function = mac80211_hwsim_beacon; + + spin_lock_bh(&hwsim_radio_lock); + list_add_tail(&data->list, &hwsim_radios); diff --git a/debian/patches/features/all/rt/0031-tracing-Add-support-for-field-variables.patch b/debian/patches/features/all/rt/0031-tracing-Add-support-for-field-variables.patch new file mode 100644 index 000000000..2379f9978 --- /dev/null +++ b/debian/patches/features/all/rt/0031-tracing-Add-support-for-field-variables.patch @@ -0,0 +1,589 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 15:00:02 -0500 +Subject: [PATCH 31/42] tracing: Add support for 'field variables' +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Users should be able to directly specify event fields in hist trigger +'actions' rather than being forced to explicitly create a variable for +that purpose. + +Add support allowing fields to be used directly in actions, which +essentially does just that - creates 'invisible' variables for each +bare field specified in an action. If a bare field refers to a field +on another (matching) event, it even creates a special histogram for +the purpose (since variables can't be defined on an existing histogram +after histogram creation). + +Here's a simple example that demonstrates both. Basically the +onmatch() action creates a list of variables corresponding to the +parameters of the synthetic event to be generated, and then uses those +values to generate the event. So for the wakeup_latency synthetic +event 'call' below the first param, $wakeup_lat, is a variable defined +explicitly on sched_switch, where 'next_pid' is just a normal field on +sched_switch, and prio is a normal field on sched_waking. + +Since the mechanism works on variables, those two normal fields just +have 'invisible' variables created internally for them. In the case of +'prio', which is on another event, we actually need to create an +additional hist trigger and define the invisible event on that, since +once a hist trigger is defined, variables can't be added to it later. 
+ + echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> + /sys/kernel/debug/tracing/synthetic_events + + echo 'hist:keys=pid:ts0=$common_timestamp.usecs >> + /sys/kernel/debug/tracing/events/sched/sched_waking/trigger + +echo 'hist:keys=next_pid:wakeup_lat=$common_timestamp.usecs-$ts0: + onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio) + >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 450 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 449 insertions(+), 1 deletion(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -254,6 +254,16 @@ struct hist_trigger_attrs { + struct var_defs var_defs; + }; + ++struct field_var { ++ struct hist_field *var; ++ struct hist_field *val; ++}; ++ ++struct field_var_hist { ++ struct hist_trigger_data *hist_data; ++ char *cmd; ++}; ++ + struct hist_trigger_data { + struct hist_field *fields[HIST_FIELDS_MAX]; + unsigned int n_vals; +@@ -274,6 +284,14 @@ struct hist_trigger_data { + + struct action_data *actions[HIST_ACTIONS_MAX]; + unsigned int n_actions; ++ ++ struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX]; ++ unsigned int n_synth_var_refs; ++ struct field_var *field_vars[SYNTH_FIELDS_MAX]; ++ unsigned int n_field_vars; ++ unsigned int n_field_var_str; ++ struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX]; ++ unsigned int n_field_var_hists; + }; + + struct synth_field { +@@ -1394,6 +1412,7 @@ static struct hist_field *find_event_var + struct hist_elt_data { + char *comm; + u64 *var_ref_vals; ++ char *field_var_str[SYNTH_FIELDS_MAX]; + }; + + static u64 hist_field_var_ref(struct hist_field *hist_field, +@@ -1687,6 +1706,11 @@ static inline void save_comm(char *comm, + + static void hist_elt_data_free(struct hist_elt_data *elt_data) + { ++ unsigned int i; ++ ++ for (i = 0; i < SYNTH_FIELDS_MAX; i++) ++ kfree(elt_data->field_var_str[i]); ++ + kfree(elt_data->comm); + kfree(elt_data); + } +@@ -1704,7 +1728,7 @@ static int hist_trigger_elt_data_alloc(s + unsigned int size = TASK_COMM_LEN + 1; + struct hist_elt_data *elt_data; + struct hist_field *key_field; +- unsigned int i; ++ unsigned int i, n_str; + + elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); + if (!elt_data) +@@ -1723,6 +1747,18 @@ static int hist_trigger_elt_data_alloc(s + } + } + ++ n_str = hist_data->n_field_var_str; ++ ++ size = STR_VAR_LEN_MAX; ++ ++ for (i = 0; i < n_str; i++) { ++ elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL); ++ if (!elt_data->field_var_str[i]) { ++ hist_elt_data_free(elt_data); ++ return -ENOMEM; ++ } ++ } ++ + elt->private_data = elt_data; + + return 0; +@@ -2381,6 +2417,387 @@ static struct hist_field *parse_expr(str + return ERR_PTR(ret); + } + ++static char *find_trigger_filter(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file) ++{ ++ struct event_trigger_data *test; ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ if (test->private_data == hist_data) ++ return test->filter_str; ++ } ++ } ++ ++ return NULL; ++} ++ ++static struct event_command trigger_hist_cmd; ++static int event_hist_trigger_func(struct event_command *cmd_ops, ++ struct trace_event_file *file, ++ char *glob, char *cmd, char *param); ++ ++static bool compatible_keys(struct hist_trigger_data *target_hist_data, ++ struct hist_trigger_data *hist_data, ++ unsigned int n_keys) ++{ ++ struct hist_field 
*target_hist_field, *hist_field; ++ unsigned int n, i, j; ++ ++ if (hist_data->n_fields - hist_data->n_vals != n_keys) ++ return false; ++ ++ i = hist_data->n_vals; ++ j = target_hist_data->n_vals; ++ ++ for (n = 0; n < n_keys; n++) { ++ hist_field = hist_data->fields[i + n]; ++ target_hist_field = hist_data->fields[j + n]; ++ ++ if (strcmp(hist_field->type, target_hist_field->type) != 0) ++ return false; ++ if (hist_field->size != target_hist_field->size) ++ return false; ++ if (hist_field->is_signed != target_hist_field->is_signed) ++ return false; ++ } ++ ++ return true; ++} ++ ++static struct hist_trigger_data * ++find_compatible_hist(struct hist_trigger_data *target_hist_data, ++ struct trace_event_file *file) ++{ ++ struct hist_trigger_data *hist_data; ++ struct event_trigger_data *test; ++ unsigned int n_keys; ++ ++ n_keys = target_hist_data->n_fields - target_hist_data->n_vals; ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ hist_data = test->private_data; ++ ++ if (compatible_keys(target_hist_data, hist_data, n_keys)) ++ return hist_data; ++ } ++ } ++ ++ return NULL; ++} ++ ++static struct trace_event_file *event_file(struct trace_array *tr, ++ char *system, char *event_name) ++{ ++ struct trace_event_file *file; ++ ++ file = find_event_file(tr, system, event_name); ++ if (!file) ++ return ERR_PTR(-EINVAL); ++ ++ return file; ++} ++ ++static struct hist_field * ++create_field_var_hist(struct hist_trigger_data *target_hist_data, ++ char *system, char *event_name, char *field_name) ++{ ++ struct trace_array *tr = target_hist_data->event_file->tr; ++ struct hist_field *event_var = ERR_PTR(-EINVAL); ++ struct hist_trigger_data *hist_data; ++ unsigned int i, n, first = true; ++ struct field_var_hist *var_hist; ++ struct trace_event_file *file; ++ struct hist_field *key_field; ++ char *saved_filter; ++ char *cmd; ++ int ret; ++ ++ if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) ++ return ERR_PTR(-EINVAL); ++ ++ file = event_file(tr, system, event_name); ++ ++ if (IS_ERR(file)) { ++ ret = PTR_ERR(file); ++ return ERR_PTR(ret); ++ } ++ ++ hist_data = find_compatible_hist(target_hist_data, file); ++ if (!hist_data) ++ return ERR_PTR(-EINVAL); ++ ++ var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL); ++ if (!var_hist) ++ return ERR_PTR(-ENOMEM); ++ ++ cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); ++ if (!cmd) { ++ kfree(var_hist); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ strcat(cmd, "keys="); ++ ++ for_each_hist_key_field(i, hist_data) { ++ key_field = hist_data->fields[i]; ++ if (!first) ++ strcat(cmd, ","); ++ strcat(cmd, key_field->field->name); ++ first = false; ++ } ++ ++ strcat(cmd, ":synthetic_"); ++ strcat(cmd, field_name); ++ strcat(cmd, "="); ++ strcat(cmd, field_name); ++ ++ saved_filter = find_trigger_filter(hist_data, file); ++ if (saved_filter) { ++ strcat(cmd, " if "); ++ strcat(cmd, saved_filter); ++ } ++ ++ var_hist->cmd = kstrdup(cmd, GFP_KERNEL); ++ if (!var_hist->cmd) { ++ kfree(cmd); ++ kfree(var_hist); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ var_hist->hist_data = hist_data; ++ ++ ret = event_hist_trigger_func(&trigger_hist_cmd, file, ++ "", "hist", cmd); ++ if (ret) { ++ kfree(cmd); ++ kfree(var_hist->cmd); ++ kfree(var_hist); ++ return ERR_PTR(ret); ++ } ++ ++ strcpy(cmd, "synthetic_"); ++ strcat(cmd, field_name); ++ ++ event_var = find_event_var(tr, system, event_name, cmd); ++ if (!event_var) { ++ kfree(cmd); ++ kfree(var_hist->cmd); ++ kfree(var_hist); ++ return ERR_PTR(-EINVAL); ++ } 
++ ++ n = target_hist_data->n_field_var_hists; ++ target_hist_data->field_var_hists[n] = var_hist; ++ target_hist_data->n_field_var_hists++; ++ ++ return event_var; ++} ++ ++static struct hist_field * ++find_target_event_var(struct hist_trigger_data *hist_data, ++ char *system, char *event_name, char *var_name) ++{ ++ struct trace_event_file *file = hist_data->event_file; ++ struct hist_field *hist_field = NULL; ++ ++ if (system) { ++ struct trace_event_call *call; ++ ++ if (!event_name) ++ return NULL; ++ ++ call = file->event_call; ++ ++ if (strcmp(system, call->class->system) != 0) ++ return NULL; ++ ++ if (strcmp(event_name, trace_event_name(call)) != 0) ++ return NULL; ++ } ++ ++ hist_field = find_var_field(hist_data, var_name); ++ ++ return hist_field; ++} ++ ++static inline void __update_field_vars(struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *rec, ++ struct field_var **field_vars, ++ unsigned int n_field_vars, ++ unsigned int field_var_str_start) ++{ ++ struct hist_elt_data *elt_data = elt->private_data; ++ unsigned int i, j, var_idx; ++ u64 var_val; ++ ++ for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { ++ struct field_var *field_var = field_vars[i]; ++ struct hist_field *var = field_var->var; ++ struct hist_field *val = field_var->val; ++ ++ var_val = val->fn(val, elt, rbe, rec); ++ var_idx = var->var.idx; ++ ++ if (val->flags & HIST_FIELD_FL_STRING) { ++ char *str = elt_data->field_var_str[j++]; ++ char *val_str = (char *)(uintptr_t)var_val; ++ ++ strncpy(str, val_str, STR_VAR_LEN_MAX); ++ var_val = (u64)(uintptr_t)str; ++ } ++ tracing_map_set_var(elt, var_idx, var_val); ++ } ++} ++ ++static void update_field_vars(struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *rec) ++{ ++ __update_field_vars(elt, rbe, rec, hist_data->field_vars, ++ hist_data->n_field_vars, 0); ++} ++ ++static struct hist_field *create_var(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ char *name, int size, const char *type) ++{ ++ struct hist_field *var; ++ int idx; ++ ++ if (find_var(file, name) && !hist_data->remove) { ++ var = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ ++ var = kzalloc(sizeof(struct hist_field), GFP_KERNEL); ++ if (!var) { ++ var = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ idx = tracing_map_add_var(hist_data->map); ++ if (idx < 0) { ++ kfree(var); ++ var = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ ++ var->flags = HIST_FIELD_FL_VAR; ++ var->var.idx = idx; ++ var->var.hist_data = var->hist_data = hist_data; ++ var->size = size; ++ var->var.name = kstrdup(name, GFP_KERNEL); ++ var->type = kstrdup(type, GFP_KERNEL); ++ if (!var->var.name || !var->type) { ++ kfree(var->var.name); ++ kfree(var->type); ++ kfree(var); ++ var = ERR_PTR(-ENOMEM); ++ } ++ out: ++ return var; ++} ++ ++static struct field_var *create_field_var(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ char *field_name) ++{ ++ struct hist_field *val = NULL, *var = NULL; ++ unsigned long flags = HIST_FIELD_FL_VAR; ++ struct field_var *field_var; ++ int ret = 0; ++ ++ if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ val = parse_atom(hist_data, file, field_name, &flags, NULL); ++ if (IS_ERR(val)) { ++ ret = PTR_ERR(val); ++ goto err; ++ } ++ ++ var = create_var(hist_data, file, field_name, val->size, val->type); ++ if (IS_ERR(var)) { ++ kfree(val); ++ ret = PTR_ERR(var); ++ goto err; ++ } ++ ++ field_var = kzalloc(sizeof(struct field_var), 
GFP_KERNEL); ++ if (!field_var) { ++ kfree(val); ++ kfree(var); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ field_var->var = var; ++ field_var->val = val; ++ out: ++ return field_var; ++ err: ++ field_var = ERR_PTR(ret); ++ goto out; ++} ++ ++static struct field_var * ++create_target_field_var(struct hist_trigger_data *hist_data, ++ char *system, char *event_name, char *var_name) ++{ ++ struct trace_event_file *file = hist_data->event_file; ++ ++ if (system) { ++ struct trace_event_call *call; ++ ++ if (!event_name) ++ return NULL; ++ ++ call = file->event_call; ++ ++ if (strcmp(system, call->class->system) != 0) ++ return NULL; ++ ++ if (strcmp(event_name, trace_event_name(call)) != 0) ++ return NULL; ++ } ++ ++ return create_field_var(hist_data, file, var_name); ++} ++ ++static void destroy_field_var(struct field_var *field_var) ++{ ++ if (!field_var) ++ return; ++ ++ destroy_hist_field(field_var->var, 0); ++ destroy_hist_field(field_var->val, 0); ++ ++ kfree(field_var); ++} ++ ++static void destroy_field_vars(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_field_vars; i++) ++ destroy_field_var(hist_data->field_vars[i]); ++} ++ ++static void save_field_var(struct hist_trigger_data *hist_data, ++ struct field_var *field_var) ++{ ++ hist_data->field_vars[hist_data->n_field_vars++] = field_var; ++ ++ if (field_var->val->flags & HIST_FIELD_FL_STRING) ++ hist_data->n_field_var_str++; ++} ++ + static int create_hitcount_val(struct hist_trigger_data *hist_data) + { + hist_data->fields[HITCOUNT_IDX] = +@@ -2827,6 +3244,16 @@ static int create_actions(struct hist_tr + return ret; + } + ++static void destroy_field_var_hists(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_field_var_hists; i++) { ++ kfree(hist_data->field_var_hists[i]->cmd); ++ kfree(hist_data->field_var_hists[i]); ++ } ++} ++ + static void destroy_hist_data(struct hist_trigger_data *hist_data) + { + if (!hist_data) +@@ -2837,6 +3264,8 @@ static void destroy_hist_data(struct his + tracing_map_destroy(hist_data->map); + + destroy_actions(hist_data); ++ destroy_field_vars(hist_data); ++ destroy_field_var_hists(hist_data); + + kfree(hist_data); + } +@@ -2970,6 +3399,8 @@ static void hist_trigger_elt_update(stru + tracing_map_set_var(elt, var_idx, hist_val); + } + } ++ ++ update_field_vars(hist_data, elt, rbe, rec); + } + + static inline void add_to_key(char *compound_key, void *key, +@@ -3687,6 +4118,21 @@ static bool hist_trigger_check_refs(stru + return false; + } + ++static void unregister_field_var_hists(struct hist_trigger_data *hist_data) ++{ ++ struct trace_event_file *file; ++ unsigned int i; ++ char *cmd; ++ int ret; ++ ++ for (i = 0; i < hist_data->n_field_var_hists; i++) { ++ file = hist_data->field_var_hists[i]->hist_data->event_file; ++ cmd = hist_data->field_var_hists[i]->cmd; ++ ret = event_hist_trigger_func(&trigger_hist_cmd, file, ++ "!hist", "hist", cmd); ++ } ++} ++ + static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct trace_event_file *file) +@@ -3702,6 +4148,7 @@ static void hist_unregister_trigger(char + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { + if (!hist_trigger_match(data, test, named_data, false)) + continue; ++ unregister_field_var_hists(test->private_data); + unregistered = true; + list_del_rcu(&test->list); + trace_event_trigger_enable_disable(file, 0); +@@ -3744,6 +4191,7 @@ static void hist_unreg_all(struct trace_ + + 
list_for_each_entry_safe(test, n, &file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ unregister_field_var_hists(test->private_data); + list_del_rcu(&test->list); + trace_event_trigger_enable_disable(file, 0); + update_cond_flag(file); diff --git a/debian/patches/features/all/rt/0032-tracing-Add-onmatch-hist-trigger-action-support.patch b/debian/patches/features/all/rt/0032-tracing-Add-onmatch-hist-trigger-action-support.patch new file mode 100644 index 000000000..c0125b934 --- /dev/null +++ b/debian/patches/features/all/rt/0032-tracing-Add-onmatch-hist-trigger-action-support.patch @@ -0,0 +1,555 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 15:00:03 -0500 +Subject: [PATCH 32/42] tracing: Add 'onmatch' hist trigger action support +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Add an 'onmatch(matching.event).(param list)' +hist trigger action which is invoked with the set of variables or +event fields named in the 'param list'. The result is the generation +of a synthetic event that consists of the values contained in those +variables and/or fields at the time the invoking event was hit. + +As an example the below defines a simple synthetic event using a +variable defined on the sched_wakeup_new event, and shows the event +definition with unresolved fields, since the sched_wakeup_new event +with the testpid variable hasn't been defined yet: + + # echo 'wakeup_new_test pid_t pid; int prio' >> \ + /sys/kernel/debug/tracing/synthetic_events + + # cat /sys/kernel/debug/tracing/synthetic_events + wakeup_new_test pid_t pid; int prio + +The following hist trigger both defines a testpid variable and +specifies an onmatch() trace action that uses that variable along with +a non-variable field to generate a wakeup_new_test synthetic event +whenever a sched_wakeup_new event occurs, which because of the 'if +comm == "cyclictest"' filter only happens when the executable is +cyclictest: + + # echo 'hist:testpid=pid:keys=$testpid:\ + onmatch(sched.sched_wakeup_new).wakeup_new_test($testpid, prio) \ + if comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger + +Creating and displaying a histogram based on those events is now just +a matter of using the fields and new synthetic event in the +tracing/events/synthetic directory, as usual: + + # echo 'hist:keys=pid,prio:sort=pid,prio' >> \ + /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger + +Signed-off-by: Tom Zanussi +Signed-off-by: Rajvi Jingar +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 406 +++++++++++++++++++++++++++++++++++++-- + 1 file changed, 394 insertions(+), 12 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -323,7 +323,18 @@ typedef void (*action_fn_t) (struct hist + + struct action_data { + action_fn_t fn; +- unsigned int var_ref_idx; ++ unsigned int n_params; ++ char *params[SYNTH_FIELDS_MAX]; ++ ++ union { ++ struct { ++ unsigned int var_ref_idx; ++ char *match_event; ++ char *match_event_system; ++ char *synth_event_name; ++ struct synth_event *synth_event; ++ } onmatch; ++ }; + }; + + static LIST_HEAD(synth_event_list); +@@ -925,6 +936,21 @@ static struct synth_event *alloc_synth_e + return event; + } + ++static void action_trace(struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, void *rec, ++ struct ring_buffer_event *rbe, ++ struct action_data *data, u64 *var_ref_vals) ++{ ++ struct synth_event 
*event = data->onmatch.synth_event; ++ ++ trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx); ++} ++ ++struct hist_var_data { ++ struct list_head list; ++ struct hist_trigger_data *hist_data; ++}; ++ + /* This function releases synth_event_mutex */ + static int create_synth_event(int argc, char **argv) + { +@@ -967,10 +993,8 @@ static int create_synth_event(int argc, + } + ret = -EEXIST; + goto out; +- } else if (delete_event) { +- ret = -EINVAL; ++ } else if (delete_event) + goto out; +- } + + if (argc < 2) { + ret = -EINVAL; +@@ -1136,11 +1160,6 @@ static u64 hist_field_timestamp(struct h + return ts; + } + +-struct hist_var_data { +- struct list_head list; +- struct hist_trigger_data *hist_data; +-}; +- + static struct hist_field *check_var_ref(struct hist_field *hist_field, + struct hist_trigger_data *var_data, + unsigned int var_idx) +@@ -1580,11 +1599,21 @@ static void destroy_hist_trigger_attrs(s + + static int parse_action(char *str, struct hist_trigger_attrs *attrs) + { +- int ret = 0; ++ int ret = -EINVAL; + + if (attrs->n_actions >= HIST_ACTIONS_MAX) + return ret; + ++ if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0)) { ++ attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL); ++ if (!attrs->action_str[attrs->n_actions]) { ++ ret = -ENOMEM; ++ return ret; ++ } ++ attrs->n_actions++; ++ ret = 0; ++ } ++ + return ret; + } + +@@ -2452,7 +2481,7 @@ static bool compatible_keys(struct hist_ + + for (n = 0; n < n_keys; n++) { + hist_field = hist_data->fields[i + n]; +- target_hist_field = hist_data->fields[j + n]; ++ target_hist_field = target_hist_data->fields[j + n]; + + if (strcmp(hist_field->type, target_hist_field->type) != 0) + return false; +@@ -2770,6 +2799,27 @@ create_target_field_var(struct hist_trig + return create_field_var(hist_data, file, var_name); + } + ++static void onmatch_destroy(struct action_data *data) ++{ ++ unsigned int i; ++ ++ mutex_lock(&synth_event_mutex); ++ ++ kfree(data->onmatch.match_event); ++ kfree(data->onmatch.match_event_system); ++ kfree(data->onmatch.synth_event_name); ++ ++ for (i = 0; i < data->n_params; i++) ++ kfree(data->params[i]); ++ ++ if (data->onmatch.synth_event) ++ data->onmatch.synth_event->ref--; ++ ++ kfree(data); ++ ++ mutex_unlock(&synth_event_mutex); ++} ++ + static void destroy_field_var(struct field_var *field_var) + { + if (!field_var) +@@ -2798,6 +2848,283 @@ static void save_field_var(struct hist_t + hist_data->n_field_var_str++; + } + ++ ++static void destroy_synth_var_refs(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_synth_var_refs; i++) ++ destroy_hist_field(hist_data->synth_var_refs[i], 0); ++} ++ ++static void save_synth_var_ref(struct hist_trigger_data *hist_data, ++ struct hist_field *var_ref) ++{ ++ hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref; ++ ++ hist_data->var_refs[hist_data->n_var_refs] = var_ref; ++ var_ref->var_ref_idx = hist_data->n_var_refs++; ++} ++ ++static int check_synth_field(struct synth_event *event, ++ struct hist_field *hist_field, ++ unsigned int field_pos) ++{ ++ struct synth_field *field; ++ ++ if (field_pos >= event->n_fields) ++ return -EINVAL; ++ ++ field = event->fields[field_pos]; ++ ++ if (strcmp(field->type, hist_field->type) != 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int parse_action_params(char *params, struct action_data *data) ++{ ++ char *param, *saved_param; ++ int ret = 0; ++ ++ while (params) { ++ if (data->n_params >= SYNTH_FIELDS_MAX) ++ goto out; ++ ++ param = 
strsep(¶ms, ","); ++ if (!param) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ param = strstrip(param); ++ if (strlen(param) < 2) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ saved_param = kstrdup(param, GFP_KERNEL); ++ if (!saved_param) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ data->params[data->n_params++] = saved_param; ++ } ++ out: ++ return ret; ++} ++ ++static struct hist_field * ++onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data, ++ char *system, char *event, char *var) ++{ ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct hist_field *hist_field; ++ ++ var++; /* skip '$' */ ++ ++ hist_field = find_target_event_var(hist_data, system, event, var); ++ if (!hist_field) { ++ if (!system) { ++ system = data->onmatch.match_event_system; ++ event = data->onmatch.match_event; ++ } ++ ++ hist_field = find_event_var(tr, system, event, var); ++ } ++ ++ return hist_field; ++} ++ ++static struct hist_field * ++onmatch_create_field_var(struct hist_trigger_data *hist_data, ++ struct action_data *data, char *system, ++ char *event, char *var) ++{ ++ struct hist_field *hist_field = NULL; ++ struct field_var *field_var; ++ ++ field_var = create_target_field_var(hist_data, system, event, var); ++ if (IS_ERR(field_var)) ++ goto out; ++ ++ if (field_var) { ++ save_field_var(hist_data, field_var); ++ hist_field = field_var->var; ++ } else { ++ if (!system) { ++ system = data->onmatch.match_event_system; ++ event = data->onmatch.match_event; ++ } ++ ++ hist_field = create_field_var_hist(hist_data, system, event, var); ++ if (IS_ERR(hist_field)) ++ goto free; ++ } ++ out: ++ return hist_field; ++ free: ++ destroy_field_var(field_var); ++ hist_field = NULL; ++ goto out; ++} ++ ++static int onmatch_create(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ struct action_data *data) ++{ ++ char *event_name, *param, *system = NULL; ++ struct hist_field *hist_field, *var_ref; ++ unsigned int i, var_ref_idx; ++ unsigned int field_pos = 0; ++ struct synth_event *event; ++ int ret = 0; ++ ++ mutex_lock(&synth_event_mutex); ++ event = find_synth_event(data->onmatch.synth_event_name); ++ if (!event) { ++ mutex_unlock(&synth_event_mutex); ++ return -EINVAL; ++ } ++ mutex_unlock(&synth_event_mutex); ++ ++ var_ref_idx = hist_data->n_var_refs; ++ ++ for (i = 0; i < data->n_params; i++) { ++ char *p; ++ ++ p = param = kstrdup(data->params[i], GFP_KERNEL); ++ if (!param) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ system = strsep(¶m, "."); ++ if (!param) { ++ param = (char *)system; ++ system = event_name = NULL; ++ } else { ++ event_name = strsep(¶m, "."); ++ if (!param) { ++ kfree(p); ++ ret = -EINVAL; ++ goto out; ++ } ++ } ++ ++ if (param[0] == '$') ++ hist_field = onmatch_find_var(hist_data, data, system, ++ event_name, param); ++ else ++ hist_field = onmatch_create_field_var(hist_data, data, ++ system, ++ event_name, ++ param); ++ ++ if (!hist_field) { ++ kfree(p); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (check_synth_field(event, hist_field, field_pos) == 0) { ++ var_ref = create_var_ref(hist_field); ++ if (!var_ref) { ++ kfree(p); ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ save_synth_var_ref(hist_data, var_ref); ++ field_pos++; ++ kfree(p); ++ continue; ++ } ++ ++ kfree(p); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (field_pos != event->n_fields) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ data->fn = action_trace; ++ data->onmatch.synth_event = event; ++ data->onmatch.var_ref_idx = var_ref_idx; ++ hist_data->actions[hist_data->n_actions++] = 
data; ++ event->ref++; ++ out: ++ return ret; ++} ++ ++static struct action_data *onmatch_parse(struct trace_array *tr, char *str) ++{ ++ char *match_event, *match_event_system; ++ char *synth_event_name, *params; ++ struct action_data *data; ++ int ret = -EINVAL; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return ERR_PTR(-ENOMEM); ++ ++ match_event = strsep(&str, ")"); ++ if (!match_event || !str) ++ goto free; ++ ++ match_event_system = strsep(&match_event, "."); ++ if (!match_event) ++ goto free; ++ ++ if (IS_ERR(event_file(tr, match_event_system, match_event))) ++ goto free; ++ ++ data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL); ++ if (!data->onmatch.match_event) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL); ++ if (!data->onmatch.match_event_system) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ strsep(&str, "."); ++ if (!str) ++ goto free; ++ ++ synth_event_name = strsep(&str, "("); ++ if (!synth_event_name || !str) ++ goto free; ++ ++ data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); ++ if (!data->onmatch.synth_event_name) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ params = strsep(&str, ")"); ++ if (!params || !str || (str && strlen(str))) ++ goto free; ++ ++ ret = parse_action_params(params, data); ++ if (ret) ++ goto free; ++ out: ++ return data; ++ free: ++ onmatch_destroy(data); ++ data = ERR_PTR(ret); ++ goto out; ++} ++ + static int create_hitcount_val(struct hist_trigger_data *hist_data) + { + hist_data->fields[HITCOUNT_IDX] = +@@ -3226,24 +3553,76 @@ static void destroy_actions(struct hist_ + for (i = 0; i < hist_data->n_actions; i++) { + struct action_data *data = hist_data->actions[i]; + +- kfree(data); ++ if (data->fn == action_trace) ++ onmatch_destroy(data); ++ else ++ kfree(data); + } + } + + static int create_actions(struct hist_trigger_data *hist_data, + struct trace_event_file *file) + { ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct action_data *data; + unsigned int i; + int ret = 0; + char *str; + + for (i = 0; i < hist_data->attrs->n_actions; i++) { + str = hist_data->attrs->action_str[i]; ++ ++ if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) { ++ char *action_str = str + strlen("onmatch("); ++ ++ data = onmatch_parse(tr, action_str); ++ if (IS_ERR(data)) ++ return PTR_ERR(data); ++ ++ ret = onmatch_create(hist_data, file, data); ++ if (ret) { ++ onmatch_destroy(data); ++ return ret; ++ } ++ } + } + + return ret; + } + ++static void print_onmatch_spec(struct seq_file *m, ++ struct hist_trigger_data *hist_data, ++ struct action_data *data) ++{ ++ unsigned int i; ++ ++ seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system, ++ data->onmatch.match_event); ++ ++ seq_printf(m, "%s(", data->onmatch.synth_event->name); ++ ++ for (i = 0; i < data->n_params; i++) { ++ if (i) ++ seq_puts(m, ","); ++ seq_printf(m, "%s", data->params[i]); ++ } ++ ++ seq_puts(m, ")"); ++} ++ ++static void print_actions_spec(struct seq_file *m, ++ struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_actions; i++) { ++ struct action_data *data = hist_data->actions[i]; ++ ++ if (data->fn == action_trace) ++ print_onmatch_spec(m, hist_data, data); ++ } ++} ++ + static void destroy_field_var_hists(struct hist_trigger_data *hist_data) + { + unsigned int i; +@@ -3266,6 +3645,7 @@ static void destroy_hist_data(struct his + destroy_actions(hist_data); + destroy_field_vars(hist_data); + 
destroy_field_var_hists(hist_data); ++ destroy_synth_var_refs(hist_data); + + kfree(hist_data); + } +@@ -3814,6 +4194,8 @@ static int event_hist_trigger_print(stru + } + seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); + ++ print_actions_spec(m, hist_data); ++ + if (data->filter_str) + seq_printf(m, " if %s", data->filter_str); + diff --git a/debian/patches/features/all/rt/0032-xfrm-Replace-hrtimer-tasklet-with-softirq-hrtimer.patch b/debian/patches/features/all/rt/0032-xfrm-Replace-hrtimer-tasklet-with-softirq-hrtimer.patch new file mode 100644 index 000000000..d058c09b5 --- /dev/null +++ b/debian/patches/features/all/rt/0032-xfrm-Replace-hrtimer-tasklet-with-softirq-hrtimer.patch @@ -0,0 +1,133 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:40:10 +0200 +Subject: [PATCH 32/36] xfrm: Replace hrtimer tasklet with softirq hrtimer +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Switch the timer to HRTIMER_MODE_SOFT, which executed the timer +callback in softirq context and remove the hrtimer_tasklet. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Cc: Steffen Klassert +Cc: Herbert Xu +Cc: "David S. Miller" +Cc: netdev@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + include/net/xfrm.h | 2 +- + net/xfrm/xfrm_state.c | 30 ++++++++++++++++++------------ + 2 files changed, 19 insertions(+), 13 deletions(-) + +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -217,7 +217,7 @@ struct xfrm_state { + struct xfrm_stats stats; + + struct xfrm_lifetime_cur curlft; +- struct tasklet_hrtimer mtimer; ++ struct hrtimer mtimer; + + struct xfrm_state_offload xso; + +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -426,7 +426,7 @@ static void xfrm_put_mode(struct xfrm_mo + + static void xfrm_state_gc_destroy(struct xfrm_state *x) + { +- tasklet_hrtimer_cancel(&x->mtimer); ++ hrtimer_cancel(&x->mtimer); + del_timer_sync(&x->rtimer); + kfree(x->aead); + kfree(x->aalg); +@@ -471,8 +471,8 @@ static void xfrm_state_gc_task(struct wo + + static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) + { +- struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); +- struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); ++ struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer); ++ enum hrtimer_restart ret = HRTIMER_NORESTART; + unsigned long now = get_seconds(); + long next = LONG_MAX; + int warn = 0; +@@ -536,7 +536,8 @@ static enum hrtimer_restart xfrm_timer_h + km_state_expired(x, 0, 0); + resched: + if (next != LONG_MAX) { +- tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); ++ hrtimer_forward_now(&x->mtimer, ktime_set(next, 0)); ++ ret = HRTIMER_RESTART; + } + + goto out; +@@ -553,7 +554,7 @@ static enum hrtimer_restart xfrm_timer_h + + out: + spin_unlock(&x->lock); +- return HRTIMER_NORESTART; ++ return ret; + } + + static void xfrm_replay_timer_handler(unsigned long data); +@@ -572,8 +573,8 @@ struct xfrm_state *xfrm_state_alloc(stru + INIT_HLIST_NODE(&x->bydst); + INIT_HLIST_NODE(&x->bysrc); + INIT_HLIST_NODE(&x->byspi); +- tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, +- CLOCK_BOOTTIME, HRTIMER_MODE_ABS); ++ hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); ++ x->mtimer.function = xfrm_timer_handler; + setup_timer(&x->rtimer, xfrm_replay_timer_handler, + (unsigned long)x); + x->curlft.add_time = get_seconds(); +@@ -1030,7 +1031,9 @@ xfrm_state_find(const xfrm_address_t *da + 
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); + } + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; +- tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); ++ hrtimer_start(&x->mtimer, ++ ktime_set(net->xfrm.sysctl_acq_expires, 0), ++ HRTIMER_MODE_REL_SOFT); + net->xfrm.state_num++; + xfrm_hash_grow_check(net, x->bydst.next != NULL); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); +@@ -1141,7 +1144,7 @@ static void __xfrm_state_insert(struct x + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); + } + +- tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); ++ hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); + if (x->replay_maxage) + mod_timer(&x->rtimer, jiffies + x->replay_maxage); + +@@ -1245,7 +1248,9 @@ static struct xfrm_state *__find_acq_cor + x->mark.m = m->m; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + xfrm_state_hold(x); +- tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); ++ hrtimer_start(&x->mtimer, ++ ktime_set(net->xfrm.sysctl_acq_expires, 0), ++ HRTIMER_MODE_REL_SOFT); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); + h = xfrm_src_hash(net, daddr, saddr, family); +@@ -1544,7 +1549,8 @@ int xfrm_state_update(struct xfrm_state + memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); + x1->km.dying = 0; + +- tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); ++ hrtimer_start(&x1->mtimer, ktime_set(1, 0), ++ HRTIMER_MODE_REL_SOFT); + if (x1->curlft.use_time) + xfrm_state_check_expire(x1); + +@@ -1568,7 +1574,7 @@ int xfrm_state_check_expire(struct xfrm_ + if (x->curlft.bytes >= x->lft.hard_byte_limit || + x->curlft.packets >= x->lft.hard_packet_limit) { + x->km.state = XFRM_STATE_EXPIRED; +- tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL); ++ hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT); + return -EINVAL; + } + diff --git a/debian/patches/features/all/rt/0033-softirq-Remove-tasklet_hrtimer.patch b/debian/patches/features/all/rt/0033-softirq-Remove-tasklet_hrtimer.patch new file mode 100644 index 000000000..fb84e6859 --- /dev/null +++ b/debian/patches/features/all/rt/0033-softirq-Remove-tasklet_hrtimer.patch @@ -0,0 +1,110 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:40:11 +0200 +Subject: [PATCH 33/36] softirq: Remove tasklet_hrtimer +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +There are no more tasklet_hrtimer users of this interface. +Remove it. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 25 ---------------------- + kernel/softirq.c | 51 ---------------------------------------------- + 2 files changed, 76 deletions(-) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -633,31 +633,6 @@ extern void tasklet_kill_immediate(struc + extern void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data); + +-struct tasklet_hrtimer { +- struct hrtimer timer; +- struct tasklet_struct tasklet; +- enum hrtimer_restart (*function)(struct hrtimer *); +-}; +- +-extern void +-tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, +- enum hrtimer_restart (*function)(struct hrtimer *), +- clockid_t which_clock, enum hrtimer_mode mode); +- +-static inline +-void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, +- const enum hrtimer_mode mode) +-{ +- hrtimer_start(&ttimer->timer, time, mode); +-} +- +-static inline +-void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) +-{ +- hrtimer_cancel(&ttimer->timer); +- tasklet_kill(&ttimer->tasklet); +-} +- + /* + * Autoprobing for irqs: + * +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -594,57 +594,6 @@ void tasklet_kill(struct tasklet_struct + } + EXPORT_SYMBOL(tasklet_kill); + +-/* +- * tasklet_hrtimer +- */ +- +-/* +- * The trampoline is called when the hrtimer expires. It schedules a tasklet +- * to run __tasklet_hrtimer_trampoline() which in turn will call the intended +- * hrtimer callback, but from softirq context. +- */ +-static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) +-{ +- struct tasklet_hrtimer *ttimer = +- container_of(timer, struct tasklet_hrtimer, timer); +- +- tasklet_hi_schedule(&ttimer->tasklet); +- return HRTIMER_NORESTART; +-} +- +-/* +- * Helper function which calls the hrtimer callback from +- * tasklet/softirq context +- */ +-static void __tasklet_hrtimer_trampoline(unsigned long data) +-{ +- struct tasklet_hrtimer *ttimer = (void *)data; +- enum hrtimer_restart restart; +- +- restart = ttimer->function(&ttimer->timer); +- if (restart != HRTIMER_NORESTART) +- hrtimer_restart(&ttimer->timer); +-} +- +-/** +- * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks +- * @ttimer: tasklet_hrtimer which is initialized +- * @function: hrtimer callback function which gets called from softirq context +- * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) +- * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) +- */ +-void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, +- enum hrtimer_restart (*function)(struct hrtimer *), +- clockid_t which_clock, enum hrtimer_mode mode) +-{ +- hrtimer_init(&ttimer->timer, which_clock, mode); +- ttimer->timer.function = __hrtimer_tasklet_trampoline; +- tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline, +- (unsigned long)ttimer); +- ttimer->function = function; +-} +-EXPORT_SYMBOL_GPL(tasklet_hrtimer_init); +- + void __init softirq_init(void) + { + int cpu; diff --git a/debian/patches/features/all/rt/0024-tracing-Add-onmax-hist-trigger-action-support.patch b/debian/patches/features/all/rt/0033-tracing-Add-onmax-hist-trigger-action-support.patch similarity index 73% rename from debian/patches/features/all/rt/0024-tracing-Add-onmax-hist-trigger-action-support.patch rename to debian/patches/features/all/rt/0033-tracing-Add-onmax-hist-trigger-action-support.patch index 7041a5818..0d13d5e49 100644 --- 
a/debian/patches/features/all/rt/0024-tracing-Add-onmax-hist-trigger-action-support.patch +++ b/debian/patches/features/all/rt/0033-tracing-Add-onmax-hist-trigger-action-support.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:25 -0500 -Subject: [PATCH 24/32] tracing: Add 'onmax' hist trigger action support -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 15:00:04 -0500 +Subject: [PATCH 33/42] tracing: Add 'onmax' hist trigger action support +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add an 'onmax(var).save(field,...)' hist trigger action which is invoked whenever an event exceeds the current maximum. @@ -21,12 +21,12 @@ the timestamp difference is calculated. If the resulting latency exceeds the current maximum latency, the specified save() values are saved: - # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ + # echo 'hist:keys=pid:ts0=$common_timestamp.usecs \ if comm=="cyclictest"' >> \ /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger # echo 'hist:keys=next_pid:\ - wakeup_lat=common_timestamp.usecs-$ts0:\ + wakeup_lat=$common_timestamp.usecs-$ts0:\ onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \ if next_comm=="cyclictest"' >> \ /sys/kernel/debug/tracing/events/sched/sched_switch/trigger @@ -36,28 +36,31 @@ corresponding to the max are displayed following the rest of the fields: # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist - { next_pid: 2255 } hitcount: 239 \ - common_timestamp-$ts0: 0 - max: 27 next_comm: cyclictest \ - prev_pid: 0 prev_prio: 120 prev_comm: swapper/1 \ - { next_pid: 2256 } hitcount: 2355 common_timestamp-$ts0: 0 \ - max: 49 next_comm: cyclictest \ - prev_pid: 0 prev_prio: 120 prev_comm: swapper/0 + + { next_pid: 3728 } hitcount: 199 \ + max: 123 next_comm: cyclictest prev_pid: 0 \ + prev_prio: 120 prev_comm: swapper/3 + { next_pid: 3730 } hitcount: 1321 \ + max: 15 next_comm: cyclictest prev_pid: 0 \ + prev_prio: 120 prev_comm: swapper/1 + { next_pid: 3729 } hitcount: 1973\ + max: 25 next_comm: cyclictest prev_pid: 0 \ + prev_prio: 120 prev_comm: swapper/0 Totals: - Hits: 12970 - Entries: 2 - Dropped: 0 + Hits: 3493 + Entries: 3 + Dropped: 0 Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 310 ++++++++++++++++++++++++++++++++++----- - 1 file changed, 276 insertions(+), 34 deletions(-) + kernel/trace/trace_events_hist.c | 332 ++++++++++++++++++++++++++++++++++----- + 1 file changed, 297 insertions(+), 35 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -282,6 +282,10 @@ struct hist_trigger_data { +@@ -292,6 +292,10 @@ struct hist_trigger_data { unsigned int n_field_var_str; struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX]; unsigned int n_field_var_hists; @@ -68,20 +71,22 @@ Signed-off-by: Sebastian Andrzej Siewior }; struct synth_field { -@@ -318,6 +322,12 @@ struct action_data { - char *match_event_system; - char *synth_event_name; - struct synth_event *synth_event; +@@ -334,6 +338,14 @@ struct action_data { + char *synth_event_name; + struct synth_event *synth_event; + } onmatch; + -+ char *onmax_var_str; -+ char *onmax_fn_name; -+ unsigned int max_var_ref_idx; -+ struct hist_field *max_var; -+ struct hist_field *onmax_var; ++ struct { ++ char *var_str; ++ char *fn_name; ++ unsigned int max_var_ref_idx; ++ struct hist_field *max_var; ++ struct hist_field *var; ++ } 
onmax; + }; }; - static LIST_HEAD(synth_event_list); -@@ -1493,7 +1503,8 @@ static int parse_action(char *str, struc +@@ -1604,7 +1616,8 @@ static int parse_action(char *str, struc if (attrs->n_actions >= HIST_ACTIONS_MAX) return ret; @@ -91,25 +96,16 @@ Signed-off-by: Sebastian Andrzej Siewior attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL); if (!attrs->action_str[attrs->n_actions]) { ret = -ENOMEM; -@@ -1612,7 +1623,7 @@ static void hist_trigger_elt_data_free(s - struct hist_elt_data *private_data = elt->private_data; - unsigned int i, n_str; - -- n_str = hist_data->n_field_var_str; -+ n_str = hist_data->n_field_var_str + hist_data->n_max_var_str; - - for (i = 0; i < n_str; i++) - kfree(private_data->field_var_str[i]); -@@ -1647,7 +1658,7 @@ static int hist_trigger_elt_data_alloc(s +@@ -1776,7 +1789,7 @@ static int hist_trigger_elt_data_alloc(s } } - n_str = hist_data->n_field_var_str; + n_str = hist_data->n_field_var_str + hist_data->n_max_var_str; - for (i = 0; i < n_str; i++) { - elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL); -@@ -2504,6 +2515,15 @@ static void update_field_vars(struct his + size = STR_VAR_LEN_MAX; + +@@ -2690,6 +2703,15 @@ static void update_field_vars(struct his hist_data->n_field_vars, 0); } @@ -125,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior static struct hist_field *create_var(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *name, int size, const char *type) -@@ -2613,6 +2633,222 @@ create_target_field_var(struct hist_trig +@@ -2799,6 +2821,229 @@ create_target_field_var(struct hist_trig return create_field_var(hist_data, file, var_name); } @@ -134,7 +130,7 @@ Signed-off-by: Sebastian Andrzej Siewior + struct tracing_map_elt *elt, + struct action_data *data) +{ -+ unsigned int i, save_var_idx, max_idx = data->max_var->var.idx; ++ unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx; + + seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx)); + @@ -148,7 +144,7 @@ Signed-off-by: Sebastian Andrzej Siewior + val = tracing_map_read_var(elt, save_var_idx); + + if (save_val->flags & HIST_FIELD_FL_STRING) { -+ seq_printf(m, " %s: %-50s", save_var->var.name, ++ seq_printf(m, " %s: %-32s", save_var->var.name, + (char *)(uintptr_t)(val)); + } else + seq_printf(m, " %s: %10llu", save_var->var.name, val); @@ -160,8 +156,8 @@ Signed-off-by: Sebastian Andrzej Siewior + struct ring_buffer_event *rbe, + struct action_data *data, u64 *var_ref_vals) +{ -+ unsigned int max_idx = data->max_var->var.idx; -+ unsigned int max_var_ref_idx = data->max_var_ref_idx; ++ unsigned int max_idx = data->onmax.max_var->var.idx; ++ unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx; + + u64 var_val, max_val; + @@ -180,11 +176,11 @@ Signed-off-by: Sebastian Andrzej Siewior +{ + unsigned int i; + -+ destroy_hist_field(data->max_var, 0); -+ destroy_hist_field(data->onmax_var, 0); ++ destroy_hist_field(data->onmax.max_var, 0); ++ destroy_hist_field(data->onmax.var, 0); + -+ kfree(data->onmax_var_str); -+ kfree(data->onmax_fn_name); ++ kfree(data->onmax.var_str); ++ kfree(data->onmax.fn_name); + + for (i = 0; i < data->n_params; i++) + kfree(data->params[i]); @@ -206,7 +202,7 @@ Signed-off-by: Sebastian Andrzej Siewior + unsigned int i; + int ret = 0; + -+ onmax_var_str = data->onmax_var_str; ++ onmax_var_str = data->onmax.var_str; + if (onmax_var_str[0] != '$') + return -EINVAL; + onmax_var_str++; @@ -221,32 +217,30 @@ Signed-off-by: Sebastian Andrzej Siewior + if (!ref_field) + return -ENOMEM; + -+ 
ref_field->var.idx = var_field->var.idx; -+ ref_field->var.hist_data = hist_data; -+ ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); -+ ref_field->type = kstrdup(var_field->type, GFP_KERNEL); -+ if (!ref_field->name || !ref_field->type) { ++ if (init_var_ref(ref_field, var_field)) { + destroy_hist_field(ref_field, 0); + ret = -ENOMEM; + goto out; + } + hist_data->var_refs[hist_data->n_var_refs] = ref_field; + ref_field->var_ref_idx = hist_data->n_var_refs++; -+ data->onmax_var = ref_field; ++ data->onmax.var = ref_field; + + data->fn = onmax_save; -+ data->max_var_ref_idx = var_ref_idx; ++ data->onmax.max_var_ref_idx = var_ref_idx; + max_var = create_var(hist_data, file, "max", sizeof(u64), "u64"); + if (IS_ERR(max_var)) { + ret = PTR_ERR(max_var); + goto out; + } -+ data->max_var = max_var; ++ data->onmax.max_var = max_var; + + for (i = 0; i < data->n_params; i++) { + param = kstrdup(data->params[i], GFP_KERNEL); -+ if (!param) ++ if (!param) { ++ ret = -ENOMEM; + goto out; ++ } + + field_var = create_target_field_var(hist_data, NULL, NULL, param); + if (IS_ERR(field_var)) { @@ -277,8 +271,10 @@ Signed-off-by: Sebastian Andrzej Siewior + goto out; + + param = strsep(¶ms, ","); -+ if (!param) ++ if (!param) { ++ ret = -EINVAL; + goto out; ++ } + + param = strstrip(param); + if (strlen(param) < 2) { @@ -311,7 +307,11 @@ Signed-off-by: Sebastian Andrzej Siewior + onmax_var_str = strsep(&str, ")"); + if (!onmax_var_str || !str) + return ERR_PTR(-EINVAL); -+ data->onmax_var_str = kstrdup(onmax_var_str, GFP_KERNEL); ++ data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL); ++ if (!data->onmax.var_str) { ++ ret = -ENOMEM; ++ goto free; ++ } + + strsep(&str, "."); + if (!str) @@ -324,16 +324,19 @@ Signed-off-by: Sebastian Andrzej Siewior + if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) { + char *params = strsep(&str, ")"); + -+ if (!params) ++ if (!params) { ++ ret = -EINVAL; + goto free; ++ } + + ret = parse_action_params(params, data); + if (ret) + goto free; -+ } -+ data->onmax_fn_name = kstrdup(onmax_fn_name, GFP_KERNEL); ++ } else ++ goto free; + -+ if (!data->onmax_var_str || !data->onmax_fn_name) { ++ data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL); ++ if (!data->onmax.fn_name) { + ret = -ENOMEM; + goto free; + } @@ -348,7 +351,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void onmatch_destroy(struct action_data *data) { unsigned int i; -@@ -2689,37 +2925,6 @@ static int check_synth_field(struct synt +@@ -2883,39 +3128,6 @@ static int check_synth_field(struct synt return 0; } @@ -362,8 +365,10 @@ Signed-off-by: Sebastian Andrzej Siewior - goto out; - - param = strsep(¶ms, ","); -- if (!param) +- if (!param) { +- ret = -EINVAL; - goto out; +- } - - param = strstrip(param); - if (strlen(param) < 2) { @@ -386,7 +391,7 @@ Signed-off-by: Sebastian Andrzej Siewior static struct hist_field * onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data, char *system, char *event, char *var) -@@ -3313,6 +3518,8 @@ static void destroy_actions(struct hist_ +@@ -3555,6 +3767,8 @@ static void destroy_actions(struct hist_ if (data->fn == action_trace) onmatch_destroy(data); @@ -395,7 +400,7 @@ Signed-off-by: Sebastian Andrzej Siewior else kfree(data); } -@@ -3341,6 +3548,18 @@ static int create_actions(struct hist_tr +@@ -3584,12 +3798,56 @@ static int create_actions(struct hist_tr onmatch_destroy(data); return ret; } @@ -414,16 +419,23 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -3355,9 +3574,30 @@ static void print_actions(struct 
seq_fil + return ret; + } - for (i = 0; i < hist_data->n_actions; i++) { - struct action_data *data = hist_data->actions[i]; ++static void print_actions(struct seq_file *m, ++ struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_actions; i++) { ++ struct action_data *data = hist_data->actions[i]; + + if (data->fn == onmax_save) + onmax_print(m, hist_data, elt, data); - } - } - ++ } ++} ++ +static void print_onmax_spec(struct seq_file *m, + struct hist_trigger_data *hist_data, + struct action_data *data) @@ -431,8 +443,8 @@ Signed-off-by: Sebastian Andrzej Siewior + unsigned int i; + + seq_puts(m, ":onmax("); -+ seq_printf(m, "%s", data->onmax_var_str); -+ seq_printf(m, ").%s(", data->onmax_fn_name); ++ seq_printf(m, "%s", data->onmax.var_str); ++ seq_printf(m, ").%s(", data->onmax.fn_name); + + for (i = 0; i < hist_data->n_max_vars; i++) { + seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name); @@ -445,7 +457,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void print_onmatch_spec(struct seq_file *m, struct hist_trigger_data *hist_data, struct action_data *data) -@@ -3388,6 +3628,8 @@ static void print_actions_spec(struct se +@@ -3620,6 +3878,8 @@ static void print_actions_spec(struct se if (data->fn == action_trace) print_onmatch_spec(m, hist_data, data); @@ -454,3 +466,12 @@ Signed-off-by: Sebastian Andrzej Siewior } } +@@ -3994,6 +4254,8 @@ hist_trigger_entry_print(struct seq_file + } + } + ++ print_actions(m, hist_data, elt); ++ + seq_puts(m, "\n"); + } + diff --git a/debian/patches/features/all/rt/0034-ALSA-dummy-Replace-tasklet-with-softirq-hrtimer.patch b/debian/patches/features/all/rt/0034-ALSA-dummy-Replace-tasklet-with-softirq-hrtimer.patch new file mode 100644 index 000000000..9f855fc66 --- /dev/null +++ b/debian/patches/features/all/rt/0034-ALSA-dummy-Replace-tasklet-with-softirq-hrtimer.patch @@ -0,0 +1,99 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:40:12 +0200 +Subject: [PATCH 34/36] ALSA/dummy: Replace tasklet with softirq hrtimer +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The tasklet is used to defer the execution of snd_pcm_period_elapsed() to +the softirq context. Using the HRTIMER_MODE_SOFT mode invokes the timer +callback in softirq context as well which renders the tasklet useless. 
+ +[o-takashi: avoid stall due to a call of hrtimer_cancel() on a callback + of hrtimer] +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Cc: Jaroslav Kysela +Cc: Takashi Iwai +Cc: Takashi Sakamoto +Cc: alsa-devel@alsa-project.org +Link: http://lkml.kernel.org/r/20170905161820.jtysvxtfleunbbmf@breakpoint.cc +Signed-off-by: Sebastian Andrzej Siewior +--- + sound/drivers/dummy.c | 27 ++++++++++++--------------- + 1 file changed, 12 insertions(+), 15 deletions(-) + +--- a/sound/drivers/dummy.c ++++ b/sound/drivers/dummy.c +@@ -376,17 +376,9 @@ struct dummy_hrtimer_pcm { + ktime_t period_time; + atomic_t running; + struct hrtimer timer; +- struct tasklet_struct tasklet; + struct snd_pcm_substream *substream; + }; + +-static void dummy_hrtimer_pcm_elapsed(unsigned long priv) +-{ +- struct dummy_hrtimer_pcm *dpcm = (struct dummy_hrtimer_pcm *)priv; +- if (atomic_read(&dpcm->running)) +- snd_pcm_period_elapsed(dpcm->substream); +-} +- + static enum hrtimer_restart dummy_hrtimer_callback(struct hrtimer *timer) + { + struct dummy_hrtimer_pcm *dpcm; +@@ -394,7 +386,14 @@ static enum hrtimer_restart dummy_hrtime + dpcm = container_of(timer, struct dummy_hrtimer_pcm, timer); + if (!atomic_read(&dpcm->running)) + return HRTIMER_NORESTART; +- tasklet_schedule(&dpcm->tasklet); ++ /* ++ * In cases of XRUN and draining, this calls .trigger to stop PCM ++ * substream. ++ */ ++ snd_pcm_period_elapsed(dpcm->substream); ++ if (!atomic_read(&dpcm->running)) ++ return HRTIMER_NORESTART; ++ + hrtimer_forward_now(timer, dpcm->period_time); + return HRTIMER_RESTART; + } +@@ -404,7 +403,7 @@ static int dummy_hrtimer_start(struct sn + struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; + + dpcm->base_time = hrtimer_cb_get_time(&dpcm->timer); +- hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL); ++ hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL_SOFT); + atomic_set(&dpcm->running, 1); + return 0; + } +@@ -414,14 +413,14 @@ static int dummy_hrtimer_stop(struct snd + struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; + + atomic_set(&dpcm->running, 0); +- hrtimer_cancel(&dpcm->timer); ++ if (!hrtimer_callback_running(&dpcm->timer)) ++ hrtimer_cancel(&dpcm->timer); + return 0; + } + + static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) + { + hrtimer_cancel(&dpcm->timer); +- tasklet_kill(&dpcm->tasklet); + } + + static snd_pcm_uframes_t +@@ -466,12 +465,10 @@ static int dummy_hrtimer_create(struct s + if (!dpcm) + return -ENOMEM; + substream->runtime->private_data = dpcm; +- hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + dpcm->timer.function = dummy_hrtimer_callback; + dpcm->substream = substream; + atomic_set(&dpcm->running, 0); +- tasklet_init(&dpcm->tasklet, dummy_hrtimer_pcm_elapsed, +- (unsigned long)dpcm); + return 0; + } + diff --git a/debian/patches/features/all/rt/0025-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch b/debian/patches/features/all/rt/0034-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch similarity index 53% rename from debian/patches/features/all/rt/0025-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch rename to debian/patches/features/all/rt/0034-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch index 9bbb5bb86..3c3236684 100644 --- a/debian/patches/features/all/rt/0025-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch +++ 
b/debian/patches/features/all/rt/0034-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch @@ -1,37 +1,45 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:26 -0500 -Subject: [PATCH 25/32] tracing: Allow whitespace to surround hist trigger +Date: Fri, 22 Sep 2017 15:00:05 -0500 +Subject: [PATCH 34/42] tracing: Allow whitespace to surround hist trigger filter -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The existing code only allows for one space before and after the 'if' specifying the filter for a hist trigger. Add code to make that more -permissive as far as whitespace goes. +permissive as far as whitespace goes. Specifically, we want to allow +spaces in the trigger itself now that we have additional syntax +(onmatch/onmax) where spaces are more natural e.g. spaces after commas +in param lists. Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace_events_hist.c | 19 +++++++++++++++---- - 1 file changed, 15 insertions(+), 4 deletions(-) + kernel/trace/trace_events_hist.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -4632,7 +4632,7 @@ static int event_hist_trigger_func(struc - struct event_trigger_ops *trigger_ops; - struct hist_trigger_data *hist_data; +@@ -4857,7 +4857,7 @@ static int event_hist_trigger_func(struc + struct synth_event *se; + const char *se_name; bool remove = false; - char *trigger; + char *trigger, *p; int ret = 0; if (!param) -@@ -4642,9 +4642,19 @@ static int event_hist_trigger_func(struc +@@ -4866,10 +4866,23 @@ static int event_hist_trigger_func(struc + if (glob[0] == '!') remove = true; - /* separate the trigger from the filter (k:v [if filter]) */ +- /* separate the trigger from the filter (k:v [if filter]) */ - trigger = strsep(¶m, " \t"); - if (!trigger) - return -EINVAL; ++ /* ++ * separate the trigger from the filter (k:v [if filter]) ++ * allowing for whitespace in the trigger ++ */ + trigger = param; + p = strstr(param, " if"); + if (!p) @@ -48,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior attrs = parse_hist_trigger_attrs(trigger); if (IS_ERR(attrs)) -@@ -4694,6 +4704,7 @@ static int event_hist_trigger_func(struc +@@ -4927,6 +4940,7 @@ static int event_hist_trigger_func(struc } ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); diff --git a/debian/patches/features/all/rt/0027-tracing-Add-cpu-field-for-hist-triggers.patch b/debian/patches/features/all/rt/0035-tracing-Add-cpu-field-for-hist-triggers.patch similarity index 62% rename from debian/patches/features/all/rt/0027-tracing-Add-cpu-field-for-hist-triggers.patch rename to debian/patches/features/all/rt/0035-tracing-Add-cpu-field-for-hist-triggers.patch index 645553f4c..1a0269f68 100644 --- a/debian/patches/features/all/rt/0027-tracing-Add-cpu-field-for-hist-triggers.patch +++ b/debian/patches/features/all/rt/0035-tracing-Add-cpu-field-for-hist-triggers.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:28 -0500 -Subject: [PATCH 27/32] tracing: Add cpu field for hist triggers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 15:00:06 -0500 +Subject: [PATCH 35/42] tracing: Add cpu field for hist triggers +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz A common key to use in a histogram is the cpuid - add a new cpu 'synthetic' field for that purpose. This field is named cpu rather @@ -13,8 +13,8 @@ Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- Documentation/trace/events.txt | 18 ++++++++++++++++++ - kernel/trace/trace_events_hist.c | 30 +++++++++++++++++++++++++++--- - 2 files changed, 45 insertions(+), 3 deletions(-) + kernel/trace/trace_events_hist.c | 28 +++++++++++++++++++++++++++- + 2 files changed, 45 insertions(+), 1 deletion(-) --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -22,15 +22,15 @@ Signed-off-by: Sebastian Andrzej Siewior The examples below provide a more concrete illustration of the concepts and typical usage patterns discussed above. -+ 'synthetic' event fields ++ 'special' event fields + ------------------------ + -+ There are a number of 'synthetic fields' available for use as keys -+ or values in a hist trigger. These look like and behave as if they -+ were event fields, but aren't actually part of the event's field -+ definition or format file. They are however available for any ++ There are a number of 'special event fields' available for use as ++ keys or values in a hist trigger. These look like and behave as if ++ they were actual event fields, but aren't really part of the event's ++ field definition or format file. They are however available for any + event, and can be used anywhere an actual event field could be. -+ 'Synthetic' field names are always prefixed with a '$' character to ++ 'Special' field names are always prefixed with a '$' character to + indicate that they're not normal fields (with the exception of + 'cpu', for compatibility with existing filter usage): + @@ -45,15 +45,15 @@ Signed-off-by: Sebastian Andrzej Siewior --------------------------- --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -224,6 +224,7 @@ enum hist_field_flags { - HIST_FIELD_FL_VAR_ONLY = 8192, - HIST_FIELD_FL_EXPR = 16384, - HIST_FIELD_FL_VAR_REF = 32768, -+ HIST_FIELD_FL_CPU = 65536, +@@ -226,6 +226,7 @@ enum hist_field_flags { + HIST_FIELD_FL_VAR_ONLY = 1 << 13, + HIST_FIELD_FL_EXPR = 1 << 14, + HIST_FIELD_FL_VAR_REF = 1 << 15, ++ HIST_FIELD_FL_CPU = 1 << 16, }; - struct hist_trigger_attrs { -@@ -1081,6 +1082,16 @@ static u64 hist_field_timestamp(struct h + struct var_defs { +@@ -1172,6 +1173,16 @@ static u64 hist_field_timestamp(struct h return ts; } @@ -62,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior + struct ring_buffer_event *rbe, + void *event) +{ -+ int cpu = raw_smp_processor_id(); ++ int cpu = smp_processor_id(); + + return cpu; +} @@ -70,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior static struct hist_field *check_var_ref(struct hist_field *hist_field, struct hist_trigger_data *var_data, unsigned int var_idx) -@@ -1407,6 +1418,8 @@ static const char *hist_field_name(struc +@@ -1520,6 +1531,8 @@ static const char *hist_field_name(struc field_name = hist_field_name(field->operands[0], ++level); else if (field->flags & HIST_FIELD_FL_TIMESTAMP) field_name = "$common_timestamp"; @@ -79,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior else if (field->flags & HIST_FIELD_FL_EXPR || field->flags & HIST_FIELD_FL_VAR_REF) field_name = field->name; -@@ -1848,6 +1861,15 @@ static struct hist_field *create_hist_fi +@@ -2012,6 +2025,15 @@ static struct hist_field *create_hist_fi goto out; } @@ -95,7 +95,7 @@ Signed-off-by: Sebastian 
Andrzej Siewior if (WARN_ON_ONCE(!field)) goto out; -@@ -1980,7 +2002,9 @@ parse_field(struct hist_trigger_data *hi +@@ -2204,7 +2226,9 @@ parse_field(struct hist_trigger_data *hi hist_data->enable_timestamps = true; if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) hist_data->attrs->ts_in_usecs = true; @@ -104,25 +104,9 @@ Signed-off-by: Sebastian Andrzej Siewior + *flags |= HIST_FIELD_FL_CPU; + else { field = trace_find_event_field(file->event_call, field_name); - if (!field) - return ERR_PTR(-EINVAL); -@@ -3019,7 +3043,6 @@ static int onmatch_create(struct hist_tr - goto out; - } - } -- - if (param[0] == '$') - hist_field = onmatch_find_var(hist_data, data, system, - event_name, param); -@@ -3034,7 +3057,6 @@ static int onmatch_create(struct hist_tr - ret = -EINVAL; - goto out; - } -- - if (check_synth_field(event, hist_field, field_pos) == 0) { - var_ref = create_var_ref(hist_field); - if (!var_ref) { -@@ -4128,6 +4150,8 @@ static void hist_field_print(struct seq_ + if (!field || !field->size) { + field = ERR_PTR(-EINVAL); +@@ -4353,6 +4377,8 @@ static void hist_field_print(struct seq_ if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) seq_puts(m, "$common_timestamp"); diff --git a/debian/patches/features/all/rt/0035-usb-gadget-NCM-Replace-tasklet-with-softirq-hrtimer.patch b/debian/patches/features/all/rt/0035-usb-gadget-NCM-Replace-tasklet-with-softirq-hrtimer.patch new file mode 100644 index 000000000..270f81788 --- /dev/null +++ b/debian/patches/features/all/rt/0035-usb-gadget-NCM-Replace-tasklet-with-softirq-hrtimer.patch @@ -0,0 +1,97 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:40:14 +0200 +Subject: [PATCH 35/36] usb/gadget/NCM: Replace tasklet with softirq hrtimer +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The tx_tasklet tasklet is used in invoke the hrtimer (task_timer) in +softirq context. This can be also achieved without the tasklet but +with HRTIMER_MODE_SOFT as hrtimer mode. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Cc: Felipe Balbi +Cc: linux-usb@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/usb/gadget/function/f_ncm.c | 30 +++++++----------------------- + 1 file changed, 7 insertions(+), 23 deletions(-) + +--- a/drivers/usb/gadget/function/f_ncm.c ++++ b/drivers/usb/gadget/function/f_ncm.c +@@ -77,9 +77,7 @@ struct f_ncm { + struct sk_buff *skb_tx_ndp; + u16 ndp_dgram_count; + bool timer_force_tx; +- struct tasklet_struct tx_tasklet; + struct hrtimer task_timer; +- + bool timer_stopping; + }; + +@@ -1108,7 +1106,7 @@ static struct sk_buff *ncm_wrap_ntb(stru + + /* Delay the timer. */ + hrtimer_start(&ncm->task_timer, TX_TIMEOUT_NSECS, +- HRTIMER_MODE_REL); ++ HRTIMER_MODE_REL_SOFT); + + /* Add the datagram position entries */ + ntb_ndp = skb_put_zero(ncm->skb_tx_ndp, dgram_idx_len); +@@ -1152,17 +1150,15 @@ static struct sk_buff *ncm_wrap_ntb(stru + } + + /* +- * This transmits the NTB if there are frames waiting. ++ * The transmit should only be run if no skb data has been sent ++ * for a certain duration. + */ +-static void ncm_tx_tasklet(unsigned long data) ++static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) + { +- struct f_ncm *ncm = (void *)data; +- +- if (ncm->timer_stopping) +- return; ++ struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); + + /* Only send if data is available. 
*/ +- if (ncm->skb_tx_data) { ++ if (!ncm->timer_stopping && ncm->skb_tx_data) { + ncm->timer_force_tx = true; + + /* XXX This allowance of a NULL skb argument to ndo_start_xmit +@@ -1175,16 +1171,6 @@ static void ncm_tx_tasklet(unsigned long + + ncm->timer_force_tx = false; + } +-} +- +-/* +- * The transmit should only be run if no skb data has been sent +- * for a certain duration. +- */ +-static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) +-{ +- struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); +- tasklet_schedule(&ncm->tx_tasklet); + return HRTIMER_NORESTART; + } + +@@ -1517,8 +1503,7 @@ static int ncm_bind(struct usb_configura + ncm->port.open = ncm_open; + ncm->port.close = ncm_close; + +- tasklet_init(&ncm->tx_tasklet, ncm_tx_tasklet, (unsigned long) ncm); +- hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + ncm->task_timer.function = ncm_tx_timeout; + + DBG(cdev, "CDC Network: %s speed IN/%s OUT/%s NOTIFY/%s\n", +@@ -1627,7 +1612,6 @@ static void ncm_unbind(struct usb_config + DBG(c->cdev, "ncm unbind\n"); + + hrtimer_cancel(&ncm->task_timer); +- tasklet_kill(&ncm->tx_tasklet); + + ncm_string_defs[0].id = 0; + usb_free_all_descriptors(f); diff --git a/debian/patches/features/all/rt/0036-net-mvpp2-Replace-tasklet-with-softirq-hrtimer.patch b/debian/patches/features/all/rt/0036-net-mvpp2-Replace-tasklet-with-softirq-hrtimer.patch new file mode 100644 index 000000000..1078560ab --- /dev/null +++ b/debian/patches/features/all/rt/0036-net-mvpp2-Replace-tasklet-with-softirq-hrtimer.patch @@ -0,0 +1,133 @@ +From: Thomas Gleixner +Date: Sun, 22 Oct 2017 23:40:15 +0200 +Subject: [PATCH 36/36] net/mvpp2: Replace tasklet with softirq hrtimer +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The tx_done_tasklet tasklet is used in invoke the hrtimer +(mvpp2_hr_timer_cb) in softirq context. This can be also achieved without +the tasklet but with HRTIMER_MODE_SOFT as hrtimer mode. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Anna-Maria Gleixner +Cc: "David S. 
Miller" +Cc: Thomas Petazzoni +Cc: netdev@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/ethernet/marvell/mvpp2.c | 62 ++++++++++++++--------------------- + 1 file changed, 25 insertions(+), 37 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvpp2.c ++++ b/drivers/net/ethernet/marvell/mvpp2.c +@@ -830,9 +830,8 @@ struct mvpp2_pcpu_stats { + /* Per-CPU port control */ + struct mvpp2_port_pcpu { + struct hrtimer tx_done_timer; ++ struct net_device *dev; + bool timer_scheduled; +- /* Tasklet for egress finalization */ +- struct tasklet_struct tx_done_tasklet; + }; + + struct mvpp2_queue_vector { +@@ -5979,46 +5978,34 @@ static void mvpp2_link_event(struct net_ + } + } + +-static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu) +-{ +- ktime_t interval; +- +- if (!port_pcpu->timer_scheduled) { +- port_pcpu->timer_scheduled = true; +- interval = MVPP2_TXDONE_HRTIMER_PERIOD_NS; +- hrtimer_start(&port_pcpu->tx_done_timer, interval, +- HRTIMER_MODE_REL_PINNED); +- } +-} +- +-static void mvpp2_tx_proc_cb(unsigned long data) ++static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer) + { +- struct net_device *dev = (struct net_device *)data; +- struct mvpp2_port *port = netdev_priv(dev); +- struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); ++ struct net_device *dev; ++ struct mvpp2_port *port; ++ struct mvpp2_port_pcpu *port_pcpu; + unsigned int tx_todo, cause; + ++ port_pcpu = container_of(timer, struct mvpp2_port_pcpu, tx_done_timer); ++ dev = port_pcpu->dev; ++ + if (!netif_running(dev)) +- return; ++ return HRTIMER_NORESTART; ++ + port_pcpu->timer_scheduled = false; ++ port = netdev_priv(dev); + + /* Process all the Tx queues */ + cause = (1 << port->ntxqs) - 1; + tx_todo = mvpp2_tx_done(port, cause, smp_processor_id()); + + /* Set the timer in case not all the packets were processed */ +- if (tx_todo) +- mvpp2_timer_set(port_pcpu); +-} +- +-static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer) +-{ +- struct mvpp2_port_pcpu *port_pcpu = container_of(timer, +- struct mvpp2_port_pcpu, +- tx_done_timer); +- +- tasklet_schedule(&port_pcpu->tx_done_tasklet); ++ if (tx_todo && !port_pcpu->timer_scheduled) { ++ port_pcpu->timer_scheduled = true; ++ hrtimer_forward_now(&port_pcpu->tx_done_timer, ++ MVPP2_TXDONE_HRTIMER_PERIOD_NS); + ++ return HRTIMER_RESTART; ++ } + return HRTIMER_NORESTART; + } + +@@ -6507,7 +6494,12 @@ static int mvpp2_tx(struct sk_buff *skb, + txq_pcpu->count > 0) { + struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); + +- mvpp2_timer_set(port_pcpu); ++ if (!port_pcpu->timer_scheduled) { ++ port_pcpu->timer_scheduled = true; ++ hrtimer_start(&port_pcpu->tx_done_timer, ++ MVPP2_TXDONE_HRTIMER_PERIOD_NS, ++ HRTIMER_MODE_REL_PINNED_SOFT); ++ } + } + + return NETDEV_TX_OK; +@@ -6896,7 +6888,6 @@ static int mvpp2_stop(struct net_device + + hrtimer_cancel(&port_pcpu->tx_done_timer); + port_pcpu->timer_scheduled = false; +- tasklet_kill(&port_pcpu->tx_done_tasklet); + } + } + mvpp2_cleanup_rxqs(port); +@@ -7664,13 +7655,10 @@ static int mvpp2_port_probe(struct platf + port_pcpu = per_cpu_ptr(port->pcpu, cpu); + + hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC, +- HRTIMER_MODE_REL_PINNED); ++ HRTIMER_MODE_REL_PINNED_SOFT); + port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb; + port_pcpu->timer_scheduled = false; +- +- tasklet_init(&port_pcpu->tx_done_tasklet, +- mvpp2_tx_proc_cb, +- (unsigned long)dev); ++ port_pcpu->dev = dev; + } + } + diff --git 
a/debian/patches/features/all/rt/0036-tracing-Add-hist-trigger-support-for-variable-refere.patch b/debian/patches/features/all/rt/0036-tracing-Add-hist-trigger-support-for-variable-refere.patch new file mode 100644 index 000000000..19b50416a --- /dev/null +++ b/debian/patches/features/all/rt/0036-tracing-Add-hist-trigger-support-for-variable-refere.patch @@ -0,0 +1,142 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 15:00:07 -0500 +Subject: [PATCH 36/42] tracing: Add hist trigger support for variable + reference aliases +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Add support for alias=$somevar where alias can be used as +onmatch($alias). + +Aliases are a way of creating a new name for an existing variable, for +flexibly in making naming more clear in certain cases. For example in +the below the user perhaps feels that using $new_lat in the synthetic +event invocation is opaque or doesn't fit well stylistically with +previous triggers, so creates an alias of $new_lat named $latency and +uses that in the call instead: + + # echo 'hist:keys=next_pid:new_lat=$common_timestamp.usecs' > + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + + # echo 'hist:keys=pid:latency=$new_lat: + onmatch(sched.sched_switch).wake2($latency,pid)' > + /sys/kernel/debug/tracing/events/synthetic/wake1/trigger + +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 61 +++++++++++++++++++++++++++++++++++++-- + 1 file changed, 58 insertions(+), 3 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -227,6 +227,7 @@ enum hist_field_flags { + HIST_FIELD_FL_EXPR = 1 << 14, + HIST_FIELD_FL_VAR_REF = 1 << 15, + HIST_FIELD_FL_CPU = 1 << 16, ++ HIST_FIELD_FL_ALIAS = 1 << 17, + }; + + struct var_defs { +@@ -1527,7 +1528,8 @@ static const char *hist_field_name(struc + + if (field->field) + field_name = field->field->name; +- else if (field->flags & HIST_FIELD_FL_LOG2) ++ else if (field->flags & HIST_FIELD_FL_LOG2 || ++ field->flags & HIST_FIELD_FL_ALIAS) + field_name = hist_field_name(field->operands[0], ++level); + else if (field->flags & HIST_FIELD_FL_TIMESTAMP) + field_name = "$common_timestamp"; +@@ -1983,7 +1985,7 @@ static struct hist_field *create_hist_fi + + hist_field->hist_data = hist_data; + +- if (flags & HIST_FIELD_FL_EXPR) ++ if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS) + goto out; /* caller will populate */ + + if (flags & HIST_FIELD_FL_VAR_REF) { +@@ -2241,6 +2243,29 @@ parse_field(struct hist_trigger_data *hi + return field; + } + ++static struct hist_field *create_alias(struct hist_trigger_data *hist_data, ++ struct hist_field *var_ref, ++ char *var_name) ++{ ++ struct hist_field *alias = NULL; ++ unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR | ++ HIST_FIELD_FL_VAR_ONLY; ++ ++ alias = create_hist_field(hist_data, NULL, flags, var_name); ++ if (!alias) ++ return NULL; ++ ++ alias->fn = var_ref->fn; ++ alias->operands[0] = var_ref; ++ ++ if (init_var_ref(alias, var_ref)) { ++ destroy_hist_field(alias, 0); ++ return NULL; ++ } ++ ++ return alias; ++} ++ + struct hist_field *parse_atom(struct hist_trigger_data *hist_data, + struct trace_event_file *file, char *str, + unsigned long *flags, char *var_name) +@@ -2275,6 +2300,13 @@ struct hist_field *parse_atom(struct his + if (hist_field) { + hist_data->var_refs[hist_data->n_var_refs] = hist_field; + hist_field->var_ref_idx = hist_data->n_var_refs++; ++ if (var_name) 
{ ++ hist_field = create_alias(hist_data, hist_field, var_name); ++ if (!hist_field) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } + return hist_field; + } + } else +@@ -2378,6 +2410,26 @@ static int check_expr_operands(struct hi + unsigned long operand1_flags = operand1->flags; + unsigned long operand2_flags = operand2->flags; + ++ if ((operand1_flags & HIST_FIELD_FL_VAR_REF) || ++ (operand1_flags & HIST_FIELD_FL_ALIAS)) { ++ struct hist_field *var; ++ ++ var = find_var_field(operand1->var.hist_data, operand1->name); ++ if (!var) ++ return -EINVAL; ++ operand1_flags = var->flags; ++ } ++ ++ if ((operand2_flags & HIST_FIELD_FL_VAR_REF) || ++ (operand2_flags & HIST_FIELD_FL_ALIAS)) { ++ struct hist_field *var; ++ ++ var = find_var_field(operand2->var.hist_data, operand2->name); ++ if (!var) ++ return -EINVAL; ++ operand2_flags = var->flags; ++ } ++ + if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != + (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) + return -EINVAL; +@@ -4379,8 +4431,11 @@ static void hist_field_print(struct seq_ + seq_puts(m, "$common_timestamp"); + else if (hist_field->flags & HIST_FIELD_FL_CPU) + seq_puts(m, "cpu"); +- else if (field_name) ++ else if (field_name) { ++ if (hist_field->flags & HIST_FIELD_FL_ALIAS) ++ seq_putc(m, '$'); + seq_printf(m, "%s", field_name); ++ } + + if (hist_field->flags) { + const char *flags_str = get_hist_field_flags(hist_field); diff --git a/debian/patches/features/all/rt/0029-tracing-Add-last-error-error-facility-for-hist-trigg.patch b/debian/patches/features/all/rt/0037-tracing-Add-last-error-error-facility-for-hist-trigg.patch similarity index 65% rename from debian/patches/features/all/rt/0029-tracing-Add-last-error-error-facility-for-hist-trigg.patch rename to debian/patches/features/all/rt/0037-tracing-Add-last-error-error-facility-for-hist-trigg.patch index 5338cc2f2..bff03fa9e 100644 --- a/debian/patches/features/all/rt/0029-tracing-Add-last-error-error-facility-for-hist-trigg.patch +++ b/debian/patches/features/all/rt/0037-tracing-Add-last-error-error-facility-for-hist-trigg.patch @@ -1,8 +1,8 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:30 -0500 -Subject: [PATCH 29/32] tracing: Add 'last error' error facility for hist +Date: Fri, 22 Sep 2017 15:00:08 -0500 +Subject: [PATCH 37/42] tracing: Add 'last error' error facility for hist triggers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz With the addition of variables and actions, it's become necessary to provide more detailed error information to users about syntax errors. 
@@ -27,8 +27,8 @@ Signed-off-by: Tom Zanussi Signed-off-by: Sebastian Andrzej Siewior --- Documentation/trace/events.txt | 19 ++++ - kernel/trace/trace_events_hist.c | 181 ++++++++++++++++++++++++++++++++++++--- - 2 files changed, 188 insertions(+), 12 deletions(-) + kernel/trace/trace_events_hist.c | 162 +++++++++++++++++++++++++++++++++++---- + 2 files changed, 168 insertions(+), 13 deletions(-) --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -60,53 +60,30 @@ Signed-off-by: Sebastian Andrzej Siewior --------------------------- --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -288,6 +288,7 @@ struct hist_trigger_data { - struct field_var *max_vars[SYNTH_FIELDS_MAX]; - unsigned int n_max_vars; - unsigned int n_max_var_str; -+ char *last_err; - }; - - struct synth_field { -@@ -332,6 +333,83 @@ struct action_data { - struct hist_field *onmax_var; +@@ -351,6 +351,65 @@ struct action_data { + }; }; + -+static char *hist_err_str; -+static char *last_hist_cmd; -+ -+static int hist_err_alloc(void) -+{ -+ int ret = 0; -+ -+ last_hist_cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); -+ hist_err_str = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); -+ if (!last_hist_cmd || !hist_err_str) -+ ret = -ENOMEM; -+ -+ return ret; -+} ++static char last_hist_cmd[MAX_FILTER_STR_VAL]; ++static char hist_err_str[MAX_FILTER_STR_VAL]; + +static void last_cmd_set(char *str) +{ -+ if (!last_hist_cmd || !str) ++ if (!str) + return; + -+ if (strlen(last_hist_cmd) > MAX_FILTER_STR_VAL - 1) -+ return; -+ -+ strcpy(last_hist_cmd, str); ++ strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1); +} + +static void hist_err(char *str, char *var) +{ + int maxlen = MAX_FILTER_STR_VAL - 1; + -+ if (strlen(hist_err_str)) ++ if (!str) + return; + -+ if (!hist_err_str || !str) ++ if (strlen(hist_err_str)) + return; + + if (!var) @@ -124,26 +101,23 @@ Signed-off-by: Sebastian Andrzej Siewior + char err[MAX_FILTER_STR_VAL]; + + if (system && var) -+ sprintf(err, "%s.%s.%s", system, event, var); ++ snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var); + else if (system) -+ sprintf(err, "%s.%s", system, event); ++ snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); + else -+ strcpy(err, var); ++ strncpy(err, var, MAX_FILTER_STR_VAL); + + hist_err(str, err); +} + +static void hist_err_clear(void) +{ -+ if (!hist_err_str) -+ return; -+ + hist_err_str[0] = '\0'; +} + +static bool have_hist_err(void) +{ -+ if (hist_err_str && strlen(hist_err_str)) ++ if (strlen(hist_err_str)) + return true; + + return false; @@ -152,7 +126,7 @@ Signed-off-by: Sebastian Andrzej Siewior static LIST_HEAD(synth_event_list); static DEFINE_MUTEX(synth_event_mutex); -@@ -1954,12 +2032,21 @@ static struct hist_field *create_var_ref +@@ -2132,9 +2191,18 @@ static struct hist_field *create_var_ref return ref_field; } @@ -164,18 +138,15 @@ Signed-off-by: Sebastian Andrzej Siewior + return false; +} + - static struct hist_field *parse_var_ref(char *system, char *event_name, - char *var_name) + static bool is_var_ref(char *var_name) { - struct hist_field *var_field = NULL, *ref_field = NULL; - - if (!var_name || strlen(var_name) < 2 || var_name[0] != '$') + if (!var_name || strlen(var_name) < 2 || var_name[0] != '$' || + is_common_field(var_name)) - return NULL; + return false; - var_name++; -@@ -1968,6 +2055,10 @@ static struct hist_field *parse_var_ref( + return true; +@@ -2186,6 +2254,10 @@ static struct hist_field *parse_var_ref( if (var_field) ref_field = create_var_ref(var_field); @@ -186,7 
+157,19 @@ Signed-off-by: Sebastian Andrzej Siewior return ref_field; } -@@ -2426,8 +2517,11 @@ create_field_var_hist(struct hist_trigge +@@ -2431,8 +2503,10 @@ static int check_expr_operands(struct hi + } + + if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != +- (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) ++ (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { ++ hist_err("Timestamp units in expression don't match", NULL); + return -EINVAL; ++ } + + return 0; + } +@@ -2632,19 +2706,27 @@ create_field_var_hist(struct hist_trigge char *cmd; int ret; @@ -197,11 +180,8 @@ Signed-off-by: Sebastian Andrzej Siewior return ERR_PTR(-EINVAL); + } - tr = top_trace_array(); - if (!tr) -@@ -2435,13 +2529,18 @@ create_field_var_hist(struct hist_trigge + file = event_file(tr, system, event_name); - file = event_file(system, event_name); if (IS_ERR(file)) { + hist_err_event("onmatch: Event file not found: ", + system, event_name, field_name); @@ -219,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL); if (!var_hist) -@@ -2489,6 +2588,8 @@ create_field_var_hist(struct hist_trigge +@@ -2692,6 +2774,8 @@ create_field_var_hist(struct hist_trigge kfree(cmd); kfree(var_hist->cmd); kfree(var_hist); @@ -228,7 +208,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ERR_PTR(ret); } -@@ -2500,6 +2601,8 @@ create_field_var_hist(struct hist_trigge +@@ -2703,6 +2787,8 @@ create_field_var_hist(struct hist_trigge kfree(cmd); kfree(var_hist->cmd); kfree(var_hist); @@ -237,7 +217,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ERR_PTR(-EINVAL); } -@@ -2636,18 +2739,21 @@ static struct field_var *create_field_va +@@ -2839,18 +2925,21 @@ static struct field_var *create_field_va int ret = 0; if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { @@ -259,10 +239,10 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(val); ret = PTR_ERR(var); goto err; -@@ -2772,14 +2878,18 @@ static int onmax_create(struct hist_trig +@@ -2975,14 +3064,18 @@ static int onmax_create(struct hist_trig int ret = 0; - onmax_var_str = data->onmax_var_str; + onmax_var_str = data->onmax.var_str; - if (onmax_var_str[0] != '$') + if (onmax_var_str[0] != '$') { + hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str); @@ -280,15 +260,15 @@ Signed-off-by: Sebastian Andrzej Siewior flags = HIST_FIELD_FL_VAR_REF; ref_field = create_hist_field(hist_data, NULL, flags, NULL); -@@ -2803,6 +2913,7 @@ static int onmax_create(struct hist_trig - data->max_var_ref_idx = var_ref_idx; +@@ -3002,6 +3095,7 @@ static int onmax_create(struct hist_trig + data->onmax.max_var_ref_idx = var_ref_idx; max_var = create_var(hist_data, file, "max", sizeof(u64), "u64"); if (IS_ERR(max_var)) { + hist_err("onmax: Couldn't create onmax variable: ", "max"); ret = PTR_ERR(max_var); goto out; } -@@ -2815,6 +2926,7 @@ static int onmax_create(struct hist_trig +@@ -3016,6 +3110,7 @@ static int onmax_create(struct hist_trig field_var = create_target_field_var(hist_data, NULL, NULL, param); if (IS_ERR(field_var)) { @@ -296,7 +276,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = PTR_ERR(field_var); kfree(param); goto out; -@@ -2847,6 +2959,7 @@ static int parse_action_params(char *par +@@ -3050,6 +3145,7 @@ static int parse_action_params(char *par param = strstrip(param); if (strlen(param) < 2) { @@ -304,8 +284,8 @@ Signed-off-by: Sebastian Andrzej Siewior ret = -EINVAL; goto out; } -@@ -3004,6 +3117,9 @@ onmatch_find_var(struct hist_trigger_dat - hist_field = find_event_var(system, event, var); +@@ -3223,6 
+3319,9 @@ onmatch_find_var(struct hist_trigger_dat + hist_field = find_event_var(tr, system, event, var); } + if (!hist_field) @@ -314,23 +294,15 @@ Signed-off-by: Sebastian Andrzej Siewior return hist_field; } -@@ -3055,6 +3171,7 @@ static int onmatch_create(struct hist_tr - - event = find_synth_event(data->synth_event_name); +@@ -3273,6 +3372,7 @@ static int onmatch_create(struct hist_tr + mutex_lock(&synth_event_mutex); + event = find_synth_event(data->onmatch.synth_event_name); if (!event) { -+ hist_err("onmatch: Couldn't find synthetic event: ", data->synth_event_name); - ret = -EINVAL; - goto out; ++ hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name); + mutex_unlock(&synth_event_mutex); + return -EINVAL; } -@@ -3094,6 +3211,7 @@ static int onmatch_create(struct hist_tr - ret = -EINVAL; - goto out; - } -+ - if (check_synth_field(event, hist_field, field_pos) == 0) { - var_ref = create_var_ref(hist_field); - if (!var_ref) { -@@ -3108,12 +3226,15 @@ static int onmatch_create(struct hist_tr +@@ -3331,12 +3431,15 @@ static int onmatch_create(struct hist_tr continue; } @@ -346,7 +318,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = -EINVAL; goto out; } -@@ -3141,31 +3262,44 @@ static struct action_data *onmatch_parse +@@ -3362,15 +3465,22 @@ static struct action_data *onmatch_parse return ERR_PTR(-ENOMEM); match_event = strsep(&str, ")"); @@ -363,15 +335,17 @@ Signed-off-by: Sebastian Andrzej Siewior goto free; + } -- if (IS_ERR(event_file(match_event_system, match_event))) -+ if (IS_ERR(event_file(match_event_system, match_event))) { +- if (IS_ERR(event_file(tr, match_event_system, match_event))) ++ if (IS_ERR(event_file(tr, match_event_system, match_event))) { + hist_err_event("onmatch: Invalid subsystem or event name: ", + match_event_system, match_event, NULL); goto free; + } - data->match_event = kstrdup(match_event, GFP_KERNEL); - data->match_event_system = kstrdup(match_event_system, GFP_KERNEL); + data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL); + if (!data->onmatch.match_event) { +@@ -3385,12 +3495,16 @@ static struct action_data *onmatch_parse + } strsep(&str, "."); - if (!str) @@ -386,7 +360,11 @@ Signed-off-by: Sebastian Andrzej Siewior + hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name); goto free; + } - data->synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); + + data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); + if (!data->onmatch.synth_event_name) { +@@ -3399,8 +3513,10 @@ static struct action_data *onmatch_parse + } params = strsep(&str, ")"); - if (!params || !str || (str && strlen(str))) @@ -397,39 +375,30 @@ Signed-off-by: Sebastian Andrzej Siewior ret = parse_action_params(params, data); if (ret) -@@ -3217,6 +3351,7 @@ static int create_val_field(struct hist_ - if (field_str && var_name) { - if (find_var(file, var_name) && - !hist_data->remove) { -+ hist_err("Variable already defined: ", var_name); - ret = -EINVAL; - goto out; - } -@@ -3224,6 +3359,7 @@ static int create_val_field(struct hist_ - flags |= HIST_FIELD_FL_VAR; - hist_data->n_vars++; - if (hist_data->n_vars > TRACING_MAP_VARS_MAX) { -+ hist_err("Too many variables defined: ", var_name); - ret = -EINVAL; - goto out; - } -@@ -3234,6 +3370,7 @@ static int create_val_field(struct hist_ - field_str = var_name; - var_name = NULL; - } else { -+ hist_err("Malformed assignment: ", var_name); - ret = -EINVAL; - goto out; +@@ -3480,12 +3596,14 @@ static int create_var_field(struct hist_ + return -EINVAL; + + if 
(find_var(file, var_name) && !hist_data->remove) { ++ hist_err("Variable already defined: ", var_name); + return -EINVAL; } -@@ -3248,6 +3385,7 @@ static int create_val_field(struct hist_ - hist_field = parse_atom(hist_data, file, field_str, - &flags, var_name); - if (IS_ERR(hist_field)) { -+ hist_err("Unable to parse atom: ", field_str); - ret = PTR_ERR(hist_field); - goto out; - } -@@ -4138,6 +4276,11 @@ static int hist_show(struct seq_file *m, + + flags |= HIST_FIELD_FL_VAR; + hist_data->n_vars++; + if (hist_data->n_vars > TRACING_MAP_VARS_MAX) { ++ hist_err("Too many variables defined: ", var_name); + return -EINVAL; + } + +@@ -3676,6 +3794,7 @@ static int parse_var_defs(struct hist_tr + + var_name = strsep(&field_str, "="); + if (!var_name || !field_str) { ++ hist_err("Malformed assignment: ", var_name); + ret = -EINVAL; + goto free; + } +@@ -4402,6 +4521,11 @@ static int hist_show(struct seq_file *m, hist_trigger_show(m, data, n++); } @@ -441,7 +410,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: mutex_unlock(&event_mutex); -@@ -4509,6 +4652,7 @@ static int hist_register_trigger(char *g +@@ -4748,6 +4872,7 @@ static int hist_register_trigger(char *g if (named_data) { if (!hist_trigger_match(data, named_data, named_data, true)) { @@ -449,7 +418,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = -EINVAL; goto out; } -@@ -4528,13 +4672,16 @@ static int hist_register_trigger(char *g +@@ -4767,13 +4892,16 @@ static int hist_register_trigger(char *g test->paused = false; else if (hist_data->attrs->clear) hist_clear(test); @@ -467,7 +436,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = -ENOENT; goto out; } -@@ -4701,6 +4848,11 @@ static int event_hist_trigger_func(struc +@@ -4941,6 +5069,11 @@ static int event_hist_trigger_func(struc char *trigger, *p; int ret = 0; @@ -479,7 +448,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!param) return -EINVAL; -@@ -4804,6 +4956,9 @@ static int event_hist_trigger_func(struc +@@ -5062,6 +5195,9 @@ static int event_hist_trigger_func(struc /* Just return zero, not the number of registered triggers */ ret = 0; out: @@ -489,12 +458,3 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; out_unreg: cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); -@@ -5002,6 +5157,8 @@ static __init int trace_events_hist_init - goto err; - } - -+ hist_err_alloc(); -+ - return err; - err: - pr_warn("Could not create tracefs 'synthetic_events' entry\n"); diff --git a/debian/patches/features/all/rt/0030-tracing-Add-inter-event-hist-trigger-Documentation.patch b/debian/patches/features/all/rt/0038-tracing-Add-inter-event-hist-trigger-Documentation.patch similarity index 91% rename from debian/patches/features/all/rt/0030-tracing-Add-inter-event-hist-trigger-Documentation.patch rename to debian/patches/features/all/rt/0038-tracing-Add-inter-event-hist-trigger-Documentation.patch index 4f9c3dd83..ff3516359 100644 --- a/debian/patches/features/all/rt/0030-tracing-Add-inter-event-hist-trigger-Documentation.patch +++ b/debian/patches/features/all/rt/0038-tracing-Add-inter-event-hist-trigger-Documentation.patch @@ -1,16 +1,17 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:31 -0500 -Subject: [PATCH 30/32] tracing: Add inter-event hist trigger Documentation -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 15:00:09 -0500 +Subject: [PATCH 38/42] tracing: Add inter-event hist trigger Documentation +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add background and details on inter-event hist triggers, including hist variables, synthetic events, and actions. Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu Signed-off-by: Sebastian Andrzej Siewior --- - Documentation/trace/events.txt | 376 +++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 376 insertions(+) + Documentation/trace/events.txt | 385 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 385 insertions(+) --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -22,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior Note that in general the semantics of a given field aren't interpreted when applying a modifier to it, but there are some -@@ -2101,3 +2102,378 @@ triggers (you have to use '!' for each o +@@ -2101,3 +2102,387 @@ triggers (you have to use '!' for each o Hits: 489 Entries: 7 Dropped: 0 @@ -125,14 +126,15 @@ Signed-off-by: Sebastian Andrzej Siewior +creates a variable named 'ts0' for a histogram entry with the key +'next_pid': + -+ # echo 'hist:keys=next_pid:vals=ts0=$common_timestamp ... >> event/trigger ++ # echo 'hist:keys=next_pid:vals=$ts0:ts0=$common_timestamp ... >> \ ++ event/trigger + +The ts0 variable can be accessed by any subsequent event having the +same pid as 'next_pid'. + +Variable references are formed by prepending the variable name with +the '$' sign. Thus for example, the ts0 variable above would be -+referenced as '$ts0' in subsequent expressions. ++referenced as '$ts0' in expressions. + +Because 'vals=' is used, the $common_timestamp variable value above +will also be summed as a normal histogram value would (though for a @@ -140,7 +142,7 @@ Signed-off-by: Sebastian Andrzej Siewior + +The below shows that a key value can also be saved in the same way: + -+ # echo 'hist:key=timer_pid=common_pid ...' >> event/trigger ++ # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger + +If a variable isn't a key variable or prefixed with 'vals=', the +associated event field will be saved in a variable but won't be summed @@ -152,7 +154,15 @@ Signed-off-by: Sebastian Andrzej Siewior +result in both ts0 and b being created as variables, with both +common_timestamp and field1 additionally being summed as values: + -+ # echo 'hist:keys=pid:vals=ts0=$common_timestamp,b=field1 ... >> event/trigger ++ # echo 'hist:keys=pid:vals=$ts0,$b:ts0=$common_timestamp,b=field1 ... >> \ ++ event/trigger ++ ++Note that variable assignments can appear either preceding or ++following their use. The command below behaves identically to the ++command above: ++ ++ # echo 'hist:keys=pid:ts0=$common_timestamp,b=field1:vals=$ts0,$b ... >> \ ++ event/trigger + +Any number of variables not bound to a 'vals=' prefix can also be +assigned by simply separating them with colons. 
Below is the same @@ -294,7 +304,7 @@ Signed-off-by: Sebastian Andrzej Siewior + occurs, which because of the 'if comm == "cyclictest"' filter only + happens when the executable is cyclictest: + -+ # echo 'hist:keys=testpid=pid:onmatch(sched.sched_wakeup_new).\ ++ # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\ + wakeup_new_test($testpid) if comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger + @@ -320,12 +330,12 @@ Signed-off-by: Sebastian Andrzej Siewior + # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \ + /sys/kernel/debug/tracing/synthetic_events + -+ Next, we specify that whenever we see a sched_wakeup event for a ++ Next, we specify that whenever we see a sched_waking event for a + cyclictest thread, save the timestamp in a 'ts0' variable: + -+ # echo 'hist:keys=saved_pid=pid:ts0=$common_timestamp.usecs \ ++ # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=$common_timestamp.usecs \ + if comm=="cyclictest"' >> \ -+ /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger ++ /sys/kernel/debug/tracing/events/sched/sched_waking/trigger + + Then, when the corresponding thread is actually scheduled onto the + CPU by a sched_switch event, calculate the latency and use that @@ -333,7 +343,7 @@ Signed-off-by: Sebastian Andrzej Siewior + wakeup_latency synthetic event: + + # echo 'hist:keys=next_pid:wakeup_lat=$common_timestamp.usecs-$ts0:\ -+ onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,\ ++ onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\ + $saved_pid,next_prio) if next_comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + @@ -349,7 +359,7 @@ Signed-off-by: Sebastian Andrzej Siewior + + # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist + -+ - onmax(var).save(field,...) ++ - onmax(var).save(field,.. .) + + The 'onmax(var).save(field,...)' hist trigger action is invoked + whenever the value of 'var' associated with a histogram entry @@ -363,8 +373,8 @@ Signed-off-by: Sebastian Andrzej Siewior + displaying the saved values will be printed. + + As an example the below defines a couple of hist triggers, one for -+ sched_wakeup and another for sched_switch, keyed on pid. Whenever -+ a sched_wakeup occurs, the timestamp is saved in the entry ++ sched_waking and another for sched_switch, keyed on pid. Whenever ++ a sched_waking occurs, the timestamp is saved in the entry + corresponding to the current pid, and when the scheduler switches + back to that pid, the timestamp difference is calculated. 
If the + resulting latency, stored in wakeup_lat, exceeds the current @@ -373,7 +383,7 @@ Signed-off-by: Sebastian Andrzej Siewior + + # echo 'hist:keys=pid:ts0=$common_timestamp.usecs \ + if comm=="cyclictest"' >> \ -+ /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger ++ /sys/kernel/debug/tracing/events/sched/sched_waking/trigger + + # echo 'hist:keys=next_pid:\ + wakeup_lat=$common_timestamp.usecs-$ts0:\ diff --git a/debian/patches/features/all/rt/0031-tracing-Make-tracing_set_clock-non-static.patch b/debian/patches/features/all/rt/0039-tracing-Make-tracing_set_clock-non-static.patch similarity index 77% rename from debian/patches/features/all/rt/0031-tracing-Make-tracing_set_clock-non-static.patch rename to debian/patches/features/all/rt/0039-tracing-Make-tracing_set_clock-non-static.patch index f0ea1b952..7e799eb76 100644 --- a/debian/patches/features/all/rt/0031-tracing-Make-tracing_set_clock-non-static.patch +++ b/debian/patches/features/all/rt/0039-tracing-Make-tracing_set_clock-non-static.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:32 -0500 -Subject: [PATCH 31/32] tracing: Make tracing_set_clock() non-static -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 15:00:10 -0500 +Subject: [PATCH 39/42] tracing: Make tracing_set_clock() non-static +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Allow tracing code outside of trace.c to access tracing_set_clock(). @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -5887,7 +5887,7 @@ static int tracing_clock_show(struct seq +@@ -6226,7 +6226,7 @@ static int tracing_clock_show(struct seq return 0; } @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -279,6 +279,7 @@ extern int trace_array_get(struct trace_ +@@ -289,6 +289,7 @@ extern int trace_array_get(struct trace_ extern void trace_array_put(struct trace_array *tr); extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); diff --git a/debian/patches/features/all/rt/0032-tracing-Add-a-clock-attribute-for-hist-triggers.patch b/debian/patches/features/all/rt/0040-tracing-Add-a-clock-attribute-for-hist-triggers.patch similarity index 69% rename from debian/patches/features/all/rt/0032-tracing-Add-a-clock-attribute-for-hist-triggers.patch rename to debian/patches/features/all/rt/0040-tracing-Add-a-clock-attribute-for-hist-triggers.patch index e40f00990..7a8284d09 100644 --- a/debian/patches/features/all/rt/0032-tracing-Add-a-clock-attribute-for-hist-triggers.patch +++ b/debian/patches/features/all/rt/0040-tracing-Add-a-clock-attribute-for-hist-triggers.patch @@ -1,7 +1,7 @@ From: Tom Zanussi -Date: Mon, 26 Jun 2017 17:49:33 -0500 -Subject: [PATCH 32/32] tracing: Add a clock attribute for hist triggers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Fri, 22 Sep 2017 15:00:11 -0500 +Subject: [PATCH 40/42] tracing: Add a clock attribute for hist triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The default clock if timestamps are used in a histogram is "global". If timestamps aren't used, the clock is irrelevant. @@ -10,11 +10,12 @@ Use the "clock=" param only if you want to override the default "global" clock for a histogram with timestamps. 
Signed-off-by: Tom Zanussi +Signed-off-by: Rajvi Jingar Signed-off-by: Sebastian Andrzej Siewior --- Documentation/trace/events.txt | 9 +++++++++ - kernel/trace/trace_events_hist.c | 34 +++++++++++++++++++++++++++++++--- - 2 files changed, 40 insertions(+), 3 deletions(-) + kernel/trace/trace_events_hist.c | 38 ++++++++++++++++++++++++++++++++++++-- + 2 files changed, 45 insertions(+), 2 deletions(-) --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -26,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior +histogram, the trace buffer is automatically switched over to using +absolute timestamps and the "global" trace clock, in order to avoid +bogus timestamp differences with other clocks that aren't coherent -+across CPUs. This can be overriden by specifying one of the other ++across CPUs. This can be overridden by specifying one of the other +trace clocks instead, using the "clock=XXX" hist trigger attribute, +where XXX is any of the clocks listed in the tracing/trace_clock +pseudo-file. @@ -36,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior 6.3.1 Histogram Variables --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -233,6 +233,7 @@ struct hist_trigger_attrs { +@@ -241,6 +241,7 @@ struct hist_trigger_attrs { char *vals_str; char *sort_key_str; char *name; @@ -44,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior bool pause; bool cont; bool clear; -@@ -1586,6 +1587,7 @@ static void destroy_hist_trigger_attrs(s +@@ -1680,6 +1681,7 @@ static void destroy_hist_trigger_attrs(s kfree(attrs->sort_key_str); kfree(attrs->keys_str); kfree(attrs->vals_str); @@ -52,12 +53,11 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(attrs); } -@@ -1625,7 +1627,16 @@ static int parse_assignment(char *str, s - attrs->sort_key_str = kstrdup(str, GFP_KERNEL); - else if (strncmp(str, "name=", strlen("name=")) == 0) - attrs->name = kstrdup(str, GFP_KERNEL); -- else if (strncmp(str, "size=", strlen("size=")) == 0) { -+ else if (strncmp(str, "clock=", strlen("clock=")) == 0) { +@@ -1735,6 +1737,19 @@ static int parse_assignment(char *str, s + ret = -ENOMEM; + goto out; + } ++ } else if (strncmp(str, "clock=", strlen("clock=")) == 0) { + strsep(&str, "="); + if (!str) { + ret = -EINVAL; @@ -66,24 +66,29 @@ Signed-off-by: Sebastian Andrzej Siewior + + str = strstrip(str); + attrs->clock = kstrdup(str, GFP_KERNEL); -+ } else if (strncmp(str, "size=", strlen("size=")) == 0) { ++ if (!attrs->clock) { ++ ret = -ENOMEM; ++ goto out; ++ } + } else if (strncmp(str, "size=", strlen("size=")) == 0) { int map_bits = parse_map_size(str); - if (map_bits < 0) { -@@ -1688,6 +1699,12 @@ static struct hist_trigger_attrs *parse_ +@@ -1798,6 +1813,14 @@ static struct hist_trigger_attrs *parse_ goto free; } + if (!attrs->clock) { + attrs->clock = kstrdup("global", GFP_KERNEL); -+ if (!attrs->clock) ++ if (!attrs->clock) { ++ ret = -ENOMEM; + goto free; ++ } + } + return attrs; free: destroy_hist_trigger_attrs(attrs); -@@ -4437,6 +4454,8 @@ static int event_hist_trigger_print(stru +@@ -4660,6 +4683,8 @@ static int event_hist_trigger_print(stru seq_puts(m, ".descending"); } seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); @@ -92,7 +97,7 @@ Signed-off-by: Sebastian Andrzej Siewior print_actions_spec(m, hist_data); -@@ -4702,10 +4721,19 @@ static int hist_register_trigger(char *g +@@ -4922,10 +4947,19 @@ static int hist_register_trigger(char *g goto out; } diff --git a/debian/patches/features/all/rt/0041-tracing-Increase-trace_recursive_lock-limit-for-synt.patch 
b/debian/patches/features/all/rt/0041-tracing-Increase-trace_recursive_lock-limit-for-synt.patch new file mode 100644 index 000000000..659504f5e --- /dev/null +++ b/debian/patches/features/all/rt/0041-tracing-Increase-trace_recursive_lock-limit-for-synt.patch @@ -0,0 +1,44 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 15:00:12 -0500 +Subject: [PATCH 41/42] tracing: Increase trace_recursive_lock() limit for + synthetic events +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Synthetic event generation needs to happen while the current event is +still in progress, so add 1 to the trace_recursive_lock() recursion +limit to account for that. + +Because we also want to allow for the possibility of a synthetic event +being generated from another synthetic event, add an additional +increment for that as well. + +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/ring_buffer.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -2590,16 +2590,16 @@ rb_wakeups(struct ring_buffer *buffer, s + * IRQ context + * NMI context + * +- * If for some reason the ring buffer starts to recurse, we +- * only allow that to happen at most 4 times (one for each +- * context). If it happens 5 times, then we consider this a +- * recusive loop and do not let it go further. ++ * If for some reason the ring buffer starts to recurse, we only allow ++ * that to happen at most 6 times (one for each context, plus possibly ++ * two levels of synthetic event generation). If it happens 7 times, ++ * then we consider this a recusive loop and do not let it go further. + */ + + static __always_inline int + trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) + { +- if (cpu_buffer->current_context >= 4) ++ if (cpu_buffer->current_context >= 6) + return 1; + + cpu_buffer->current_context++; diff --git a/debian/patches/features/all/rt/0042-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch b/debian/patches/features/all/rt/0042-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch new file mode 100644 index 000000000..acb77cd9b --- /dev/null +++ b/debian/patches/features/all/rt/0042-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch @@ -0,0 +1,28 @@ +From: Tom Zanussi +Date: Fri, 22 Sep 2017 15:00:13 -0500 +Subject: [PATCH 42/42] tracing: Add inter-event blurb to HIST_TRIGGERS config + option +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +So that users know that inter-event tracing is supported as part of +the HIST_TRIGGERS option, include text to that effect in the help +text. + +Signed-off-by: Tom Zanussi +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/Kconfig | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/kernel/trace/Kconfig ++++ b/kernel/trace/Kconfig +@@ -585,6 +585,9 @@ config HIST_TRIGGERS + event activity as an initial guide for further investigation + using more advanced tools. + ++ Inter-event tracing of quantities such as latencies is also ++ supported using hist triggers under this option. ++ + See Documentation/trace/events.txt. + If in doubt, say N. 
+ diff --git a/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch b/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch index 951631960..28e6374f7 100644 --- a/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch +++ b/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch @@ -1,7 +1,7 @@ From: "Yadi.hu" Date: Wed, 10 Dec 2014 10:32:09 +0800 Subject: ARM: enable irq in translation/section permission fault handlers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Probably happens on all ARM, with CONFIG_PREEMPT_RT_FULL @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c -@@ -431,6 +431,9 @@ do_translation_fault(unsigned long addr, +@@ -434,6 +434,9 @@ do_translation_fault(unsigned long addr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (user_mode(regs)) goto bad_area; -@@ -498,6 +501,9 @@ do_translation_fault(unsigned long addr, +@@ -501,6 +504,9 @@ do_translation_fault(unsigned long addr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { diff --git a/debian/patches/features/all/rt/Bluetooth-avoid-recursive-locking-in-hci_send_to_cha.patch b/debian/patches/features/all/rt/Bluetooth-avoid-recursive-locking-in-hci_send_to_cha.patch new file mode 100644 index 000000000..8b427c78a --- /dev/null +++ b/debian/patches/features/all/rt/Bluetooth-avoid-recursive-locking-in-hci_send_to_cha.patch @@ -0,0 +1,71 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 21 Sep 2017 15:35:57 +0200 +Subject: Bluetooth: avoid recursive locking in + hci_send_to_channel() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Mart reported a deadlock in -RT in the call path: + hci_send_monitor_ctrl_event() -> hci_send_to_channel() + +because both functions acquire the same read lock hci_sk_list.lock. This +is also a mainline issue because the qrwlock implementation is writer +fair (the traditional rwlock implementation is reader biased). + +To avoid the deadlock there is now __hci_send_to_channel() which expects +the readlock to be held. 
+ +Cc: Marcel Holtmann +Cc: Johan Hedberg +Cc: rt-stable@vger.kernel.org +Fixes: 38ceaa00d02d ("Bluetooth: Add support for sending MGMT commands and events to monitor") +Reported-by: Mart van de Wege +Signed-off-by: Sebastian Andrzej Siewior +--- + net/bluetooth/hci_sock.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/net/bluetooth/hci_sock.c ++++ b/net/bluetooth/hci_sock.c +@@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hd + } + + /* Send frame to sockets with specific channel */ +-void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, +- int flag, struct sock *skip_sk) ++static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb, ++ int flag, struct sock *skip_sk) + { + struct sock *sk; + + BT_DBG("channel %u len %d", channel, skb->len); + +- read_lock(&hci_sk_list.lock); +- + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *nskb; + +@@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short + kfree_skb(nskb); + } + ++} ++ ++void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, ++ int flag, struct sock *skip_sk) ++{ ++ read_lock(&hci_sk_list.lock); ++ __hci_send_to_channel(channel, skb, flag, skip_sk); + read_unlock(&hci_sk_list.lock); + } + +@@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + +- hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, +- HCI_SOCK_TRUSTED, NULL); ++ __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, ++ HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + diff --git a/debian/patches/features/all/rt/CPUFREQ-Loongson2-drop-set_cpus_allowed_ptr.patch b/debian/patches/features/all/rt/CPUFREQ-Loongson2-drop-set_cpus_allowed_ptr.patch deleted file mode 100644 index 689d032bc..000000000 --- a/debian/patches/features/all/rt/CPUFREQ-Loongson2-drop-set_cpus_allowed_ptr.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Tue, 4 Apr 2017 17:43:55 +0200 -Subject: [PATCH] CPUFREQ: Loongson2: drop set_cpus_allowed_ptr() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -It is pure mystery to me why we need to be on a specific CPU while -looking up a value in an array. -My best shot at this is that before commit d4019f0a92ab ("cpufreq: move -freq change notifications to cpufreq core") it was required to invoke -cpufreq_notify_transition() on a special CPU. - -Since it looks like a waste, remove it. - -Cc: "Rafael J. 
Wysocki" -Cc: Viresh Kumar -Cc: linux-pm@vger.kernel.org -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/cpufreq/loongson2_cpufreq.c | 7 ------- - 1 file changed, 7 deletions(-) - ---- a/drivers/cpufreq/loongson2_cpufreq.c -+++ b/drivers/cpufreq/loongson2_cpufreq.c -@@ -51,19 +51,12 @@ static int loongson2_cpu_freq_notifier(s - static int loongson2_cpufreq_target(struct cpufreq_policy *policy, - unsigned int index) - { -- unsigned int cpu = policy->cpu; -- cpumask_t cpus_allowed; - unsigned int freq; - -- cpus_allowed = current->cpus_allowed; -- set_cpus_allowed_ptr(current, cpumask_of(cpu)); -- - freq = - ((cpu_clock_freq / 1000) * - loongson2_clockmod_table[index].driver_data) / 8; - -- set_cpus_allowed_ptr(current, &cpus_allowed); -- - /* setting the cpu frequency */ - clk_set_rate(policy->clk, freq * 1000); - diff --git a/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch b/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch index ab3cc98d1..145b18a80 100644 --- a/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch +++ b/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 21 Mar 2013 19:01:05 +0100 Subject: printk: Drop the logbuf_lock more often -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The lock is hold with irgs off. The latency drops 500us+ on my arm bugs with a "full" buffer after executing "dmesg" on the shell. @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1409,6 +1409,8 @@ static int syslog_print_all(char __user +@@ -1407,6 +1407,8 @@ static int syslog_print_all(char __user { char *text; int len = 0; @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) -@@ -1420,6 +1422,14 @@ static int syslog_print_all(char __user +@@ -1418,6 +1420,14 @@ static int syslog_print_all(char __user u64 seq; u32 idx; @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. 
-@@ -1432,6 +1442,14 @@ static int syslog_print_all(char __user +@@ -1430,6 +1440,14 @@ static int syslog_print_all(char __user len += msg_print_text(msg, true, NULL, 0); idx = log_next(idx); seq++; @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* move first record forward until length fits into the buffer */ -@@ -1443,6 +1461,14 @@ static int syslog_print_all(char __user +@@ -1441,6 +1459,14 @@ static int syslog_print_all(char __user len -= msg_print_text(msg, true, NULL, 0); idx = log_next(idx); seq++; @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* last message fitting into this dump */ -@@ -1481,6 +1507,7 @@ static int syslog_print_all(char __user +@@ -1479,6 +1505,7 @@ static int syslog_print_all(char __user clear_seq = log_next_seq; clear_idx = log_next_idx; } diff --git a/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch b/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch index 18d7dcc0e..582927cb8 100644 --- a/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch +++ b/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Thu, 11 Feb 2016 11:54:01 -0600 Subject: KVM: arm/arm64: downgrade preempt_disable()d region to migrate_disable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating the vgic and timer states to prevent the calling task from migrating to @@ -18,21 +18,21 @@ Reported-by: Manish Jaggi Signed-off-by: Josh Cartwright Signed-off-by: Sebastian Andrzej Siewior --- - arch/arm/kvm/arm.c | 6 +++--- + virt/kvm/arm/arm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) ---- a/arch/arm/kvm/arm.c -+++ b/arch/arm/kvm/arm.c -@@ -632,7 +632,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +--- a/virt/kvm/arm/arm.c ++++ b/virt/kvm/arm/arm.c +@@ -650,7 +650,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v * involves poking the GIC, which must be done in a * non-preemptible context. */ - preempt_disable(); + migrate_disable(); + kvm_pmu_flush_hwstate(vcpu); - kvm_timer_flush_hwstate(vcpu); - kvm_vgic_flush_hwstate(vcpu); -@@ -653,7 +653,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + +@@ -687,7 +687,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior continue; } -@@ -709,7 +709,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -742,7 +742,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v kvm_vgic_sync_hwstate(vcpu); diff --git a/debian/patches/features/all/rt/KVM-lapic-mark-LAPIC-timer-handler-as-irqsafe.patch b/debian/patches/features/all/rt/KVM-lapic-mark-LAPIC-timer-handler-as-irqsafe.patch deleted file mode 100644 index 665399311..000000000 --- a/debian/patches/features/all/rt/KVM-lapic-mark-LAPIC-timer-handler-as-irqsafe.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Marcelo Tosatti -Date: Wed, 8 Apr 2015 20:33:25 -0300 -Subject: KVM: lapic: mark LAPIC timer handler as irqsafe -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Since lapic timer handler only wakes up a simple waitqueue, -it can be executed from hardirq context. 
- -Reduces average cyclictest latency by 3us. - -Signed-off-by: Marcelo Tosatti -Signed-off-by: Sebastian Andrzej Siewior ---- - arch/x86/kvm/lapic.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/x86/kvm/lapic.c -+++ b/arch/x86/kvm/lapic.c -@@ -2062,6 +2062,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc - hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED); - apic->lapic_timer.timer.function = apic_timer_fn; -+ apic->lapic_timer.timer.irqsafe = 1; - - /* - * APIC is created enabled. This will prevent kvm_lapic_set_base from diff --git a/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch b/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch index 3645ee81b..e9fc44f46 100644 --- a/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch +++ b/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch @@ -5,7 +5,7 @@ Cc: Anna Schumaker , linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org, tglx@linutronix.de Subject: NFSv4: replace seqcount_t with a seqlock_t -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me because it maps to preempt_disable() in -RT which I can't have at this @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior put_nfs_open_context(ctx); --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h -@@ -111,7 +111,7 @@ struct nfs4_state_owner { +@@ -112,7 +112,7 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c -@@ -2608,7 +2608,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2641,7 +2641,7 @@ static int _nfs4_open_and_get_state(stru unsigned int seq; int ret; @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = _nfs4_proc_open(opendata); if (ret != 0) -@@ -2646,7 +2646,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2679,7 +2679,7 @@ static int _nfs4_open_and_get_state(stru if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior out: --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c -@@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server +@@ -494,7 +494,7 @@ nfs4_alloc_state_owner(struct nfs_server nfs4_init_seqid_counter(&sp->so_seqid); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); @@ -87,7 +87,7 @@ Signed-off-by: Sebastian Andrzej Siewior mutex_init(&sp->so_delegreturn_mutex); return sp; } -@@ -1510,8 +1510,12 @@ static int nfs4_reclaim_open_state(struc +@@ -1516,8 +1516,12 @@ static int nfs4_reclaim_open_state(struc * recovering after a network partition or a reboot from a * server that doesn't support a grace period. 
*/ @@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) -@@ -1580,14 +1584,20 @@ static int nfs4_reclaim_open_state(struc +@@ -1586,14 +1590,20 @@ static int nfs4_reclaim_open_state(struc spin_lock(&sp->so_lock); goto restart; } diff --git a/debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch b/debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch new file mode 100644 index 000000000..e236435b4 --- /dev/null +++ b/debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch @@ -0,0 +1,36 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 21 Sep 2017 14:25:13 +0200 +Subject: [PATCH] RCU: we need to skip that warning but only on sleeping + locks +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +This check is okay for upstream. On RT we trigger this while blocking on +sleeping lock. In this case, it is okay to schedule() within a RCU +section. +Since spin_lock() and read_lock() disables migration it should be okay +to test for this as an indication whether or not a sleeping lock is +held. The ->pi_blocked_on member won't work becasuse it might also be +set on regular mutexes. + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/rcu/tree_plugin.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -323,9 +323,13 @@ static void rcu_preempt_note_context_swi + struct task_struct *t = current; + struct rcu_data *rdp; + struct rcu_node *rnp; ++ int mg_counter = 0; + + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n"); +- WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0); ++#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) ++ mg_counter = t->migrate_disable; ++#endif ++ WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !mg_counter); + if (t->rcu_read_lock_nesting > 0 && + !t->rcu_read_unlock_special.b.blocked) { + diff --git a/debian/patches/features/all/rt/acpi-rt-Convert-acpi_gbl_hardware-lock-back-to-a-raw.patch b/debian/patches/features/all/rt/acpi-rt-Convert-acpi_gbl_hardware-lock-back-to-a-raw.patch index c137c40c4..8dbc932e7 100644 --- a/debian/patches/features/all/rt/acpi-rt-Convert-acpi_gbl_hardware-lock-back-to-a-raw.patch +++ b/debian/patches/features/all/rt/acpi-rt-Convert-acpi_gbl_hardware-lock-back-to-a-raw.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Wed, 13 Feb 2013 09:26:05 -0500 Subject: acpi/rt: Convert acpi_gbl_hardware lock back to a raw_spinlock_t -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz We hit the following bug with 3.6-rt: @@ -143,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Delete the reader/writer lock */ --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h -@@ -133,6 +133,7 @@ +@@ -134,6 +134,7 @@ #define acpi_cache_t struct kmem_cache #define acpi_spinlock spinlock_t * @@ -151,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define acpi_cpu_flags unsigned long /* Use native linux version of acpi_os_allocate_zeroed */ -@@ -151,6 +152,20 @@ +@@ -152,6 +153,20 @@ #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id #define 
ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock diff --git a/debian/patches/features/all/rt/add_migrate_disable.patch b/debian/patches/features/all/rt/add_migrate_disable.patch index ff4b0066f..c7ec0c3d2 100644 --- a/debian/patches/features/all/rt/add_migrate_disable.patch +++ b/debian/patches/features/all/rt/add_migrate_disable.patch @@ -1,19 +1,19 @@ From: Sebastian Andrzej Siewior Date: Sat, 27 May 2017 19:02:06 +0200 Subject: kernel/sched/core: add migrate_disable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz --- include/linux/preempt.h | 23 ++++++++ include/linux/sched.h | 7 ++ include/linux/smp.h | 3 + - kernel/sched/core.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++- + kernel/sched/core.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++- kernel/sched/debug.c | 4 + - 5 files changed, 169 insertions(+), 2 deletions(-) + 5 files changed, 165 insertions(+), 2 deletions(-) --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -184,6 +184,22 @@ do { \ +@@ -185,6 +185,22 @@ do { \ #define preemptible() (preempt_count() == 0 && !irqs_disabled()) @@ -36,7 +36,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 #ifdef CONFIG_PREEMPT #define preempt_enable() \ do { \ -@@ -252,6 +268,13 @@ do { \ +@@ -253,6 +269,13 @@ do { \ #define preempt_enable_notrace() barrier() #define preemptible() 0 @@ -52,7 +52,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 #ifdef MODULE --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -537,6 +537,13 @@ struct task_struct { +@@ -580,6 +580,13 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; @@ -68,7 +68,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 int rcu_read_lock_nesting; --- a/include/linux/smp.h +++ b/include/linux/smp.h -@@ -197,6 +197,9 @@ static inline int get_boot_cpu_id(void) +@@ -202,6 +202,9 @@ static inline int get_boot_cpu_id(void) #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) #define put_cpu() preempt_enable() @@ -80,7 +80,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 * boot command line: --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1047,7 +1047,15 @@ void set_cpus_allowed_common(struct task +@@ -1023,7 +1023,15 @@ void set_cpus_allowed_common(struct task p->nr_cpus_allowed = cpumask_weight(new_mask); } @@ -97,7 +97,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 { struct rq *rq = task_rq(p); bool queued, running; -@@ -1076,6 +1084,20 @@ void do_set_cpus_allowed(struct task_str +@@ -1052,6 +1060,20 @@ void do_set_cpus_allowed(struct task_str set_curr_task(rq, p); } @@ -118,7 +118,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 /* * Change a given task's CPU affinity. Migrate the thread to a * proper CPU and schedule it away if the CPU it's executing on -@@ -1134,9 +1156,16 @@ static int __set_cpus_allowed_ptr(struct +@@ -1110,9 +1132,16 @@ static int __set_cpus_allowed_ptr(struct } /* Can the task run on the task's current CPU? 
If so, we're done */ @@ -136,7 +136,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); if (task_running(rq, p) || p->state == TASK_WAKING) { struct migration_arg arg = { p, dest_cpu }; -@@ -7357,3 +7386,104 @@ const u32 sched_prio_to_wmult[40] = { +@@ -6760,3 +6789,100 @@ const u32 sched_prio_to_wmult[40] = { /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; @@ -162,8 +162,6 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 + return; + } + -+ /* get_online_cpus(); */ -+ + preempt_disable(); + p->migrate_disable = 1; + @@ -232,18 +230,16 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 + preempt_enable(); + stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); + tlb_migrate_finish(p->mm); -+ /* put_online_cpus(); */ + return; + } + } -+ /* put_online_cpus(); */ + preempt_enable(); +} +EXPORT_SYMBOL(migrate_enable); +#endif --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c -@@ -958,6 +958,10 @@ void proc_sched_show_task(struct task_st +@@ -1017,6 +1017,10 @@ void proc_sched_show_task(struct task_st P(dl.runtime); P(dl.deadline); } diff --git a/debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch b/debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch new file mode 100644 index 000000000..31eb50bbd --- /dev/null +++ b/debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch @@ -0,0 +1,84 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 11 Oct 2017 17:43:49 +0200 +Subject: apparmor: use a locallock instead preempt_disable() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +get_buffers() disables preemption which acts as a lock for the per-CPU +variable. Since we can't disable preemption here on RT, a local_lock is +lock is used in order to remain on the same CPU and not to have more +than one user within the critical section. + +Signed-off-by: Sebastian Andrzej Siewior +--- + security/apparmor/include/path.h | 21 +++++++++++++++++---- + security/apparmor/lsm.c | 2 +- + 2 files changed, 18 insertions(+), 5 deletions(-) + +--- a/security/apparmor/include/path.h ++++ b/security/apparmor/include/path.h +@@ -39,9 +39,10 @@ struct aa_buffers { + }; + + #include +-#include ++#include + + DECLARE_PER_CPU(struct aa_buffers, aa_buffers); ++DECLARE_LOCAL_IRQ_LOCK(aa_buffers_lock); + + #define COUNT_ARGS(X...) COUNT_ARGS_HELPER(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + #define COUNT_ARGS_HELPER(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) 
n +@@ -55,12 +56,24 @@ DECLARE_PER_CPU(struct aa_buffers, aa_bu + + #define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++) + +-#ifdef CONFIG_DEBUG_PREEMPT ++#ifdef CONFIG_PREEMPT_RT_BASE ++ ++static inline void AA_BUG_PREEMPT_ENABLED(const char *s) ++{ ++ struct local_irq_lock *lv; ++ ++ lv = this_cpu_ptr(&aa_buffers_lock); ++ WARN_ONCE(lv->owner != current, ++ "__get_buffer without aa_buffers_lock\n"); ++} ++ ++#elif defined(CONFIG_DEBUG_PREEMPT) + #define AA_BUG_PREEMPT_ENABLED(X) AA_BUG(preempt_count() <= 0, X) + #else + #define AA_BUG_PREEMPT_ENABLED(X) /* nop */ + #endif + ++ + #define __get_buffer(N) ({ \ + struct aa_buffers *__cpu_var; \ + AA_BUG_PREEMPT_ENABLED("__get_buffer without preempt disabled"); \ +@@ -73,14 +86,14 @@ DECLARE_PER_CPU(struct aa_buffers, aa_bu + + #define get_buffers(X...) \ + do { \ +- preempt_disable(); \ ++ local_lock(aa_buffers_lock); \ + __get_buffers(X); \ + } while (0) + + #define put_buffers(X, Y...) \ + do { \ + __put_buffers(X, Y); \ +- preempt_enable(); \ ++ local_unlock(aa_buffers_lock); \ + } while (0) + + #endif /* __AA_PATH_H */ +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -44,7 +44,7 @@ + int apparmor_initialized; + + DEFINE_PER_CPU(struct aa_buffers, aa_buffers); +- ++DEFINE_LOCAL_IRQ_LOCK(aa_buffers_lock); + + /* + * LSM hook functions diff --git a/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch b/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch index 3d97511b1..1db6b7ec9 100644 --- a/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch +++ b/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch @@ -1,7 +1,7 @@ From: Anders Roxell Date: Thu, 14 May 2015 17:52:17 +0200 Subject: arch/arm64: Add lazy preempt support -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz arm64 is missing support for PREEMPT_RT. The main feature which is lacking is support for lazy preemption. The arch-specific entry code, @@ -13,15 +13,15 @@ indicate that support for full RT preemption is now available. 
Signed-off-by: Anders Roxell --- arch/arm64/Kconfig | 1 + - arch/arm64/include/asm/thread_info.h | 7 +++++-- + arch/arm64/include/asm/thread_info.h | 6 +++++- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 12 +++++++++--- arch/arm64/kernel/signal.c | 2 +- - 5 files changed, 17 insertions(+), 6 deletions(-) + 5 files changed, 17 insertions(+), 5 deletions(-) --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -96,6 +96,7 @@ config ARM64 +@@ -103,6 +103,7 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -31,7 +31,7 @@ Signed-off-by: Anders Roxell select HAVE_SYSCALL_TRACEPOINTS --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h -@@ -51,6 +51,7 @@ struct thread_info { +@@ -43,6 +43,7 @@ struct thread_info { u64 ttbr0; /* saved TTBR0_EL1 */ #endif int preempt_count; /* 0 => preemptable, <0 => bug */ @@ -39,15 +39,15 @@ Signed-off-by: Anders Roxell }; #define INIT_THREAD_INFO(tsk) \ -@@ -86,6 +87,7 @@ struct thread_info { - #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +@@ -82,6 +83,7 @@ void arch_setup_new_exec(void); #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ -+#define TIF_NEED_RESCHED_LAZY 5 + #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ ++#define TIF_NEED_RESCHED_LAZY 6 #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 -@@ -101,6 +103,7 @@ struct thread_info { +@@ -97,6 +99,7 @@ void arch_setup_new_exec(void); #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) @@ -55,13 +55,13 @@ Signed-off-by: Anders Roxell #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -111,8 +114,8 @@ struct thread_info { +@@ -108,8 +111,9 @@ void arch_setup_new_exec(void); #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ -- _TIF_UPROBE) -- -+ _TIF_UPROBE | _TIF_NEED_RESCHED_LAZY) +- _TIF_UPROBE | _TIF_FSCHECK) ++ _TIF_UPROBE | _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY) + +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ @@ -78,7 +78,7 @@ Signed-off-by: Anders Roxell DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S -@@ -488,11 +488,16 @@ ENDPROC(el1_sync) +@@ -570,11 +570,16 @@ ENDPROC(el1_sync) #ifdef CONFIG_PREEMPT ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count @@ -98,7 +98,7 @@ Signed-off-by: Anders Roxell #endif #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on -@@ -506,6 +511,7 @@ ENDPROC(el1_irq) +@@ -588,6 +593,7 @@ ENDPROC(el1_irq) 1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? 
@@ -108,10 +108,10 @@ Signed-off-by: Anders Roxell --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -409,7 +409,7 @@ asmlinkage void do_notify_resume(struct - */ - trace_hardirqs_off(); - do { +@@ -755,7 +755,7 @@ asmlinkage void do_notify_resume(struct + /* Check valid user FS if needed */ + addr_limit_user_check(); + - if (thread_flags & _TIF_NEED_RESCHED) { + if (thread_flags & _TIF_NEED_RESCHED_MASK) { schedule(); diff --git a/debian/patches/features/all/rt/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch b/debian/patches/features/all/rt/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch index ce1fad96a..8f64e4c75 100644 --- a/debian/patches/features/all/rt/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch +++ b/debian/patches/features/all/rt/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch @@ -1,7 +1,7 @@ From: Benedikt Spranger Date: Sat, 6 Mar 2010 17:47:10 +0100 Subject: ARM: AT91: PIT: Remove irq handler when clock event is unused -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Setup and remove the interrupt handler in clock event mode selection. This avoids calling the (shared) interrupt handler when the device is diff --git a/debian/patches/features/all/rt/arm-at91-tclib-default-to-tclib-timer-for-rt.patch b/debian/patches/features/all/rt/arm-at91-tclib-default-to-tclib-timer-for-rt.patch index 9f93eb667..3a5f27f73 100644 --- a/debian/patches/features/all/rt/arm-at91-tclib-default-to-tclib-timer-for-rt.patch +++ b/debian/patches/features/all/rt/arm-at91-tclib-default-to-tclib-timer-for-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sat, 1 May 2010 18:29:35 +0200 Subject: ARM: at91: tclib: Default to tclib timer for RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RT is not too happy about the shared timer interrupt in AT91 devices. Default to tclib timer for RT. diff --git a/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch b/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch index 86974f297..b38f89a7c 100644 --- a/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch +++ b/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch @@ -1,7 +1,7 @@ From: Frank Rowand Date: Mon, 19 Sep 2011 14:51:14 -0700 Subject: arm: Convert arm boot_lock to raw -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The arm boot_lock is used by the secondary processor startup code. The locking task is the idle thread, which has idle->sched_class == &idle_sched_class. 
diff --git a/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch b/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch index f4bb57787..7dbab1cef 100644 --- a/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch +++ b/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch @@ -1,7 +1,7 @@ Subject: arm: Enable highmem for rt From: Thomas Gleixner Date: Wed, 13 Feb 2013 11:03:11 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz fixup highmem for ARM. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h -@@ -3,6 +3,13 @@ +@@ -4,6 +4,13 @@ #include @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner /* * For v7 SMP cores running a preemptible kernel we may be pre-empted * during a TLB maintenance operation, so execute an inner-shareable dsb -@@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(s +@@ -26,6 +33,7 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ __complete_pending_tlbi(); \ @@ -164,7 +164,7 @@ Signed-off-by: Thomas Gleixner +#endif --- a/include/linux/highmem.h +++ b/include/linux/highmem.h -@@ -7,6 +7,7 @@ +@@ -8,6 +8,7 @@ #include #include #include diff --git a/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch b/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch index db337b56c..d6b0de65b 100644 --- a/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch +++ b/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 11 Mar 2013 21:37:27 +0100 Subject: arm/highmem: Flush tlb on unmap -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The tlb should be flushed on unmap and thus make the mapping entry invalid. This is only done in the non-debug case which does not look diff --git a/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch b/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch index 508241b2f..e4f067c67 100644 --- a/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch +++ b/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 22 Dec 2016 17:28:33 +0100 Subject: [PATCH] arm: include definition for cpumask_t -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz This definition gets pulled in by other files. With the (later) split of RCU and spinlock.h it won't compile anymore. 
@@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h -@@ -22,6 +22,8 @@ +@@ -23,6 +23,8 @@ #endif #ifndef __ASSEMBLY__ diff --git a/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch b/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch index d5e41afdb..c33d015d8 100644 --- a/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch +++ b/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch @@ -1,7 +1,7 @@ From: Yang Shi Date: Thu, 10 Nov 2016 16:17:55 -0800 Subject: [PATCH] arm: kprobe: replace patch_lock to raw lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz When running kprobe on -rt kernel, the below bug is caught: @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c -@@ -15,7 +15,7 @@ struct patch { +@@ -16,7 +16,7 @@ struct patch { unsigned int insn; }; @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) __acquires(&patch_lock) -@@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *a +@@ -33,7 +33,7 @@ static void __kprobes *patch_map(void *a return addr; if (flags) @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior else __acquire(&patch_lock); -@@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fi +@@ -48,7 +48,7 @@ static void __kprobes patch_unmap(int fi clear_fixmap(fixmap); if (flags) diff --git a/debian/patches/features/all/rt/arm-preempt-lazy-support.patch b/debian/patches/features/all/rt/arm-preempt-lazy-support.patch index 016dfb249..b4c58216a 100644 --- a/debian/patches/features/all/rt/arm-preempt-lazy-support.patch +++ b/debian/patches/features/all/rt/arm-preempt-lazy-support.patch @@ -1,7 +1,7 @@ Subject: arm: Add support for lazy preemption From: Thomas Gleixner Date: Wed, 31 Oct 2012 12:04:11 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Implement the arm pieces for lazy preempt. @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -81,6 +81,7 @@ config ARM +@@ -85,6 +85,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -114,9 +114,9 @@ Signed-off-by: Thomas Gleixner __und_fault: --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S -@@ -41,7 +41,9 @@ - UNWIND(.cantunwind ) - disable_irq_notrace @ disable interrupts +@@ -53,7 +53,9 @@ saved_pc .req lr + cmp r2, #TASK_SIZE + blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) @@ -124,14 +124,14 @@ Signed-off-by: Thomas Gleixner + tst r1, #_TIF_SECCOMP bne fast_work_pending - /* perform architecture specific actions before user return */ -@@ -67,8 +69,11 @@ ENDPROC(ret_fast_syscall) - str r0, [sp, #S_R0 + S_OFF]! 
@ save returned r0 - disable_irq_notrace @ disable interrupts + +@@ -83,8 +85,11 @@ ENDPROC(ret_fast_syscall) + cmp r2, #TASK_SIZE + blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) -+ bne do_slower_path ++ bne do_slower_path + tst r1, #_TIF_SECCOMP beq no_work_pending +do_slower_path: @@ -140,7 +140,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c -@@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, un +@@ -615,7 +615,8 @@ do_work_pending(struct pt_regs *regs, un */ trace_hardirqs_off(); do { diff --git a/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch b/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch index 2bc9c34c2..5fcd44407 100644 --- a/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch +++ b/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 20 Sep 2013 14:31:54 +0200 Subject: arm/unwind: use a raw_spin_lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Mostly unwind is done with irqs enabled however SLUB may call it with irqs disabled while creating a new SLUB cache. diff --git a/debian/patches/features/all/rt/arm64-cpufeature-don-t-use-mutex-in-bringup-path.patch b/debian/patches/features/all/rt/arm64-cpufeature-don-t-use-mutex-in-bringup-path.patch deleted file mode 100644 index 780b4b28d..000000000 --- a/debian/patches/features/all/rt/arm64-cpufeature-don-t-use-mutex-in-bringup-path.patch +++ /dev/null @@ -1,170 +0,0 @@ -From: Mark Rutland -Date: Tue, 16 May 2017 15:18:05 +0100 -Subject: [PATCH] arm64/cpufeature: don't use mutex in bringup path -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Commit b2bb439ad99a1497daa392a527c0e52c69915ce9 upstream - -Currently, cpus_set_cap() calls static_branch_enable_cpuslocked(), which -must take the jump_label mutex. - -We call cpus_set_cap() in the secondary bringup path, from the idle -thread where interrupts are disabled. Taking a mutex in this path "is a -NONO" regardless of whether it's contended, and something we must avoid. -We didn't spot this until recently, as ___might_sleep() won't warn for -this case until all CPUs have been brought up. - -This patch avoids taking the mutex in the secondary bringup path. The -poking of static keys is deferred until enable_cpu_capabilities(), which -runs in a suitable context on the boot CPU. To account for the static -keys being set later, cpus_have_const_cap() is updated to use another -static key to check whether the const cap keys have been initialised, -falling back to the caps bitmap until this is the case. - -This means that users of cpus_have_const_cap() gain should only gain a -single additional NOP in the fast path once the const caps are -initialised, but should always see the current cap value. - -The hyp code should never dereference the caps array, since the caps are -initialized before we run the module initcall to initialise hyp. A check -is added to the hyp init code to document this requirement. - -This change will sidestep a number of issues when the upcoming hotplug -locking rework is merged. 
- -Signed-off-by: Mark Rutland -Reviewed-by: Marc Zyniger -Reviewed-by: Suzuki Poulose -Acked-by: Will Deacon -Cc: Christoffer Dall -Cc: Peter Zijlstra -Cc: Sebastian Sewior -Cc: Thomas Gleixner -Signed-off-by: Catalin Marinas -Signed-off-by: Sebastian Andrzej Siewior ---- - arch/arm64/include/asm/cpufeature.h | 12 ++++++++++-- - arch/arm64/include/asm/kvm_host.h | 8 ++++++-- - arch/arm64/kernel/cpufeature.c | 23 +++++++++++++++++++++-- - 3 files changed, 37 insertions(+), 6 deletions(-) - ---- a/arch/arm64/include/asm/cpufeature.h -+++ b/arch/arm64/include/asm/cpufeature.h -@@ -115,6 +115,7 @@ struct arm64_cpu_capabilities { - - extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); - extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; -+extern struct static_key_false arm64_const_caps_ready; - - bool this_cpu_has_cap(unsigned int cap); - -@@ -124,7 +125,7 @@ static inline bool cpu_have_feature(unsi - } - - /* System capability check for constant caps */ --static inline bool cpus_have_const_cap(int num) -+static inline bool __cpus_have_const_cap(int num) - { - if (num >= ARM64_NCAPS) - return false; -@@ -138,6 +139,14 @@ static inline bool cpus_have_cap(unsigne - return test_bit(num, cpu_hwcaps); - } - -+static inline bool cpus_have_const_cap(int num) -+{ -+ if (static_branch_likely(&arm64_const_caps_ready)) -+ return __cpus_have_const_cap(num); -+ else -+ return cpus_have_cap(num); -+} -+ - static inline void cpus_set_cap(unsigned int num) - { - if (num >= ARM64_NCAPS) { -@@ -145,7 +154,6 @@ static inline void cpus_set_cap(unsigned - num, ARM64_NCAPS); - } else { - __set_bit(num, cpu_hwcaps); -- static_branch_enable(&cpu_hwcap_keys[num]); - } - } - ---- a/arch/arm64/include/asm/kvm_host.h -+++ b/arch/arm64/include/asm/kvm_host.h -@@ -24,6 +24,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -356,9 +357,12 @@ static inline void __cpu_init_hyp_mode(p - unsigned long vector_ptr) - { - /* -- * Call initialization code, and switch to the full blown -- * HYP code. -+ * Call initialization code, and switch to the full blown HYP code. -+ * If the cpucaps haven't been finalized yet, something has gone very -+ * wrong, and hyp will crash and burn when it uses any -+ * cpus_have_const_cap() wrapper. - */ -+ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); - __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); - } - ---- a/arch/arm64/kernel/cpufeature.c -+++ b/arch/arm64/kernel/cpufeature.c -@@ -975,8 +975,16 @@ void update_cpu_capabilities(const struc - */ - void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) - { -- for (; caps->matches; caps++) -- if (caps->enable && cpus_have_cap(caps->capability)) -+ for (; caps->matches; caps++) { -+ unsigned int num = caps->capability; -+ -+ if (!cpus_have_cap(num)) -+ continue; -+ -+ /* Ensure cpus_have_const_cap(num) works */ -+ static_branch_enable(&cpu_hwcap_keys[num]); -+ -+ if (caps->enable) { - /* - * Use stop_machine() as it schedules the work allowing - * us to modify PSTATE, instead of on_each_cpu() which -@@ -984,6 +992,8 @@ void __init enable_cpu_capabilities(cons - * we return. 
- */ - stop_machine(caps->enable, NULL, cpu_online_mask); -+ } -+ } - } - - /* -@@ -1086,6 +1096,14 @@ static void __init setup_feature_capabil - enable_cpu_capabilities(arm64_features); - } - -+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); -+EXPORT_SYMBOL(arm64_const_caps_ready); -+ -+static void __init mark_const_caps_ready(void) -+{ -+ static_branch_enable(&arm64_const_caps_ready); -+} -+ - /* - * Check if the current CPU has a given feature capability. - * Should be called from non-preemptible context. -@@ -1112,6 +1130,7 @@ void __init setup_cpu_features(void) - /* Set the CPU feature capabilies */ - setup_feature_capabilities(); - enable_errata_workarounds(); -+ mark_const_caps_ready(); - setup_elf_hwcaps(arm64_elf_hwcaps); - - if (system_supports_32bit_el0()) diff --git a/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch b/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch index 4377aa873..52bd7a88c 100644 --- a/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch +++ b/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch @@ -1,7 +1,7 @@ Subject: arm64/xen: Make XEN depend on !RT From: Thomas Gleixner Date: Mon, 12 Oct 2015 11:18:40 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz It's not ready and probably never will be, unless xen folks have a look at it. @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -742,7 +742,7 @@ config XEN_DOM0 +@@ -774,7 +774,7 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM64" diff --git a/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch b/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch index 82e306a2f..e3ca7f037 100644 --- a/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch +++ b/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 09 Mar 2016 10:51:06 +0100 Subject: arm: at91: do not disable/enable clocks in a row -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Currently the driver will disable the clock and enable it one line later if it is switching from periodic mode into one shot. 
@@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c -@@ -74,6 +74,7 @@ static struct clocksource clksrc = { +@@ -126,6 +126,7 @@ static struct clocksource clksrc = { struct tc_clkevt_device { struct clock_event_device clkevt; struct clk *clk; @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior void __iomem *regs; }; -@@ -91,6 +92,24 @@ static struct tc_clkevt_device *to_tc_cl +@@ -143,6 +144,24 @@ static struct tc_clkevt_device *to_tc_cl */ static u32 timer_clock; @@ -47,10 +47,10 @@ Signed-off-by: Sebastian Andrzej Siewior static int tc_shutdown(struct clock_event_device *d) { struct tc_clkevt_device *tcd = to_tc_clkevt(d); -@@ -98,8 +117,14 @@ static int tc_shutdown(struct clock_even +@@ -150,8 +169,14 @@ static int tc_shutdown(struct clock_even - __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); - __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); + writel(0xff, regs + ATMEL_TC_REG(2, IDR)); + writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); + return 0; +} + @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; } -@@ -112,7 +137,7 @@ static int tc_set_oneshot(struct clock_e +@@ -164,7 +189,7 @@ static int tc_set_oneshot(struct clock_e if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) tc_shutdown(d); @@ -71,8 +71,8 @@ Signed-off-by: Sebastian Andrzej Siewior + tc_clk_enable(d); /* slow clock, count up to RC, then irq and stop */ - __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | -@@ -134,7 +159,7 @@ static int tc_set_periodic(struct clock_ + writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | +@@ -186,7 +211,7 @@ static int tc_set_periodic(struct clock_ /* By not making the gentime core emulate periodic mode on top * of oneshot, we get lower overhead and improved accuracy. */ @@ -80,8 +80,8 @@ Signed-off-by: Sebastian Andrzej Siewior + tc_clk_enable(d); /* slow clock, count up to RC, then irq and restart */ - __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, -@@ -168,7 +193,7 @@ static struct tc_clkevt_device clkevt = + writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, +@@ -220,7 +245,7 @@ static struct tc_clkevt_device clkevt = /* Should be lower than at91rm9200's system timer */ .rating = 125, .set_next_event = tc_next_event, diff --git a/debian/patches/features/all/rt/ata-disable-interrupts-if-non-rt.patch b/debian/patches/features/all/rt/ata-disable-interrupts-if-non-rt.patch index 9bc75e8af..38adf6d1d 100644 --- a/debian/patches/features/all/rt/ata-disable-interrupts-if-non-rt.patch +++ b/debian/patches/features/all/rt/ata-disable-interrupts-if-non-rt.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Fri, 3 Jul 2009 08:44:29 -0500 Subject: ata: Do not disable interrupts in ide code for preempt-rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use the local_irq_*_nort variants. 
@@ -10,8 +10,8 @@ Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- - drivers/ata/libata-sff.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) + drivers/ata/libata-sff.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -27,39 +27,3 @@ Signed-off-by: Thomas Gleixner return consumed; } -@@ -720,7 +720,7 @@ static void ata_pio_sector(struct ata_qu - unsigned long flags; - - /* FIXME: use a bounce buffer */ -- local_irq_save(flags); -+ local_irq_save_nort(flags); - buf = kmap_atomic(page); - - /* do the actual data transfer */ -@@ -728,7 +728,7 @@ static void ata_pio_sector(struct ata_qu - do_write); - - kunmap_atomic(buf); -- local_irq_restore(flags); -+ local_irq_restore_nort(flags); - } else { - buf = page_address(page); - ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, -@@ -865,7 +865,7 @@ static int __atapi_pio_bytes(struct ata_ - unsigned long flags; - - /* FIXME: use bounce buffer */ -- local_irq_save(flags); -+ local_irq_save_nort(flags); - buf = kmap_atomic(page); - - /* do the actual data transfer */ -@@ -873,7 +873,7 @@ static int __atapi_pio_bytes(struct ata_ - count, rw); - - kunmap_atomic(buf); -- local_irq_restore(flags); -+ local_irq_restore_nort(flags); - } else { - buf = page_address(page); - consumed = ap->ops->sff_data_xfer(qc, buf + offset, diff --git a/debian/patches/features/all/rt/block-blk-mq-use-swait.patch b/debian/patches/features/all/rt/block-blk-mq-use-swait.patch index b81f22492..88ca5edcc 100644 --- a/debian/patches/features/all/rt/block-blk-mq-use-swait.patch +++ b/debian/patches/features/all/rt/block-blk-mq-use-swait.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 13 Feb 2015 11:01:26 +0100 Subject: block: blk-mq: Use swait -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz | BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914 | in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6 @@ -46,16 +46,16 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-core.c +++ b/block/blk-core.c -@@ -678,7 +678,7 @@ int blk_queue_enter(struct request_queue - if (nowait) - return -EBUSY; +@@ -783,7 +783,7 @@ int blk_queue_enter(struct request_queue + */ + smp_rmb(); - ret = wait_event_interruptible(q->mq_freeze_wq, + ret = swait_event_interruptible(q->mq_freeze_wq, !atomic_read(&q->mq_freeze_depth) || blk_queue_dying(q)); if (blk_queue_dying(q)) -@@ -698,7 +698,7 @@ static void blk_queue_usage_counter_rele +@@ -803,7 +803,7 @@ static void blk_queue_usage_counter_rele struct request_queue *q = container_of(ref, struct request_queue, q_usage_counter); @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void blk_rq_timed_out_timer(unsigned long data) -@@ -766,7 +766,7 @@ struct request_queue *blk_alloc_queue_no +@@ -878,7 +878,7 @@ struct request_queue *blk_alloc_queue_no q->bypass_depth = 1; __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); @@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Init percpu_ref in atomic mode so that it's faster to shutdown. 
--- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -79,14 +79,14 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_st +@@ -132,14 +132,14 @@ EXPORT_SYMBOL_GPL(blk_freeze_queue_start void blk_mq_freeze_queue_wait(struct request_queue *q) { @@ -92,7 +92,7 @@ Signed-off-by: Sebastian Andrzej Siewior percpu_ref_is_zero(&q->q_usage_counter), timeout); } -@@ -127,7 +127,7 @@ void blk_mq_unfreeze_queue(struct reques +@@ -180,7 +180,7 @@ void blk_mq_unfreeze_queue(struct reques WARN_ON_ONCE(freeze_depth < 0); if (!freeze_depth) { percpu_ref_reinit(&q->q_usage_counter); @@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); -@@ -173,7 +173,7 @@ void blk_mq_wake_waiters(struct request_ +@@ -261,7 +261,7 @@ void blk_mq_wake_waiters(struct request_ * dying, we need to ensure that processes currently waiting on * the queue are notified as well. */ @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -566,7 +566,7 @@ struct request_queue { +@@ -581,7 +581,7 @@ struct request_queue { struct throtl_data *td; #endif struct rcu_head rcu_head; diff --git a/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch b/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch index deaf13f6a..e7e9b47cb 100644 --- a/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch +++ b/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Jan 2015 15:10:08 +0100 Subject: block/mq: don't complete requests via IPI -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The IPI runs in hardirq context and there are sleeping locks. This patch moves the completion into a workqueue. @@ -9,10 +9,10 @@ moves the completion into a workqueue. 
Signed-off-by: Sebastian Andrzej Siewior --- block/blk-core.c | 3 +++ - block/blk-mq.c | 24 ++++++++++++++++++++++++ + block/blk-mq.c | 23 +++++++++++++++++++++++ include/linux/blk-mq.h | 2 +- include/linux/blkdev.h | 3 +++ - 4 files changed, 31 insertions(+), 1 deletion(-) + 4 files changed, 30 insertions(+), 1 deletion(-) --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,8 +28,8 @@ Signed-off-by: Sebastian Andrzej Siewior rq->__sector = (sector_t) -1; --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -213,6 +213,9 @@ void blk_mq_rq_ctx_init(struct request_q - rq->errors = 0; +@@ -318,6 +318,9 @@ static struct request *blk_mq_rq_ctx_ini + /* tag was already set */ rq->extra_len = 0; +#ifdef CONFIG_PREEMPT_RT_FULL @@ -38,7 +38,7 @@ Signed-off-by: Sebastian Andrzej Siewior INIT_LIST_HEAD(&rq->timeout_list); rq->timeout = 0; -@@ -395,6 +398,17 @@ void blk_mq_end_request(struct request * +@@ -512,12 +515,24 @@ void blk_mq_end_request(struct request * } EXPORT_SYMBOL(blk_mq_end_request); @@ -56,16 +56,14 @@ Signed-off-by: Sebastian Andrzej Siewior static void __blk_mq_complete_request_remote(void *data) { struct request *rq = data; -@@ -402,6 +416,8 @@ static void __blk_mq_complete_request_re + rq->q->softirq_done_fn(rq); } - +#endif -+ - static void blk_mq_ipi_complete_request(struct request *rq) + + static void __blk_mq_complete_request(struct request *rq) { - struct blk_mq_ctx *ctx = rq->mq_ctx; -@@ -418,10 +434,18 @@ static void blk_mq_ipi_complete_request( +@@ -542,10 +557,18 @@ static void __blk_mq_complete_request(st shared = cpus_share_cache(cpu, ctx->cpu); if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { @@ -86,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior } --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h -@@ -218,7 +218,7 @@ static inline u16 blk_mq_unique_tag_to_t +@@ -226,7 +226,7 @@ static inline u16 blk_mq_unique_tag_to_t return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } @@ -94,10 +92,10 @@ Signed-off-by: Sebastian Andrzej Siewior +void __blk_mq_complete_request_remote_work(struct work_struct *work); int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); - void blk_mq_end_request(struct request *rq, int error); + void blk_mq_end_request(struct request *rq, blk_status_t error); --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -128,6 +128,9 @@ typedef __u32 __bitwise req_flags_t; +@@ -134,6 +134,9 @@ typedef __u32 __bitwise req_flags_t; */ struct request { struct list_head queuelist; @@ -105,5 +103,5 @@ Signed-off-by: Sebastian Andrzej Siewior + struct work_struct work; +#endif union { - struct call_single_data csd; + call_single_data_t csd; u64 fifo_time; diff --git a/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch b/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch index 36613bb80..d1b7d0d35 100644 --- a/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch +++ b/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: block/mq: do not invoke preempt_disable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz preempt_disable() and get_cpu() don't play well together with the sleeping locks it tries to allocate later. 
@@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -413,7 +413,7 @@ static void blk_mq_ipi_complete_request( +@@ -537,7 +537,7 @@ static void __blk_mq_complete_request(st return; } @@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) shared = cpus_share_cache(cpu, ctx->cpu); -@@ -425,7 +425,7 @@ static void blk_mq_ipi_complete_request( +@@ -549,7 +549,7 @@ static void __blk_mq_complete_request(st } else { rq->q->softirq_done_fn(rq); } @@ -31,8 +31,8 @@ Signed-off-by: Sebastian Andrzej Siewior + put_cpu_light(); } - static void blk_mq_stat_add(struct request *rq) -@@ -1143,14 +1143,14 @@ static void __blk_mq_delay_run_hw_queue( + /** +@@ -1197,14 +1197,14 @@ static void __blk_mq_delay_run_hw_queue( return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { @@ -49,4 +49,4 @@ Signed-off-by: Sebastian Andrzej Siewior + put_cpu_light(); } - if (msecs == 0) + kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), diff --git a/debian/patches/features/all/rt/block-mq-use-cpu_light.patch b/debian/patches/features/all/rt/block-mq-use-cpu_light.patch index 964b162a4..fdc85efee 100644 --- a/debian/patches/features/all/rt/block-mq-use-cpu_light.patch +++ b/debian/patches/features/all/rt/block-mq-use-cpu_light.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 9 Apr 2014 10:37:23 +0200 Subject: block: mq: use cpu_light() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz there is a might sleep splat because get_cpu() disables preemption and later we grab a lock. As a workaround for this we use get_cpu_light(). 
@@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-mq.h +++ b/block/blk-mq.h -@@ -130,12 +130,12 @@ static inline struct blk_mq_ctx *__blk_m +@@ -98,12 +98,12 @@ static inline struct blk_mq_ctx *__blk_m */ static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) { diff --git a/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch b/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch index a742afb20..cb766cad3 100644 --- a/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch +++ b/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch @@ -1,7 +1,7 @@ Subject: block: Shorten interrupt disabled regions From: Thomas Gleixner Date: Wed, 22 Jun 2011 19:47:02 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Moving the blk_sched_flush_plug() call out of the interrupt/preempt disabled region in the scheduler allows us to replace @@ -48,7 +48,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de --- a/block/blk-core.c +++ b/block/blk-core.c -@@ -3186,7 +3186,7 @@ static void queue_unplugged(struct reque +@@ -3288,7 +3288,7 @@ static void queue_unplugged(struct reque blk_run_queue_async(q); else __blk_run_queue(q); @@ -57,7 +57,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de } static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) -@@ -3234,7 +3234,6 @@ EXPORT_SYMBOL(blk_check_plugged); +@@ -3336,7 +3336,6 @@ EXPORT_SYMBOL(blk_check_plugged); void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; @@ -65,7 +65,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de struct request *rq; LIST_HEAD(list); unsigned int depth; -@@ -3254,11 +3253,6 @@ void blk_flush_plug_list(struct blk_plug +@@ -3356,11 +3355,6 @@ void blk_flush_plug_list(struct blk_plug q = NULL; depth = 0; @@ -77,7 +77,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de while (!list_empty(&list)) { rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); -@@ -3271,7 +3265,7 @@ void blk_flush_plug_list(struct blk_plug +@@ -3373,7 +3367,7 @@ void blk_flush_plug_list(struct blk_plug queue_unplugged(q, depth, from_schedule); q = rq->q; depth = 0; @@ -86,7 +86,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de } /* -@@ -3298,8 +3292,6 @@ void blk_flush_plug_list(struct blk_plug +@@ -3400,8 +3394,6 @@ void blk_flush_plug_list(struct blk_plug */ if (q) queue_unplugged(q, depth, from_schedule); diff --git a/debian/patches/features/all/rt/block-use-cpu-chill.patch b/debian/patches/features/all/rt/block-use-cpu-chill.patch index 182c6fad6..4615f6ff7 100644 --- a/debian/patches/features/all/rt/block-use-cpu-chill.patch +++ b/debian/patches/features/all/rt/block-use-cpu-chill.patch @@ -1,7 +1,7 @@ Subject: block: Use cpu_chill() for retry loops From: Thomas Gleixner Date: Thu, 20 Dec 2012 18:28:26 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. 
Steven also observed a live lock when there was a @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/block/blk-ioc.c +++ b/block/blk-ioc.c -@@ -8,6 +8,7 @@ +@@ -9,6 +9,7 @@ #include #include #include @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner #include "blk.h" -@@ -117,7 +118,7 @@ static void ioc_release_fn(struct work_s +@@ -118,7 +119,7 @@ static void ioc_release_fn(struct work_s spin_unlock(q->queue_lock); } else { spin_unlock_irqrestore(&ioc->lock, flags); @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner spin_lock_irqsave_nested(&ioc->lock, flags, 1); } } -@@ -201,7 +202,7 @@ void put_io_context_active(struct io_con +@@ -202,7 +203,7 @@ void put_io_context_active(struct io_con spin_unlock(icq->q->queue_lock); } else { spin_unlock_irqrestore(&ioc->lock, flags); diff --git a/debian/patches/features/all/rt/bug-rt-dependend-variants.patch b/debian/patches/features/all/rt/bug-rt-dependend-variants.patch index 047c1929a..4de442e4f 100644 --- a/debian/patches/features/all/rt/bug-rt-dependend-variants.patch +++ b/debian/patches/features/all/rt/bug-rt-dependend-variants.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:58 -0500 Subject: bug: BUG_ON/WARN_ON variants dependend on RT/!RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Introduce RT/NON-RT WARN/BUG statements to avoid ifdefs in the code. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h -@@ -215,6 +215,20 @@ void __warn(const char *file, int line, +@@ -233,6 +233,20 @@ void __warn(const char *file, int line, # define WARN_ON_SMP(x) ({0;}) #endif diff --git a/debian/patches/features/all/rt/cgroups-scheduling-while-atomic-in-cgroup-code.patch b/debian/patches/features/all/rt/cgroups-scheduling-while-atomic-in-cgroup-code.patch index 279e7263a..22b0b5cb5 100644 --- a/debian/patches/features/all/rt/cgroups-scheduling-while-atomic-in-cgroup-code.patch +++ b/debian/patches/features/all/rt/cgroups-scheduling-while-atomic-in-cgroup-code.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Sat, 21 Jun 2014 10:09:48 +0200 Subject: memcontrol: Prevent scheduling while atomic in cgroup code -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz mm, memcg: make refill_stock() use get_cpu_light() @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -1685,6 +1685,7 @@ struct memcg_stock_pcp { +@@ -1723,6 +1723,7 @@ struct memcg_stock_pcp { #define FLUSHING_CACHED_CHARGE 0 }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior static DEFINE_MUTEX(percpu_charge_mutex); /** -@@ -1707,7 +1708,7 @@ static bool consume_stock(struct mem_cgr +@@ -1745,7 +1746,7 @@ static bool consume_stock(struct mem_cgr if (nr_pages > CHARGE_BATCH) return ret; @@ -60,7 +60,7 @@ Signed-off-by: Sebastian Andrzej Siewior stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { -@@ -1715,7 +1716,7 @@ static bool consume_stock(struct mem_cgr +@@ -1753,7 +1754,7 @@ static bool consume_stock(struct mem_cgr ret = true; } @@ -69,10 +69,10 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -1742,13 +1743,13 @@ static void drain_local_stock(struct 
wor - struct memcg_stock_pcp *stock; - unsigned long flags; - +@@ -1784,13 +1785,13 @@ static void drain_local_stock(struct wor + * The only protection from memory hotplug vs. drain_stock races is + * that we always operate on local CPU stock here with IRQ disabled + */ - local_irq_save(flags); + local_lock_irqsave(memcg_stock_ll, flags); @@ -85,7 +85,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -1760,7 +1761,7 @@ static void refill_stock(struct mem_cgro +@@ -1802,7 +1803,7 @@ static void refill_stock(struct mem_cgro struct memcg_stock_pcp *stock; unsigned long flags; @@ -94,9 +94,9 @@ Signed-off-by: Sebastian Andrzej Siewior stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ -@@ -1769,7 +1770,7 @@ static void refill_stock(struct mem_cgro - } - stock->nr_pages += nr_pages; +@@ -1814,7 +1815,7 @@ static void refill_stock(struct mem_cgro + if (stock->nr_pages > CHARGE_BATCH) + drain_stock(stock); - local_irq_restore(flags); + local_unlock_irqrestore(memcg_stock_ll, flags); diff --git a/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch b/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch index 50b5459aa..657769bf9 100644 --- a/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch +++ b/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 13 Feb 2015 15:52:24 +0100 Subject: cgroups: use simple wait in css_release() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz To avoid: |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914 @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h -@@ -17,6 +17,7 @@ +@@ -19,6 +19,7 @@ #include #include #include @@ -43,17 +43,17 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_CGROUPS -@@ -139,6 +140,7 @@ struct cgroup_subsys_state { +@@ -152,6 +153,7 @@ struct cgroup_subsys_state { /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; + struct swork_event destroy_swork; - }; - /* + /* + * PI: the parent css. 
Placed here for cache proximity to following --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c -@@ -3895,10 +3895,10 @@ static void css_free_rcu_fn(struct rcu_h +@@ -4500,10 +4500,10 @@ static void css_free_rcu_fn(struct rcu_h queue_work(cgroup_destroy_wq, &css->destroy_work); } @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; -@@ -3943,8 +3943,8 @@ static void css_release(struct percpu_re +@@ -4554,8 +4554,8 @@ static void css_release(struct percpu_re struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); @@ -77,7 +77,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void init_and_link_css(struct cgroup_subsys_state *css, -@@ -4601,6 +4601,7 @@ static int __init cgroup_wq_init(void) +@@ -5261,6 +5261,7 @@ static int __init cgroup_wq_init(void) */ cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); BUG_ON(!cgroup_destroy_wq); diff --git a/debian/patches/features/all/rt/char-random-don-t-print-that-the-init-is-done.patch b/debian/patches/features/all/rt/char-random-don-t-print-that-the-init-is-done.patch index c76d6d695..0e933ea5f 100644 --- a/debian/patches/features/all/rt/char-random-don-t-print-that-the-init-is-done.patch +++ b/debian/patches/features/all/rt/char-random-don-t-print-that-the-init-is-done.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Tue, 30 May 2017 16:39:01 +0200 Subject: char/random: don't print that the init is done -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On RT we run into circular locking with pendingb_lock (workqueue), port_lock_key (uart) and the primary_crng (random): diff --git a/debian/patches/features/all/rt/clockevents-drivers-timer-atmel-pit-fix-double-free_.patch b/debian/patches/features/all/rt/clockevents-drivers-timer-atmel-pit-fix-double-free_.patch index 9c1efd323..bf0138afb 100644 --- a/debian/patches/features/all/rt/clockevents-drivers-timer-atmel-pit-fix-double-free_.patch +++ b/debian/patches/features/all/rt/clockevents-drivers-timer-atmel-pit-fix-double-free_.patch @@ -1,7 +1,7 @@ From: Alexandre Belloni Date: Thu, 17 Mar 2016 21:09:43 +0100 Subject: [PATCH] clockevents/drivers/timer-atmel-pit: fix double free_irq -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz clockevents_exchange_device() changes the state from detached to shutdown and so at that point the IRQ has not yet been requested. diff --git a/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch b/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch index 0d731aedc..45ff5f515 100644 --- a/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch +++ b/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch @@ -1,7 +1,7 @@ From: Benedikt Spranger Date: Mon, 8 Mar 2010 18:57:04 +0100 Subject: clocksource: TCLIB: Allow higher clock rates for clock events -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz As default the TCLIB uses the 32KiHz base clock rate for clock events. 
Add a compile time selection to allow higher clock resulution. @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c -@@ -23,8 +23,7 @@ +@@ -25,8 +25,7 @@ * this 32 bit free-running counter. the second channel is not used. * * - The third channel may be used to provide a 16-bit clockevent @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner * * A boot clocksource and clockevent source are also currently needed, * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so -@@ -75,6 +74,7 @@ struct tc_clkevt_device { +@@ -127,6 +126,7 @@ struct tc_clkevt_device { struct clock_event_device clkevt; struct clk *clk; bool clk_enabled; @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner void __iomem *regs; }; -@@ -83,13 +83,6 @@ static struct tc_clkevt_device *to_tc_cl +@@ -135,13 +135,6 @@ static struct tc_clkevt_device *to_tc_cl return container_of(clkevt, struct tc_clkevt_device, clkevt); } @@ -49,29 +49,29 @@ Signed-off-by: Thomas Gleixner static u32 timer_clock; static void tc_clk_disable(struct clock_event_device *d) -@@ -139,7 +132,7 @@ static int tc_set_oneshot(struct clock_e +@@ -191,7 +184,7 @@ static int tc_set_oneshot(struct clock_e tc_clk_enable(d); - /* slow clock, count up to RC, then irq and stop */ + /* count up to RC, then irq and stop */ - __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | + writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); - __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); -@@ -161,10 +154,10 @@ static int tc_set_periodic(struct clock_ + writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); +@@ -213,10 +206,10 @@ static int tc_set_periodic(struct clock_ */ tc_clk_enable(d); - /* slow clock, count up to RC, then irq and restart */ + /* count up to RC, then irq and restart */ - __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, + writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); -- __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); -+ __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); +- writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); ++ writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); /* Enable clock and interrupts on RC compare */ - __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); -@@ -191,7 +184,11 @@ static struct tc_clkevt_device clkevt = + writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); +@@ -243,7 +236,11 @@ static struct tc_clkevt_device clkevt = .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, /* Should be lower than at91rm9200's system timer */ @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner .set_next_event = tc_next_event, .set_state_shutdown = tc_shutdown_clk_off, .set_state_periodic = tc_set_periodic, -@@ -213,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void +@@ -265,8 +262,9 @@ static irqreturn_t ch2_irq(int irq, void return IRQ_NONE; } @@ -94,7 +94,7 @@ Signed-off-by: Thomas Gleixner int ret; struct clk *t2_clk = tc->clk[2]; int irq = tc->irq[2]; -@@ -235,7 +233,11 @@ static int __init setup_clkevents(struct +@@ -287,7 +285,11 @@ static int __init setup_clkevents(struct clkevt.regs = tc->regs; clkevt.clk = t2_clk; @@ -107,7 +107,7 @@ Signed-off-by: Thomas Gleixner clkevt.clkevt.cpumask = cpumask_of(0); -@@ -246,7 +248,7 @@ static int __init setup_clkevents(struct +@@ -298,7 +300,7 @@ static int __init setup_clkevents(struct return ret; } @@ 
-116,7 +116,7 @@ Signed-off-by: Thomas Gleixner return ret; } -@@ -383,7 +385,11 @@ static int __init tcb_clksrc_init(void) +@@ -435,7 +437,11 @@ static int __init tcb_clksrc_init(void) goto err_disable_t1; /* channel 2: periodic and oneshot timer support */ diff --git a/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch b/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch index 1d17ce606..ea4fd4aca 100644 --- a/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch +++ b/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch @@ -1,7 +1,7 @@ Subject: completion: Use simple wait queues From: Thomas Gleixner Date: Fri, 11 Jan 2013 11:23:51 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Completions have no long lasting callbacks and therefor do not need the complex waitqueue variant. Use simple waitqueues which reduces the @@ -12,19 +12,19 @@ Signed-off-by: Thomas Gleixner drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 2 - drivers/usb/gadget/function/f_fs.c | 2 - drivers/usb/gadget/legacy/inode.c | 4 +- - include/linux/completion.h | 9 ++--- + include/linux/completion.h | 10 ++--- include/linux/suspend.h | 6 +++ include/linux/swait.h | 1 kernel/power/hibernate.c | 7 ++++ - kernel/power/suspend.c | 5 +++ - kernel/sched/completion.c | 32 ++++++++++---------- - kernel/sched/core.c | 10 +++++- - kernel/sched/swait.c | 20 ++++++++++++ - 11 files changed, 71 insertions(+), 27 deletions(-) + kernel/power/suspend.c | 4 ++ + kernel/sched/completion.c | 34 ++++++++++---------- + kernel/sched/core.c | 10 ++++- + kernel/sched/swait.c | 20 +++++++++++ + 11 files changed, 72 insertions(+), 28 deletions(-) --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c -@@ -696,7 +696,7 @@ static void ezusb_req_ctx_wait(struct ez +@@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ez while (!ctx->done.done && msecs--) udelay(1000); } else { @@ -35,18 +35,18 @@ Signed-off-by: Thomas Gleixner break; --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c -@@ -1594,7 +1594,7 @@ static void ffs_data_put(struct ffs_data +@@ -1610,7 +1610,7 @@ static void ffs_data_put(struct ffs_data pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || -- waitqueue_active(&ffs->ep0req_completion.wait)); -+ swait_active(&ffs->ep0req_completion.wait)); +- waitqueue_active(&ffs->ep0req_completion.wait) || ++ swait_active(&ffs->ep0req_completion.wait) || + waitqueue_active(&ffs->wait)); + destroy_workqueue(ffs->io_completion_wq); kfree(ffs->dev_name); - kfree(ffs); - } --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c -@@ -345,7 +345,7 @@ ep_io (struct ep_data *epdata, void *buf +@@ -347,7 +347,7 @@ ep_io (struct ep_data *epdata, void *buf spin_unlock_irq (&epdata->dev->lock); if (likely (value == 0)) { @@ -55,7 +55,7 @@ Signed-off-by: Thomas Gleixner if (value != 0) { spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { -@@ -354,7 +354,7 @@ ep_io (struct ep_data *epdata, void *buf +@@ -356,7 +356,7 @@ ep_io (struct ep_data *epdata, void *buf usb_ep_dequeue (epdata->ep, epdata->req); spin_unlock_irq (&epdata->dev->lock); @@ -66,32 +66,40 @@ Signed-off-by: Thomas Gleixner } else { --- a/include/linux/completion.h +++ 
b/include/linux/completion.h -@@ -7,8 +7,7 @@ - * Atomic wait-for-completion handler data structures. +@@ -9,7 +9,7 @@ * See kernel/sched/completion.c for details. */ -- + -#include +#include - - /* - * struct completion - structure used to maintain state for a "completion" -@@ -24,11 +23,11 @@ + #ifdef CONFIG_LOCKDEP_COMPLETIONS + #include + #endif +@@ -28,7 +28,7 @@ */ struct completion { unsigned int done; - wait_queue_head_t wait; + struct swait_queue_head wait; - }; + #ifdef CONFIG_LOCKDEP_COMPLETIONS + struct lockdep_map_cross map; + #endif +@@ -67,11 +67,11 @@ static inline void complete_release_comm + #ifdef CONFIG_LOCKDEP_COMPLETIONS + #define COMPLETION_INITIALIZER(work) \ +- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ ++ { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ + STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } + #else #define COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + #endif #define COMPLETION_INITIALIZER_ONSTACK(work) \ - ({ init_completion(&work); work; }) -@@ -73,7 +72,7 @@ struct completion { - static inline void init_completion(struct completion *x) +@@ -117,7 +117,7 @@ static inline void complete_release_comm + static inline void __init_completion(struct completion *x) { x->done = 0; - init_waitqueue_head(&x->wait); @@ -101,7 +109,7 @@ Signed-off-by: Thomas Gleixner /** --- a/include/linux/suspend.h +++ b/include/linux/suspend.h -@@ -193,6 +193,12 @@ struct platform_freeze_ops { +@@ -196,6 +196,12 @@ struct platform_s2idle_ops { void (*end)(void); }; @@ -112,11 +120,11 @@ Signed-off-by: Thomas Gleixner +#endif + #ifdef CONFIG_SUSPEND - /** - * suspend_set_ops - set platform dependent suspend operations + extern suspend_state_t mem_sleep_current; + extern suspend_state_t mem_sleep_default; --- a/include/linux/swait.h +++ b/include/linux/swait.h -@@ -87,6 +87,7 @@ static inline int swait_active(struct sw +@@ -147,6 +147,7 @@ static inline bool swq_has_sleeper(struc extern void swake_up(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); @@ -146,17 +154,17 @@ Signed-off-by: Thomas Gleixner lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { -@@ -769,6 +775,7 @@ int hibernate(void) +@@ -770,6 +776,7 @@ int hibernate(void) atomic_inc(&snapshot_device_available); Unlock: unlock_system_sleep(); + pm_in_action = false; - return error; - } + pr_info("hibernation exit\n"); + return error; --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c -@@ -546,6 +546,8 @@ static int enter_state(suspend_state_t s +@@ -593,6 +593,8 @@ static int enter_state(suspend_state_t s return error; } @@ -165,31 +173,35 @@ Signed-off-by: Thomas Gleixner /** * pm_suspend - Externally visible function for suspending the system. * @state: System sleep state to enter. 
-@@ -560,6 +562,8 @@ int pm_suspend(suspend_state_t state) +@@ -607,6 +609,7 @@ int pm_suspend(suspend_state_t state) if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) return -EINVAL; + pm_in_action = true; -+ + pr_info("suspend entry (%s)\n", mem_sleep_labels[state]); error = enter_state(state); if (error) { - suspend_stats.fail++; -@@ -567,6 +571,7 @@ int pm_suspend(suspend_state_t state) - } else { +@@ -616,6 +619,7 @@ int pm_suspend(suspend_state_t state) suspend_stats.success++; } + pr_info("suspend exit\n"); + pm_in_action = false; return error; } EXPORT_SYMBOL(pm_suspend); --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c -@@ -31,11 +31,11 @@ void complete(struct completion *x) +@@ -32,7 +32,7 @@ void complete(struct completion *x) { unsigned long flags; - spin_lock_irqsave(&x->wait.lock, flags); + raw_spin_lock_irqsave(&x->wait.lock, flags); + + /* + * Perform commit of crossrelease here. +@@ -41,8 +41,8 @@ void complete(struct completion *x) + if (x->done != UINT_MAX) x->done++; - __wake_up_locked(&x->wait, TASK_NORMAL, 1); @@ -199,7 +211,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(complete); -@@ -52,10 +52,10 @@ void complete_all(struct completion *x) +@@ -66,10 +66,10 @@ void complete_all(struct completion *x) { unsigned long flags; @@ -213,14 +225,14 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(complete_all); -@@ -64,20 +64,20 @@ do_wait_for_common(struct completion *x, +@@ -78,20 +78,20 @@ do_wait_for_common(struct completion *x, long (*action)(long), long timeout, int state) { if (!x->done) { - DECLARE_WAITQUEUE(wait, current); + DECLARE_SWAITQUEUE(wait); -- __add_wait_queue_tail_exclusive(&x->wait, &wait); +- __add_wait_queue_entry_tail_exclusive(&x->wait, &wait); + __prepare_to_swait(&x->wait, &wait); do { if (signal_pending_state(state, current)) { @@ -239,19 +251,19 @@ Signed-off-by: Thomas Gleixner if (!x->done) return timeout; } -@@ -92,9 +92,9 @@ static inline long __sched - { - might_sleep(); +@@ -108,9 +108,9 @@ static inline long __sched + + complete_acquire(x); - spin_lock_irq(&x->wait.lock); + raw_spin_lock_irq(&x->wait.lock); timeout = do_wait_for_common(x, action, timeout, state); - spin_unlock_irq(&x->wait.lock); + raw_spin_unlock_irq(&x->wait.lock); - return timeout; - } -@@ -280,12 +280,12 @@ bool try_wait_for_completion(struct comp + complete_release(x); + +@@ -299,12 +299,12 @@ bool try_wait_for_completion(struct comp if (!READ_ONCE(x->done)) return 0; @@ -266,18 +278,20 @@ Signed-off-by: Thomas Gleixner return ret; } EXPORT_SYMBOL(try_wait_for_completion); -@@ -314,7 +314,7 @@ bool completion_done(struct completion * - * after it's acquired the lock. +@@ -330,8 +330,8 @@ bool completion_done(struct completion * + * otherwise we can end up freeing the completion before complete() + * is done referencing it. 
*/ - smp_rmb(); -- spin_unlock_wait(&x->wait.lock); -+ raw_spin_unlock_wait(&x->wait.lock); +- spin_lock_irqsave(&x->wait.lock, flags); +- spin_unlock_irqrestore(&x->wait.lock, flags); ++ raw_spin_lock_irqsave(&x->wait.lock, flags); ++ raw_spin_unlock_irqrestore(&x->wait.lock, flags); return true; } EXPORT_SYMBOL(completion_done); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7487,7 +7487,10 @@ void migrate_disable(void) +@@ -6930,7 +6930,10 @@ void migrate_disable(void) return; } #ifdef CONFIG_SCHED_DEBUG @@ -289,7 +303,7 @@ Signed-off-by: Thomas Gleixner #endif if (p->migrate_disable) { -@@ -7520,7 +7523,10 @@ void migrate_enable(void) +@@ -6960,7 +6963,10 @@ void migrate_enable(void) } #ifdef CONFIG_SCHED_DEBUG @@ -303,14 +317,15 @@ Signed-off-by: Thomas Gleixner WARN_ON_ONCE(p->migrate_disable <= 0); --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c -@@ -1,5 +1,6 @@ +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 #include #include +#include void __init_swait_queue_head(struct swait_queue_head *q, const char *name, struct lock_class_key *key) -@@ -29,6 +30,25 @@ void swake_up_locked(struct swait_queue_ +@@ -30,6 +31,25 @@ void swake_up_locked(struct swait_queue_ } EXPORT_SYMBOL(swake_up_locked); diff --git a/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch b/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch index 66bd19fdc..380ee1b82 100644 --- a/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch +++ b/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch @@ -1,7 +1,7 @@ Subject: sched: Use the proper LOCK_OFFSET for cond_resched() From: Thomas Gleixner Date: Sun, 17 Jul 2011 22:51:33 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RT does not increment preempt count when a 'sleeping' spinlock is locked. Update PREEMPT_LOCK_OFFSET for that case. @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -117,7 +117,11 @@ +@@ -118,7 +118,11 @@ /* * The preempt_count offset after spin_lock() */ diff --git a/debian/patches/features/all/rt/cond-resched-softirq-rt.patch b/debian/patches/features/all/rt/cond-resched-softirq-rt.patch index 60ceceb90..ef5907700 100644 --- a/debian/patches/features/all/rt/cond-resched-softirq-rt.patch +++ b/debian/patches/features/all/rt/cond-resched-softirq-rt.patch @@ -1,7 +1,7 @@ Subject: sched: Take RT softirq semantics into account in cond_resched() From: Thomas Gleixner Date: Thu, 14 Jul 2011 09:56:44 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The softirq semantics work different on -RT. 
There is no SOFTIRQ_MASK in the preemption counter which leads to the BUG_ON() statement in @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1509,12 +1509,16 @@ extern int __cond_resched_lock(spinlock_ +@@ -1606,12 +1606,16 @@ extern int __cond_resched_lock(spinlock_ __cond_resched_lock(lock); \ }) @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner { --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -5096,6 +5096,7 @@ int __cond_resched_lock(spinlock_t *lock +@@ -4946,6 +4946,7 @@ int __cond_resched_lock(spinlock_t *lock } EXPORT_SYMBOL(__cond_resched_lock); @@ -43,7 +43,7 @@ Signed-off-by: Thomas Gleixner int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); -@@ -5109,6 +5110,7 @@ int __sched __cond_resched_softirq(void) +@@ -4959,6 +4960,7 @@ int __sched __cond_resched_softirq(void) return 0; } EXPORT_SYMBOL(__cond_resched_softirq); diff --git a/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch b/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch index 8f3b0c42e..e09acbb03 100644 --- a/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch +++ b/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch @@ -2,7 +2,7 @@ From: Mike Galbraith Date: Sun, 16 Oct 2016 05:11:54 +0200 Subject: [PATCH] connector/cn_proc: Protect send_msg() with a local lock on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931 |in_atomic(): 1, irqs_disabled(): 0, pid: 31807, name: sleep diff --git a/debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch b/debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch new file mode 100644 index 000000000..43556a55c --- /dev/null +++ b/debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch @@ -0,0 +1,111 @@ +Subject: cpu/hotplug: Implement CPU pinning +From: Thomas Gleixner +Date: Wed, 19 Jul 2017 17:31:20 +0200 +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Signed-off-by: Thomas Gleixner +--- + include/linux/sched.h | 1 + + kernel/cpu.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 41 insertions(+) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -589,6 +589,7 @@ struct task_struct { + #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) + int migrate_disable; + int migrate_disable_update; ++ int pinned_on_cpu; + # ifdef CONFIG_SCHED_DEBUG + int migrate_disable_atomic; + # endif +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -73,6 +73,11 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_s + .fail = CPUHP_INVALID, + }; + ++#ifdef CONFIG_HOTPLUG_CPU ++static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \ ++ __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock); ++#endif ++ + #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) + static struct lockdep_map cpuhp_state_up_map = + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); +@@ -291,7 +296,30 @@ static int cpu_hotplug_disabled; + */ + void pin_current_cpu(void) + { ++ struct rt_rw_lock *cpuhp_pin; ++ unsigned int cpu; ++ int ret; + ++again: ++ cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock); ++ ret = __read_rt_trylock(cpuhp_pin); ++ if (ret) { ++ 
current->pinned_on_cpu = smp_processor_id(); ++ return; ++ } ++ cpu = smp_processor_id(); ++ preempt_lazy_enable(); ++ preempt_enable(); ++ ++ __read_rt_lock(cpuhp_pin); ++ ++ preempt_disable(); ++ preempt_lazy_disable(); ++ if (cpu != smp_processor_id()) { ++ __read_rt_unlock(cpuhp_pin); ++ goto again; ++ } ++ current->pinned_on_cpu = cpu; + } + + /** +@@ -299,6 +327,13 @@ void pin_current_cpu(void) + */ + void unpin_current_cpu(void) + { ++ struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock); ++ ++ if (WARN_ON(current->pinned_on_cpu != smp_processor_id())) ++ cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, current->pinned_on_cpu); ++ ++ current->pinned_on_cpu = -1; ++ __read_rt_unlock(cpuhp_pin); + } + + DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); +@@ -768,6 +803,7 @@ static int take_cpu_down(void *_param) + + static int takedown_cpu(unsigned int cpu) + { ++ struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu); + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + int err; + +@@ -781,11 +817,14 @@ static int takedown_cpu(unsigned int cpu + */ + irq_lock_sparse(); + ++ __write_rt_lock(cpuhp_pin); ++ + /* + * So now all preempt/rcu users must observe !cpu_active(). + */ + err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); + if (err) { ++ __write_rt_unlock(cpuhp_pin); + /* CPU refused to die */ + irq_unlock_sparse(); + /* Unpark the hotplug thread so we can rollback there */ +@@ -804,6 +843,7 @@ static int takedown_cpu(unsigned int cpu + wait_for_ap_thread(st, false); + BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); + ++ __write_rt_unlock(cpuhp_pin); + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); + diff --git a/debian/patches/features/all/rt/cpu-hotplug-Document-why-PREEMPT_RT-uses-a-spinlock.patch b/debian/patches/features/all/rt/cpu-hotplug-Document-why-PREEMPT_RT-uses-a-spinlock.patch deleted file mode 100644 index ee6afb1ca..000000000 --- a/debian/patches/features/all/rt/cpu-hotplug-Document-why-PREEMPT_RT-uses-a-spinlock.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: Steven Rostedt -Date: Thu, 5 Dec 2013 09:16:52 -0500 -Subject: cpu hotplug: Document why PREEMPT_RT uses a spinlock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The patch: - - cpu: Make hotplug.lock a "sleeping" spinlock on RT - - Tasks can block on hotplug.lock in pin_current_cpu(), but their - state might be != RUNNING. So the mutex wakeup will set the state - unconditionally to RUNNING. That might cause spurious unexpected - wakeups. We could provide a state preserving mutex_lock() function, - but this is semantically backwards. So instead we convert the - hotplug.lock() to a spinlock for RT, which has the state preserving - semantics already. - -Fixed a bug where the hotplug lock on PREEMPT_RT can be called after a -task set its state to TASK_UNINTERRUPTIBLE and before it called -schedule. If the hotplug_lock used a mutex, and there was contention, -the current task's state would be turned to TASK_RUNNABLE and the -schedule call will not sleep. This caused unexpected results. - -Although the patch had a description of the change, the code had no -comments about it. This causes confusion to those that review the code, -and as PREEMPT_RT is held in a quilt queue and not git, it's not as easy -to see why a change was made. Even if it was in git, the code should -still have a comment for something as subtle as this. 
- -Document the rational for using a spinlock on PREEMPT_RT in the hotplug -lock code. - -Reported-by: Nicholas Mc Guire -Signed-off-by: Steven Rostedt -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/cpu.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -255,6 +255,14 @@ struct hotplug_pcp { - int grab_lock; - struct completion synced; - #ifdef CONFIG_PREEMPT_RT_FULL -+ /* -+ * Note, on PREEMPT_RT, the hotplug lock must save the state of -+ * the task, otherwise the mutex will cause the task to fail -+ * to sleep when required. (Because it's called from migrate_disable()) -+ * -+ * The spinlock_t on PREEMPT_RT is a mutex that saves the task's -+ * state. -+ */ - spinlock_t lock; - #else - struct mutex mutex; diff --git a/debian/patches/features/all/rt/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch b/debian/patches/features/all/rt/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch deleted file mode 100644 index 5b393849b..000000000 --- a/debian/patches/features/all/rt/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch +++ /dev/null @@ -1,115 +0,0 @@ -Subject: cpu: Make hotplug.lock a "sleeping" spinlock on RT -From: Steven Rostedt -Date: Fri, 02 Mar 2012 10:36:57 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Tasks can block on hotplug.lock in pin_current_cpu(), but their state -might be != RUNNING. So the mutex wakeup will set the state -unconditionally to RUNNING. That might cause spurious unexpected -wakeups. We could provide a state preserving mutex_lock() function, -but this is semantically backwards. So instead we convert the -hotplug.lock() to a spinlock for RT, which has the state preserving -semantics already. - -Signed-off-by: Steven Rostedt -Cc: Carsten Emde -Cc: John Kacur -Cc: Peter Zijlstra -Cc: Clark Williams - -Link: http://lkml.kernel.org/r/1330702617.25686.265.camel@gandalf.stny.rr.com -Signed-off-by: Thomas Gleixner ---- - kernel/cpu.c | 32 +++++++++++++++++++++++++------- - 1 file changed, 25 insertions(+), 7 deletions(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -205,10 +205,16 @@ static int cpu_hotplug_disabled; - - static struct { - struct task_struct *active_writer; -+ - /* wait queue to wake up the active_writer */ - wait_queue_head_t wq; -+#ifdef CONFIG_PREEMPT_RT_FULL -+ /* Makes the lock keep the task's state */ -+ spinlock_t lock; -+#else - /* verifies that no writer will get active while readers are active */ - struct mutex lock; -+#endif - /* - * Also blocks the new readers during - * an ongoing cpu hotplug operation. 
-@@ -221,12 +227,24 @@ static struct { - } cpu_hotplug = { - .active_writer = NULL, - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), -+#ifdef CONFIG_PREEMPT_RT_FULL -+ .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock), -+#else - .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), -+#endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC - .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map), - #endif - }; - -+#ifdef CONFIG_PREEMPT_RT_FULL -+# define hotplug_lock() rt_spin_lock__no_mg(&cpu_hotplug.lock) -+# define hotplug_unlock() rt_spin_unlock__no_mg(&cpu_hotplug.lock) -+#else -+# define hotplug_lock() mutex_lock(&cpu_hotplug.lock) -+# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock) -+#endif -+ - /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ - #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map) - #define cpuhp_lock_acquire_tryread() \ -@@ -263,8 +281,8 @@ void pin_current_cpu(void) - return; - } - preempt_enable(); -- mutex_lock(&cpu_hotplug.lock); -- mutex_unlock(&cpu_hotplug.lock); -+ hotplug_lock(); -+ hotplug_unlock(); - preempt_disable(); - goto retry; - } -@@ -337,9 +355,9 @@ void get_online_cpus(void) - if (cpu_hotplug.active_writer == current) - return; - cpuhp_lock_acquire_read(); -- mutex_lock(&cpu_hotplug.lock); -+ hotplug_lock(); - atomic_inc(&cpu_hotplug.refcount); -- mutex_unlock(&cpu_hotplug.lock); -+ hotplug_unlock(); - } - EXPORT_SYMBOL_GPL(get_online_cpus); - -@@ -392,11 +410,11 @@ void cpu_hotplug_begin(void) - cpuhp_lock_acquire(); - - for (;;) { -- mutex_lock(&cpu_hotplug.lock); -+ hotplug_lock(); - prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); - if (likely(!atomic_read(&cpu_hotplug.refcount))) - break; -- mutex_unlock(&cpu_hotplug.lock); -+ hotplug_unlock(); - schedule(); - } - finish_wait(&cpu_hotplug.wq, &wait); -@@ -405,7 +423,7 @@ void cpu_hotplug_begin(void) - void cpu_hotplug_done(void) - { - cpu_hotplug.active_writer = NULL; -- mutex_unlock(&cpu_hotplug.lock); -+ hotplug_unlock(); - cpuhp_lock_release(); - } - diff --git a/debian/patches/features/all/rt/cpu-rt-rework-cpu-down.patch b/debian/patches/features/all/rt/cpu-rt-rework-cpu-down.patch deleted file mode 100644 index 974dfdaaa..000000000 --- a/debian/patches/features/all/rt/cpu-rt-rework-cpu-down.patch +++ /dev/null @@ -1,526 +0,0 @@ -From: Steven Rostedt -Date: Mon, 16 Jul 2012 08:07:43 +0000 -Subject: cpu/rt: Rework cpu down for PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Bringing a CPU down is a pain with the PREEMPT_RT kernel because -tasks can be preempted in many more places than in non-RT. In -order to handle per_cpu variables, tasks may be pinned to a CPU -for a while, and even sleep. But these tasks need to be off the CPU -if that CPU is going down. - -Several synchronization methods have been tried, but when stressed -they failed. This is a new approach. - -A sync_tsk thread is still created and tasks may still block on a -lock when the CPU is going down, but how that works is a bit different. -When cpu_down() starts, it will create the sync_tsk and wait on it -to inform that current tasks that are pinned on the CPU are no longer -pinned. But new tasks that are about to be pinned will still be allowed -to do so at this time. - -Then the notifiers are called. Several notifiers will bring down tasks -that will enter these locations. Some of these tasks will take locks -of other tasks that are on the CPU. 
If we don't let those other tasks -continue, but make them block until CPU down is done, the tasks that -the notifiers are waiting on will never complete as they are waiting -for the locks held by the tasks that are blocked. - -Thus we still let the task pin the CPU until the notifiers are done. -After the notifiers run, we then make new tasks entering the pinned -CPU sections grab a mutex and wait. This mutex is now a per CPU mutex -in the hotplug_pcp descriptor. - -To help things along, a new function in the scheduler code is created -called migrate_me(). This function will try to migrate the current task -off the CPU this is going down if possible. When the sync_tsk is created, -all tasks will then try to migrate off the CPU going down. There are -several cases that this wont work, but it helps in most cases. - -After the notifiers are called and if a task can't migrate off but enters -the pin CPU sections, it will be forced to wait on the hotplug_pcp mutex -until the CPU down is complete. Then the scheduler will force the migration -anyway. - -Also, I found that THREAD_BOUND need to also be accounted for in the -pinned CPU, and the migrate_disable no longer treats them special. -This helps fix issues with ksoftirqd and workqueue that unbind on CPU down. - -Signed-off-by: Steven Rostedt -Signed-off-by: Thomas Gleixner - ---- - include/linux/sched.h | 7 + - kernel/cpu.c | 236 +++++++++++++++++++++++++++++++++++++++++--------- - kernel/sched/core.c | 78 ++++++++++++++++ - 3 files changed, 280 insertions(+), 41 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -1342,6 +1342,10 @@ extern int task_can_attach(struct task_s - #ifdef CONFIG_SMP - extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); - extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); -+int migrate_me(void); -+void tell_sched_cpu_down_begin(int cpu); -+void tell_sched_cpu_down_done(int cpu); -+ - #else - static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - { -@@ -1352,6 +1356,9 @@ static inline int set_cpus_allowed_ptr(s - return -EINVAL; - return 0; - } -+static inline int migrate_me(void) { return 0; } -+static inline void tell_sched_cpu_down_begin(int cpu) { } -+static inline void tell_sched_cpu_down_done(int cpu) { } - #endif - - #ifndef cpu_relax_yield ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -205,16 +205,10 @@ static int cpu_hotplug_disabled; - - static struct { - struct task_struct *active_writer; -- - /* wait queue to wake up the active_writer */ - wait_queue_head_t wq; --#ifdef CONFIG_PREEMPT_RT_FULL -- /* Makes the lock keep the task's state */ -- spinlock_t lock; --#else - /* verifies that no writer will get active while readers are active */ - struct mutex lock; --#endif - /* - * Also blocks the new readers during - * an ongoing cpu hotplug operation. 
-@@ -227,24 +221,12 @@ static struct { - } cpu_hotplug = { - .active_writer = NULL, - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), --#ifdef CONFIG_PREEMPT_RT_FULL -- .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock), --#else - .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), --#endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC - .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map), - #endif - }; - --#ifdef CONFIG_PREEMPT_RT_FULL --# define hotplug_lock() rt_spin_lock__no_mg(&cpu_hotplug.lock) --# define hotplug_unlock() rt_spin_unlock__no_mg(&cpu_hotplug.lock) --#else --# define hotplug_lock() mutex_lock(&cpu_hotplug.lock) --# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock) --#endif -- - /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ - #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map) - #define cpuhp_lock_acquire_tryread() \ -@@ -252,12 +234,42 @@ static struct { - #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) - #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) - -+/** -+ * hotplug_pcp - per cpu hotplug descriptor -+ * @unplug: set when pin_current_cpu() needs to sync tasks -+ * @sync_tsk: the task that waits for tasks to finish pinned sections -+ * @refcount: counter of tasks in pinned sections -+ * @grab_lock: set when the tasks entering pinned sections should wait -+ * @synced: notifier for @sync_tsk to tell cpu_down it's finished -+ * @mutex: the mutex to make tasks wait (used when @grab_lock is true) -+ * @mutex_init: zero if the mutex hasn't been initialized yet. -+ * -+ * Although @unplug and @sync_tsk may point to the same task, the @unplug -+ * is used as a flag and still exists after @sync_tsk has exited and -+ * @sync_tsk set to NULL. -+ */ - struct hotplug_pcp { - struct task_struct *unplug; -+ struct task_struct *sync_tsk; - int refcount; -+ int grab_lock; - struct completion synced; -+#ifdef CONFIG_PREEMPT_RT_FULL -+ spinlock_t lock; -+#else -+ struct mutex mutex; -+#endif -+ int mutex_init; - }; - -+#ifdef CONFIG_PREEMPT_RT_FULL -+# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock) -+# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock) -+#else -+# define hotplug_lock(hp) mutex_lock(&(hp)->mutex) -+# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex) -+#endif -+ - static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); - - /** -@@ -271,18 +283,39 @@ static DEFINE_PER_CPU(struct hotplug_pcp - void pin_current_cpu(void) - { - struct hotplug_pcp *hp; -+ int force = 0; - - retry: - hp = this_cpu_ptr(&hotplug_pcp); - -- if (!hp->unplug || hp->refcount || preempt_count() > 1 || -+ if (!hp->unplug || hp->refcount || force || preempt_count() > 1 || - hp->unplug == current) { - hp->refcount++; - return; - } -- preempt_enable(); -- hotplug_lock(); -- hotplug_unlock(); -+ if (hp->grab_lock) { -+ preempt_enable(); -+ hotplug_lock(hp); -+ hotplug_unlock(hp); -+ } else { -+ preempt_enable(); -+ /* -+ * Try to push this task off of this CPU. -+ */ -+ if (!migrate_me()) { -+ preempt_disable(); -+ hp = this_cpu_ptr(&hotplug_pcp); -+ if (!hp->grab_lock) { -+ /* -+ * Just let it continue it's already pinned -+ * or about to sleep. -+ */ -+ force = 1; -+ goto retry; -+ } -+ preempt_enable(); -+ } -+ } - preempt_disable(); - goto retry; - } -@@ -303,26 +336,84 @@ void unpin_current_cpu(void) - wake_up_process(hp->unplug); - } - --/* -- * FIXME: Is this really correct under all circumstances ? 
-- */ -+static void wait_for_pinned_cpus(struct hotplug_pcp *hp) -+{ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ while (hp->refcount) { -+ schedule_preempt_disabled(); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ } -+} -+ - static int sync_unplug_thread(void *data) - { - struct hotplug_pcp *hp = data; - - preempt_disable(); - hp->unplug = current; -+ wait_for_pinned_cpus(hp); -+ -+ /* -+ * This thread will synchronize the cpu_down() with threads -+ * that have pinned the CPU. When the pinned CPU count reaches -+ * zero, we inform the cpu_down code to continue to the next step. -+ */ - set_current_state(TASK_UNINTERRUPTIBLE); -- while (hp->refcount) { -- schedule_preempt_disabled(); -+ preempt_enable(); -+ complete(&hp->synced); -+ -+ /* -+ * If all succeeds, the next step will need tasks to wait till -+ * the CPU is offline before continuing. To do this, the grab_lock -+ * is set and tasks going into pin_current_cpu() will block on the -+ * mutex. But we still need to wait for those that are already in -+ * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop() -+ * will kick this thread out. -+ */ -+ while (!hp->grab_lock && !kthread_should_stop()) { -+ schedule(); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ } -+ -+ /* Make sure grab_lock is seen before we see a stale completion */ -+ smp_mb(); -+ -+ /* -+ * Now just before cpu_down() enters stop machine, we need to make -+ * sure all tasks that are in pinned CPU sections are out, and new -+ * tasks will now grab the lock, keeping them from entering pinned -+ * CPU sections. -+ */ -+ if (!kthread_should_stop()) { -+ preempt_disable(); -+ wait_for_pinned_cpus(hp); -+ preempt_enable(); -+ complete(&hp->synced); -+ } -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ while (!kthread_should_stop()) { -+ schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - } - set_current_state(TASK_RUNNING); -- preempt_enable(); -- complete(&hp->synced); -+ -+ /* -+ * Force this thread off this CPU as it's going down and -+ * we don't want any more work on this CPU. -+ */ -+ current->flags &= ~PF_NO_SETAFFINITY; -+ do_set_cpus_allowed(current, cpu_present_mask); -+ migrate_me(); - return 0; - } - -+static void __cpu_unplug_sync(struct hotplug_pcp *hp) -+{ -+ wake_up_process(hp->sync_tsk); -+ wait_for_completion(&hp->synced); -+} -+ - /* - * Start the sync_unplug_thread on the target cpu and wait for it to - * complete. -@@ -330,23 +421,83 @@ static int sync_unplug_thread(void *data - static int cpu_unplug_begin(unsigned int cpu) - { - struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); -- struct task_struct *tsk; -+ int err; -+ -+ /* Protected by cpu_hotplug.lock */ -+ if (!hp->mutex_init) { -+#ifdef CONFIG_PREEMPT_RT_FULL -+ spin_lock_init(&hp->lock); -+#else -+ mutex_init(&hp->mutex); -+#endif -+ hp->mutex_init = 1; -+ } -+ -+ /* Inform the scheduler to migrate tasks off this CPU */ -+ tell_sched_cpu_down_begin(cpu); - - init_completion(&hp->synced); -- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); -- if (IS_ERR(tsk)) -- return (PTR_ERR(tsk)); -- kthread_bind(tsk, cpu); -- wake_up_process(tsk); -- wait_for_completion(&hp->synced); -+ -+ hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); -+ if (IS_ERR(hp->sync_tsk)) { -+ err = PTR_ERR(hp->sync_tsk); -+ hp->sync_tsk = NULL; -+ return err; -+ } -+ kthread_bind(hp->sync_tsk, cpu); -+ -+ /* -+ * Wait for tasks to get out of the pinned sections, -+ * it's still OK if new tasks enter. 
Some CPU notifiers will -+ * wait for tasks that are going to enter these sections and -+ * we must not have them block. -+ */ -+ __cpu_unplug_sync(hp); -+ - return 0; - } - -+static void cpu_unplug_sync(unsigned int cpu) -+{ -+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); -+ -+ init_completion(&hp->synced); -+ /* The completion needs to be initialzied before setting grab_lock */ -+ smp_wmb(); -+ -+ /* Grab the mutex before setting grab_lock */ -+ hotplug_lock(hp); -+ hp->grab_lock = 1; -+ -+ /* -+ * The CPU notifiers have been completed. -+ * Wait for tasks to get out of pinned CPU sections and have new -+ * tasks block until the CPU is completely down. -+ */ -+ __cpu_unplug_sync(hp); -+ -+ /* All done with the sync thread */ -+ kthread_stop(hp->sync_tsk); -+ hp->sync_tsk = NULL; -+} -+ - static void cpu_unplug_done(unsigned int cpu) - { - struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); - - hp->unplug = NULL; -+ /* Let all tasks know cpu unplug is finished before cleaning up */ -+ smp_wmb(); -+ -+ if (hp->sync_tsk) -+ kthread_stop(hp->sync_tsk); -+ -+ if (hp->grab_lock) { -+ hotplug_unlock(hp); -+ /* protected by cpu_hotplug.lock */ -+ hp->grab_lock = 0; -+ } -+ tell_sched_cpu_down_done(cpu); - } - - void get_online_cpus(void) -@@ -355,9 +506,9 @@ void get_online_cpus(void) - if (cpu_hotplug.active_writer == current) - return; - cpuhp_lock_acquire_read(); -- hotplug_lock(); -+ mutex_lock(&cpu_hotplug.lock); - atomic_inc(&cpu_hotplug.refcount); -- hotplug_unlock(); -+ mutex_unlock(&cpu_hotplug.lock); - } - EXPORT_SYMBOL_GPL(get_online_cpus); - -@@ -410,11 +561,11 @@ void cpu_hotplug_begin(void) - cpuhp_lock_acquire(); - - for (;;) { -- hotplug_lock(); -+ mutex_lock(&cpu_hotplug.lock); - prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); - if (likely(!atomic_read(&cpu_hotplug.refcount))) - break; -- hotplug_unlock(); -+ mutex_unlock(&cpu_hotplug.lock); - schedule(); - } - finish_wait(&cpu_hotplug.wq, &wait); -@@ -423,7 +574,7 @@ void cpu_hotplug_begin(void) - void cpu_hotplug_done(void) - { - cpu_hotplug.active_writer = NULL; -- hotplug_unlock(); -+ mutex_unlock(&cpu_hotplug.lock); - cpuhp_lock_release(); - } - -@@ -816,6 +967,9 @@ static int takedown_cpu(unsigned int cpu - kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); - smpboot_park_threads(cpu); - -+ /* Notifiers are done. Don't let any more tasks pin this CPU. */ -+ cpu_unplug_sync(cpu); -+ - /* - * Prevent irq alloc/free while the dying cpu reorganizes the - * interrupt affinities. ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -1122,6 +1122,84 @@ void do_set_cpus_allowed(struct task_str - __do_set_cpus_allowed_tail(p, new_mask); - } - -+static DEFINE_PER_CPU(struct cpumask, sched_cpumasks); -+static DEFINE_MUTEX(sched_down_mutex); -+static cpumask_t sched_down_cpumask; -+ -+void tell_sched_cpu_down_begin(int cpu) -+{ -+ mutex_lock(&sched_down_mutex); -+ cpumask_set_cpu(cpu, &sched_down_cpumask); -+ mutex_unlock(&sched_down_mutex); -+} -+ -+void tell_sched_cpu_down_done(int cpu) -+{ -+ mutex_lock(&sched_down_mutex); -+ cpumask_clear_cpu(cpu, &sched_down_cpumask); -+ mutex_unlock(&sched_down_mutex); -+} -+ -+/** -+ * migrate_me - try to move the current task off this cpu -+ * -+ * Used by the pin_current_cpu() code to try to get tasks -+ * to move off the current CPU as it is going down. -+ * It will only move the task if the task isn't pinned to -+ * the CPU (with migrate_disable, affinity or NO_SETAFFINITY) -+ * and the task has to be in a RUNNING state. 
Otherwise the -+ * movement of the task will wake it up (change its state -+ * to running) when the task did not expect it. -+ * -+ * Returns 1 if it succeeded in moving the current task -+ * 0 otherwise. -+ */ -+int migrate_me(void) -+{ -+ struct task_struct *p = current; -+ struct migration_arg arg; -+ struct cpumask *cpumask; -+ const struct cpumask *mask; -+ unsigned int dest_cpu; -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ /* -+ * We can not migrate tasks bounded to a CPU or tasks not -+ * running. The movement of the task will wake it up. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY || p->state) -+ return 0; -+ -+ mutex_lock(&sched_down_mutex); -+ rq = task_rq_lock(p, &rf); -+ -+ cpumask = this_cpu_ptr(&sched_cpumasks); -+ mask = p->cpus_ptr; -+ -+ cpumask_andnot(cpumask, mask, &sched_down_cpumask); -+ -+ if (!cpumask_weight(cpumask)) { -+ /* It's only on this CPU? */ -+ task_rq_unlock(rq, p, &rf); -+ mutex_unlock(&sched_down_mutex); -+ return 0; -+ } -+ -+ dest_cpu = cpumask_any_and(cpu_active_mask, cpumask); -+ -+ arg.task = p; -+ arg.dest_cpu = dest_cpu; -+ -+ task_rq_unlock(rq, p, &rf); -+ -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ tlb_migrate_finish(p->mm); -+ mutex_unlock(&sched_down_mutex); -+ -+ return 1; -+} -+ - /* - * Change a given task's CPU affinity. Migrate the thread to a - * proper CPU and schedule it away if the CPU it's executing on diff --git a/debian/patches/features/all/rt/cpu_chill-Add-a-UNINTERRUPTIBLE-hrtimer_nanosleep.patch b/debian/patches/features/all/rt/cpu_chill-Add-a-UNINTERRUPTIBLE-hrtimer_nanosleep.patch index a009b8dad..66f38a854 100644 --- a/debian/patches/features/all/rt/cpu_chill-Add-a-UNINTERRUPTIBLE-hrtimer_nanosleep.patch +++ b/debian/patches/features/all/rt/cpu_chill-Add-a-UNINTERRUPTIBLE-hrtimer_nanosleep.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Tue, 4 Mar 2014 12:28:32 -0500 Subject: cpu_chill: Add a UNINTERRUPTIBLE hrtimer_nanosleep -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz We hit another bug that was caused by switching cpu_chill() from msleep() to hrtimer_nanosleep(). 
@@ -29,20 +29,20 @@ Reported-by: Ulrich Obergfell Signed-off-by: Steven Rostedt Signed-off-by: Sebastian Andrzej Siewior --- - kernel/time/hrtimer.c | 25 ++++++++++++++++++------- - 1 file changed, 18 insertions(+), 7 deletions(-) + kernel/time/hrtimer.c | 24 +++++++++++++++++------- + 1 file changed, 17 insertions(+), 7 deletions(-) --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1601,12 +1601,13 @@ void hrtimer_init_sleeper(struct hrtimer +@@ -1739,12 +1739,13 @@ int nanosleep_copyout(struct restart_blo + return -ERESTART_RESTARTBLOCK; } - EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, + unsigned long state) { - hrtimer_init_sleeper(t, current); + struct restart_block *restart; do { - set_current_state(TASK_INTERRUPTIBLE); @@ -50,57 +50,53 @@ Signed-off-by: Sebastian Andrzej Siewior hrtimer_start_expires(&t->timer, mode); if (likely(t->task)) -@@ -1648,7 +1649,8 @@ long __sched hrtimer_nanosleep_restart(s - HRTIMER_MODE_ABS); +@@ -1782,13 +1783,15 @@ static long __sched hrtimer_nanosleep_re + hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, + HRTIMER_MODE_ABS, current); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); - -- if (do_nanosleep(&t, HRTIMER_MODE_ABS)) +- ret = do_nanosleep(&t, HRTIMER_MODE_ABS); + /* cpu_chill() does not care about restart state. */ -+ if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) - goto out; - - rmtp = restart->nanosleep.rmtp; -@@ -1665,8 +1667,10 @@ long __sched hrtimer_nanosleep_restart(s ++ ret = do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE); + destroy_hrtimer_on_stack(&t.timer); return ret; } --long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, +-long hrtimer_nanosleep(const struct timespec64 *rqtp, - const enum hrtimer_mode mode, const clockid_t clockid) -+static long -+__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, -+ const enum hrtimer_mode mode, const clockid_t clockid, -+ unsigned long state) ++static long __hrtimer_nanosleep(const struct timespec64 *rqtp, ++ const enum hrtimer_mode mode, const clockid_t clockid, ++ unsigned long state) { struct restart_block *restart; struct hrtimer_sleeper t; -@@ -1679,7 +1683,7 @@ long hrtimer_nanosleep(struct timespec * +@@ -1801,7 +1804,7 @@ long hrtimer_nanosleep(const struct time - hrtimer_init_on_stack(&t.timer, clockid, mode); - hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); -- if (do_nanosleep(&t, mode)) -+ if (do_nanosleep(&t, mode, state)) + hrtimer_init_sleeper_on_stack(&t, clockid, mode, current); + hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); +- ret = do_nanosleep(&t, mode); ++ ret = do_nanosleep(&t, mode, state); + if (ret != -ERESTART_RESTARTBLOCK) goto out; - /* Absolute timers do not update the rmtp value and restart: */ -@@ -1706,6 +1710,12 @@ long hrtimer_nanosleep(struct timespec * +@@ -1820,6 +1823,12 @@ long hrtimer_nanosleep(const struct time return ret; } -+long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, ++long hrtimer_nanosleep(const struct timespec64 *rqtp, + const enum hrtimer_mode mode, const clockid_t clockid) +{ -+ return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); ++ return __hrtimer_nanosleep(rqtp, mode, clockid, TASK_INTERRUPTIBLE); +} + SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, struct 
timespec __user *, rmtp) { -@@ -1732,7 +1742,8 @@ void cpu_chill(void) +@@ -1867,7 +1876,8 @@ void cpu_chill(void) unsigned int freeze_flag = current->flags & PF_NOFREEZE; current->flags |= PF_NOFREEZE; -- hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC); -+ __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC, +- hrtimer_nanosleep(&tu, HRTIMER_MODE_REL_HARD, CLOCK_MONOTONIC); ++ __hrtimer_nanosleep(&tu, HRTIMER_MODE_REL_HARD, CLOCK_MONOTONIC, + TASK_UNINTERRUPTIBLE); if (!freeze_flag) current->flags &= ~PF_NOFREEZE; diff --git a/debian/patches/features/all/rt/cpu_down_move_migrate_enable_back.patch b/debian/patches/features/all/rt/cpu_down_move_migrate_enable_back.patch deleted file mode 100644 index 939713a78..000000000 --- a/debian/patches/features/all/rt/cpu_down_move_migrate_enable_back.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Tiejun Chen -Subject: cpu_down: move migrate_enable() back -Date: Thu, 7 Nov 2013 10:06:07 +0800 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Commit 08c1ab68, "hotplug-use-migrate-disable.patch", intends to -use migrate_enable()/migrate_disable() to replace that combination -of preempt_enable() and preempt_disable(), but actually in -!CONFIG_PREEMPT_RT_FULL case, migrate_enable()/migrate_disable() -are still equal to preempt_enable()/preempt_disable(). So that -followed cpu_hotplug_begin()/cpu_unplug_begin(cpu) would go schedule() -to trigger schedule_debug() like this: - -_cpu_down() - | - + migrate_disable() = preempt_disable() - | - + cpu_hotplug_begin() or cpu_unplug_begin() - | - + schedule() - | - + __schedule() - | - + preempt_disable(); - | - + __schedule_bug() is true! - -So we should move migrate_enable() as the original scheme. - - -Signed-off-by: Tiejun Chen ---- - kernel/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -1094,6 +1094,7 @@ static int __ref _cpu_down(unsigned int - goto restore_cpus; - } - -+ migrate_enable(); - cpu_hotplug_begin(); - ret = cpu_unplug_begin(cpu); - if (ret) { -@@ -1140,7 +1141,6 @@ static int __ref _cpu_down(unsigned int - cpu_unplug_done(cpu); - out_cancel: - cpu_hotplug_done(); -- migrate_enable(); - restore_cpus: - set_cpus_allowed_ptr(current, cpumask_org); - free_cpumask_var(cpumask_org); diff --git a/debian/patches/features/all/rt/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch b/debian/patches/features/all/rt/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch deleted file mode 100644 index fda4ad678..000000000 --- a/debian/patches/features/all/rt/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch +++ /dev/null @@ -1,173 +0,0 @@ -From: Alex Shi -Date: Thu, 6 Jul 2017 16:47:46 +0800 -Subject: [PATCH] cpu_pm: replace raw_notifier to atomic_notifier -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -This patch replace a rwlock and raw notifier by atomic notifier which -protected by spin_lock and rcu. - -The first to reason to have this replace is due to a 'scheduling while - atomic' bug of RT kernel on arm/arm64 platform. 
On arm/arm64, rwlock -cpu_pm_notifier_lock in cpu_pm cause a potential schedule after irq -disable in idle call chain: - -cpu_startup_entry - cpu_idle_loop - local_irq_disable() - cpuidle_idle_call - call_cpuidle - cpuidle_enter - cpuidle_enter_state - ->enter :arm_enter_idle_state - cpu_pm_enter/exit - CPU_PM_CPU_IDLE_ENTER - read_lock(&cpu_pm_notifier_lock); <-- sleep in idle - __rt_spin_lock(); - schedule(); - -The kernel panic is here: -[ 4.609601] BUG: scheduling while atomic: swapper/1/0/0x00000002 -[ 4.609608] [] arm_enter_idle_state+0x18/0x70 -[ 4.609614] Modules linked in: -[ 4.609615] [] cpuidle_enter_state+0xf0/0x218 -[ 4.609620] [] cpuidle_enter+0x18/0x20 -[ 4.609626] Preemption disabled at: -[ 4.609627] [] call_cpuidle+0x24/0x40 -[ 4.609635] [] schedule_preempt_disabled+0x1c/0x28 -[ 4.609639] [] cpu_startup_entry+0x154/0x1f8 -[ 4.609645] [] secondary_start_kernel+0x15c/0x1a0 - -Daniel Lezcano said this notification is needed on arm/arm64 platforms. -Sebastian suggested using atomic_notifier instead of rwlock, which is not -only removing the sleeping in idle, but also getting better latency -improvement. - -This patch passed Fengguang's 0day testing. - -Signed-off-by: Alex Shi -Cc: Sebastian Andrzej Siewior -Cc: Thomas Gleixner -Cc: Anders Roxell -Cc: Rik van Riel -Cc: Steven Rostedt -Cc: Rafael J. Wysocki -Cc: Daniel Lezcano -Cc: linux-rt-users -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/cpu_pm.c | 43 ++++++------------------------------------- - 1 file changed, 6 insertions(+), 37 deletions(-) - ---- a/kernel/cpu_pm.c -+++ b/kernel/cpu_pm.c -@@ -22,14 +22,13 @@ - #include - #include - --static DEFINE_RWLOCK(cpu_pm_notifier_lock); --static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain); -+static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain); - - static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls) - { - int ret; - -- ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL, -+ ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL, - nr_to_call, nr_calls); - - return notifier_to_errno(ret); -@@ -47,14 +46,7 @@ static int cpu_pm_notify(enum cpu_pm_eve - */ - int cpu_pm_register_notifier(struct notifier_block *nb) - { -- unsigned long flags; -- int ret; -- -- write_lock_irqsave(&cpu_pm_notifier_lock, flags); -- ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb); -- write_unlock_irqrestore(&cpu_pm_notifier_lock, flags); -- -- return ret; -+ return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb); - } - EXPORT_SYMBOL_GPL(cpu_pm_register_notifier); - -@@ -69,14 +61,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi - */ - int cpu_pm_unregister_notifier(struct notifier_block *nb) - { -- unsigned long flags; -- int ret; -- -- write_lock_irqsave(&cpu_pm_notifier_lock, flags); -- ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb); -- write_unlock_irqrestore(&cpu_pm_notifier_lock, flags); -- -- return ret; -+ return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb); - } - EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); - -@@ -100,7 +85,6 @@ int cpu_pm_enter(void) - int nr_calls; - int ret = 0; - -- read_lock(&cpu_pm_notifier_lock); - ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls); - if (ret) - /* -@@ -108,7 +92,6 @@ int cpu_pm_enter(void) - * PM entry who are notified earlier to prepare for it. 
- */ - cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL); -- read_unlock(&cpu_pm_notifier_lock); - - return ret; - } -@@ -128,13 +111,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_enter); - */ - int cpu_pm_exit(void) - { -- int ret; -- -- read_lock(&cpu_pm_notifier_lock); -- ret = cpu_pm_notify(CPU_PM_EXIT, -1, NULL); -- read_unlock(&cpu_pm_notifier_lock); -- -- return ret; -+ return cpu_pm_notify(CPU_PM_EXIT, -1, NULL); - } - EXPORT_SYMBOL_GPL(cpu_pm_exit); - -@@ -159,7 +136,6 @@ int cpu_cluster_pm_enter(void) - int nr_calls; - int ret = 0; - -- read_lock(&cpu_pm_notifier_lock); - ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls); - if (ret) - /* -@@ -167,7 +143,6 @@ int cpu_cluster_pm_enter(void) - * PM entry who are notified earlier to prepare for it. - */ - cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL); -- read_unlock(&cpu_pm_notifier_lock); - - return ret; - } -@@ -190,13 +165,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); - */ - int cpu_cluster_pm_exit(void) - { -- int ret; -- -- read_lock(&cpu_pm_notifier_lock); -- ret = cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL); -- read_unlock(&cpu_pm_notifier_lock); -- -- return ret; -+ return cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL); - } - EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit); - diff --git a/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch b/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch index bc16940bd..12b1e9210 100644 --- a/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch +++ b/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 9 Apr 2015 15:23:01 +0200 Subject: cpufreq: drop K8's driver from beeing selected -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Ralf posted a picture of a backtrace from diff --git a/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch b/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch index a65b2b6b2..9562aa22d 100644 --- a/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch +++ b/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch @@ -1,7 +1,7 @@ Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT From: Thomas Gleixner Date: Wed, 14 Dec 2011 01:03:49 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz There are "valid" GFP_ATOMIC allocations such as @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -908,7 +908,7 @@ config IOMMU_HELPER +@@ -921,7 +921,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL @@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner If unsure, say N. 
--- a/lib/Kconfig +++ b/lib/Kconfig -@@ -409,6 +409,7 @@ config CHECK_SIGNATURE +@@ -428,6 +428,7 @@ config CHECK_SIGNATURE config CPUMASK_OFFSTACK bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS diff --git a/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch b/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch index 5e607ea41..d6d03cbb9 100644 --- a/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch +++ b/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Sun, 8 Jan 2017 09:32:25 +0100 Subject: [PATCH] cpuset: Convert callback_lock to raw_spinlock_t -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The two commits below add up to a cpuset might_sleep() splat for RT: @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c -@@ -286,7 +286,7 @@ static struct cpuset top_cpuset = { +@@ -288,7 +288,7 @@ static struct cpuset top_cpuset = { */ static DEFINE_MUTEX(cpuset_mutex); @@ -60,7 +60,7 @@ Signed-off-by: Sebastian Andrzej Siewior static struct workqueue_struct *cpuset_migrate_mm_wq; -@@ -909,9 +909,9 @@ static void update_cpumasks_hier(struct +@@ -926,9 +926,9 @@ static void update_cpumasks_hier(struct continue; rcu_read_unlock(); @@ -70,9 +70,9 @@ Signed-off-by: Sebastian Andrzej Siewior - spin_unlock_irq(&callback_lock); + raw_spin_unlock_irq(&callback_lock); - WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && + WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); -@@ -976,9 +976,9 @@ static int update_cpumask(struct cpuset +@@ -993,9 +993,9 @@ static int update_cpumask(struct cpuset if (retval < 0) return retval; @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* use trialcs->cpus_allowed as a temp variable */ update_cpumasks_hier(cs, trialcs->cpus_allowed); -@@ -1178,9 +1178,9 @@ static void update_nodemasks_hier(struct +@@ -1179,9 +1179,9 @@ static void update_nodemasks_hier(struct continue; rcu_read_unlock(); @@ -94,9 +94,9 @@ Signed-off-by: Sebastian Andrzej Siewior - spin_unlock_irq(&callback_lock); + raw_spin_unlock_irq(&callback_lock); - WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && + WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); -@@ -1248,9 +1248,9 @@ static int update_nodemask(struct cpuset +@@ -1249,9 +1249,9 @@ static int update_nodemask(struct cpuset if (retval < 0) goto done; @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); -@@ -1341,9 +1341,9 @@ static int update_flag(cpuset_flagbits_t +@@ -1342,9 +1342,9 @@ static int update_flag(cpuset_flagbits_t spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); -@@ -1758,7 +1758,7 @@ static int cpuset_common_seq_show(struct +@@ -1759,7 +1759,7 @@ static int cpuset_common_seq_show(struct cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; @@ -129,7 +129,7 @@ Signed-off-by: Sebastian Andrzej Siewior switch (type) { case 
FILE_CPULIST: -@@ -1777,7 +1777,7 @@ static int cpuset_common_seq_show(struct +@@ -1778,7 +1778,7 @@ static int cpuset_common_seq_show(struct ret = -EINVAL; } @@ -138,13 +138,13 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -1991,12 +1991,12 @@ static int cpuset_css_online(struct cgro +@@ -1993,12 +1993,12 @@ static int cpuset_css_online(struct cgro cpuset_inc(); - spin_lock_irq(&callback_lock); + raw_spin_lock_irq(&callback_lock); - if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { + if (is_in_v2_mode()) { cpumask_copy(cs->effective_cpus, parent->effective_cpus); cs->effective_mems = parent->effective_mems; } @@ -153,7 +153,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; -@@ -2023,12 +2023,12 @@ static int cpuset_css_online(struct cgro +@@ -2025,12 +2025,12 @@ static int cpuset_css_online(struct cgro } rcu_read_unlock(); @@ -168,16 +168,16 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: mutex_unlock(&cpuset_mutex); return 0; -@@ -2067,7 +2067,7 @@ static void cpuset_css_free(struct cgrou +@@ -2069,7 +2069,7 @@ static void cpuset_css_free(struct cgrou static void cpuset_bind(struct cgroup_subsys_state *root_css) { mutex_lock(&cpuset_mutex); - spin_lock_irq(&callback_lock); + raw_spin_lock_irq(&callback_lock); - if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { + if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); -@@ -2078,7 +2078,7 @@ static void cpuset_bind(struct cgroup_su +@@ -2080,7 +2080,7 @@ static void cpuset_bind(struct cgroup_su top_cpuset.mems_allowed = top_cpuset.effective_mems; } @@ -186,7 +186,7 @@ Signed-off-by: Sebastian Andrzej Siewior mutex_unlock(&cpuset_mutex); } -@@ -2179,12 +2179,12 @@ hotplug_update_tasks_legacy(struct cpuse +@@ -2178,12 +2178,12 @@ hotplug_update_tasks_legacy(struct cpuse { bool is_empty; @@ -201,7 +201,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, -@@ -2221,10 +2221,10 @@ hotplug_update_tasks(struct cpuset *cs, +@@ -2220,10 +2220,10 @@ hotplug_update_tasks(struct cpuset *cs, if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (cpus_updated) update_tasks_cpumask(cs); -@@ -2310,21 +2310,21 @@ static void cpuset_hotplug_workfn(struct +@@ -2316,21 +2316,21 @@ static void cpuset_hotplug_workfn(struct /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { @@ -240,7 +240,7 @@ Signed-off-by: Sebastian Andrzej Siewior update_tasks_nodemask(&top_cpuset); } -@@ -2422,11 +2422,11 @@ void cpuset_cpus_allowed(struct task_str +@@ -2429,11 +2429,11 @@ void cpuset_cpus_allowed(struct task_str { unsigned long flags; @@ -254,7 +254,7 @@ Signed-off-by: Sebastian Andrzej Siewior } void cpuset_cpus_allowed_fallback(struct task_struct *tsk) -@@ -2474,11 +2474,11 @@ nodemask_t cpuset_mems_allowed(struct ta +@@ -2481,11 +2481,11 @@ nodemask_t cpuset_mems_allowed(struct ta nodemask_t mask; unsigned long flags; @@ -268,7 +268,7 @@ Signed-off-by: Sebastian Andrzej Siewior return mask; } -@@ -2570,14 +2570,14 @@ bool __cpuset_node_allowed(int node, gfp +@@ -2577,14 +2577,14 @@ bool __cpuset_node_allowed(int node, gfp return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ diff --git a/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch b/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch index 
f4f942b90..60c4794d7 100644 --- a/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch +++ b/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 21 Feb 2014 17:24:04 +0100 Subject: crypto: Reduce preempt disabled regions, more algos -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Don Estabrook reported | kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100() @@ -98,7 +98,7 @@ Signed-off-by: Sebastian Andrzej Siewior return err; } -@@ -311,7 +309,7 @@ static unsigned int __ctr_crypt(struct b +@@ -310,7 +308,7 @@ static unsigned int __ctr_crypt(struct b static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct blkcipher_walk walk; int err; -@@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_de +@@ -319,13 +317,12 @@ static int ctr_crypt(struct blkcipher_de desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior err = blkcipher_walk_done(desc, &walk, 0); --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c -@@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const +@@ -40,7 +40,7 @@ static int __glue_ecb_crypt_128bit(const void *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = 128 / 8; unsigned int nbytes, i, func_bytes; @@ -134,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior int err; err = blkcipher_walk_virt(desc, walk); -@@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const +@@ -50,7 +50,7 @@ static int __glue_ecb_crypt_128bit(const u8 *wdst = walk->dst.virt.addr; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, @@ -143,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; -@@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const +@@ -72,10 +72,10 @@ static int __glue_ecb_crypt_128bit(const } done: @@ -155,7 +155,7 @@ Signed-off-by: Sebastian Andrzej Siewior return err; } -@@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct +@@ -192,7 +192,7 @@ int glue_cbc_decrypt_128bit(const struct struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; @@ -164,7 +164,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct blkcipher_walk walk; int err; -@@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct +@@ -201,12 +201,12 @@ int glue_cbc_decrypt_128bit(const struct while ((nbytes = walk.nbytes)) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, @@ -179,7 +179,7 @@ Signed-off-by: Sebastian Andrzej Siewior return err; } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); -@@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct c +@@ -275,7 +275,7 @@ int glue_ctr_crypt_128bit(const struct c struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; @@ -188,7 +188,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct blkcipher_walk walk; int err; -@@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct c +@@ -284,13 +284,12 @@ int glue_ctr_crypt_128bit(const struct c while ((nbytes = walk.nbytes) >= bsize) { fpu_enabled = 
glue_fpu_begin(bsize, gctx->fpu_blocks_limit, @@ -204,7 +204,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (walk.nbytes) { glue_ctr_crypt_final_128bit( gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); -@@ -382,7 +381,7 @@ int glue_xts_crypt_128bit(const struct c +@@ -380,7 +379,7 @@ int glue_xts_crypt_128bit(const struct c void *tweak_ctx, void *crypt_ctx) { const unsigned int bsize = 128 / 8; @@ -213,7 +213,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct blkcipher_walk walk; int err; -@@ -395,21 +394,21 @@ int glue_xts_crypt_128bit(const struct c +@@ -393,21 +392,21 @@ int glue_xts_crypt_128bit(const struct c /* set minimum length to bsize, for tweak_fn */ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, diff --git a/debian/patches/features/all/rt/debugobjects-rt.patch b/debian/patches/features/all/rt/debugobjects-rt.patch index aaa3b0632..c58a43a9f 100644 --- a/debian/patches/features/all/rt/debugobjects-rt.patch +++ b/debian/patches/features/all/rt/debugobjects-rt.patch @@ -1,7 +1,7 @@ Subject: debugobjects: Make RT aware From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:41:35 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Avoid filling the pool / allocating memory with irqs off(). @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/lib/debugobjects.c +++ b/lib/debugobjects.c -@@ -334,7 +334,10 @@ static void +@@ -336,7 +336,10 @@ static void struct debug_obj *obj; unsigned long flags; diff --git a/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch b/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch index c64423632..64d386bb4 100644 --- a/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch +++ b/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Sat, 20 May 2017 12:32:23 +0200 Subject: [PATCH] delayacct: use raw_spinlocks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz try_to_wake_up() might invoke delayacct_blkio_end() while holding the pi_lock. The lock is only held for a short amount of time so it should diff --git a/debian/patches/features/all/rt/dm-make-rt-aware.patch b/debian/patches/features/all/rt/dm-make-rt-aware.patch index d0bda3b37..f01aeb4b7 100644 --- a/debian/patches/features/all/rt/dm-make-rt-aware.patch +++ b/debian/patches/features/all/rt/dm-make-rt-aware.patch @@ -1,7 +1,7 @@ Subject: dm: Make rt aware From: Thomas Gleixner Date: Mon, 14 Nov 2011 23:06:09 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use the BUG_ON_NORT variant for the irq_disabled() checks. 
RT has interrupts legitimately enabled here as we cant deadlock against the @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c -@@ -667,7 +667,7 @@ static void dm_old_request_fn(struct req +@@ -671,7 +671,7 @@ static void dm_old_request_fn(struct req /* Establish tio->ti before queuing work (map_tio_request) */ tio->ti = ti; kthread_queue_work(&md->kworker, &tio->work); diff --git a/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch b/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch index 3d5449168..52bf2cfd5 100644 --- a/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch +++ b/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch @@ -2,7 +2,7 @@ From: Mike Galbraith Date: Thu, 31 Mar 2016 04:08:28 +0200 Subject: [PATCH] drivers/block/zram: Replace bit spinlocks with rtmutex for -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz They're nondeterministic, and lead to ___might_sleep() splats in -rt. OTOH, they're a lot less wasteful than an rtmutex per page. @@ -10,119 +10,62 @@ OTOH, they're a lot less wasteful than an rtmutex per page. Signed-off-by: Mike Galbraith Signed-off-by: Sebastian Andrzej Siewior --- - drivers/block/zram/zram_drv.c | 30 ++++++++++++++++-------------- - drivers/block/zram/zram_drv.h | 41 +++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 57 insertions(+), 14 deletions(-) + drivers/block/zram/zram_drv.c | 26 ++++++++++++++++++++++++++ + drivers/block/zram/zram_drv.h | 3 +++ + 2 files changed, 29 insertions(+) --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -461,6 +461,8 @@ static struct zram_meta *zram_meta_alloc - goto out_error; - } +@@ -756,6 +756,30 @@ static DEVICE_ATTR_RO(io_stat); + static DEVICE_ATTR_RO(mm_stat); + static DEVICE_ATTR_RO(debug_stat); -+ zram_meta_init_table_locks(meta, disksize); ++#ifdef CONFIG_PREEMPT_RT_BASE ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) ++{ ++ size_t index; + - return meta; ++ for (index = 0; index < num_pages; index++) ++ spin_lock_init(&zram->table[index].lock); ++} ++ ++static void zram_slot_lock(struct zram *zram, u32 index) ++{ ++ spin_lock(&zram->table[index].lock); ++ __set_bit(ZRAM_ACCESS, &zram->table[index].value); ++} ++ ++static void zram_slot_unlock(struct zram *zram, u32 index) ++{ ++ __clear_bit(ZRAM_ACCESS, &zram->table[index].value); ++ spin_unlock(&zram->table[index].lock); ++} ++ ++#else ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } ++ + static void zram_slot_lock(struct zram *zram, u32 index) + { + bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); +@@ -765,6 +789,7 @@ static void zram_slot_unlock(struct zram + { + bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); + } ++#endif - out_error: -@@ -511,12 +513,12 @@ static int zram_decompress_page(struct z - unsigned long handle; - unsigned int size; - -- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_lock_table(&meta->table[index]); - handle = meta->table[index].handle; - size = zram_get_obj_size(meta, index); - - if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) { -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - 
zram_fill_page(mem, PAGE_SIZE, meta->table[index].element); - return 0; - } -@@ -531,7 +533,7 @@ static int zram_decompress_page(struct z - zcomp_stream_put(zram->comp); - } - zs_unmap_object(meta->mem_pool, handle); -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - - /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret)) { -@@ -551,14 +553,14 @@ static int zram_bvec_read(struct zram *z - struct zram_meta *meta = zram->meta; - page = bvec->bv_page; - -- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_lock_table(&meta->table[index]); - if (unlikely(!meta->table[index].handle) || - zram_test_flag(meta, index, ZRAM_SAME)) { -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - handle_same_page(bvec, meta->table[index].element); - return 0; - } -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - - if (is_partial_io(bvec)) - /* Use a temporary buffer to decompress the page */ -@@ -636,11 +638,11 @@ static int zram_bvec_write(struct zram * - if (user_mem) - kunmap_atomic(user_mem); - /* Free memory associated with this sector now. */ -- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_lock_table(&meta->table[index]); - zram_free_page(zram, index); - zram_set_flag(meta, index, ZRAM_SAME); - zram_set_element(meta, index, element); -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - - atomic64_inc(&zram->stats.same_pages); - ret = 0; -@@ -731,12 +733,12 @@ static int zram_bvec_write(struct zram * - * Free memory associated with this sector - * before overwriting unused sectors. - */ -- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_lock_table(&meta->table[index]); - zram_free_page(zram, index); - - meta->table[index].handle = handle; - zram_set_obj_size(meta, index, clen); -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - - /* Update stats */ - atomic64_add(clen, &zram->stats.compr_data_size); -@@ -779,9 +781,9 @@ static void zram_bio_discard(struct zram + static void zram_meta_free(struct zram *zram, u64 disksize) + { +@@ -794,6 +819,7 @@ static bool zram_meta_alloc(struct zram + return false; } - while (n >= PAGE_SIZE) { -- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_lock_table(&meta->table[index]); - zram_free_page(zram, index); -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - atomic64_inc(&zram->stats.notify_free); - index++; - n -= PAGE_SIZE; -@@ -905,9 +907,9 @@ static void zram_slot_free_notify(struct - zram = bdev->bd_disk->private_data; - meta = zram->meta; - -- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_lock_table(&meta->table[index]); - zram_free_page(zram, index); -- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); -+ zram_unlock_table(&meta->table[index]); - atomic64_inc(&zram->stats.notify_free); ++ zram_meta_init_table_locks(zram, num_pages); + return true; } --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h -@@ -76,6 +76,9 @@ struct zram_table_entry { +@@ -77,6 +77,9 @@ struct zram_table_entry { unsigned long element; }; unsigned long value; @@ -132,46 +75,3 @@ Signed-off-by: Sebastian Andrzej Siewior }; struct zram_stats { -@@ -120,4 +123,42 @@ struct zram { - */ - bool claim; /* Protected by bdev->bd_mutex */ - }; -+ -+#ifndef 
CONFIG_PREEMPT_RT_BASE -+static inline void zram_lock_table(struct zram_table_entry *table) -+{ -+ bit_spin_lock(ZRAM_ACCESS, &table->value); -+} -+ -+static inline void zram_unlock_table(struct zram_table_entry *table) -+{ -+ bit_spin_unlock(ZRAM_ACCESS, &table->value); -+} -+ -+static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { } -+#else /* CONFIG_PREEMPT_RT_BASE */ -+static inline void zram_lock_table(struct zram_table_entry *table) -+{ -+ spin_lock(&table->lock); -+ __set_bit(ZRAM_ACCESS, &table->value); -+} -+ -+static inline void zram_unlock_table(struct zram_table_entry *table) -+{ -+ __clear_bit(ZRAM_ACCESS, &table->value); -+ spin_unlock(&table->lock); -+} -+ -+static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) -+{ -+ size_t num_pages = disksize >> PAGE_SHIFT; -+ size_t index; -+ -+ for (index = 0; index < num_pages; index++) { -+ spinlock_t *lock = &meta->table[index].lock; -+ spin_lock_init(lock); -+ } -+} -+#endif /* CONFIG_PREEMPT_RT_BASE */ -+ - #endif diff --git a/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch b/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch index daa9f7fb4..3efdc012f 100644 --- a/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch +++ b/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:24 -0500 Subject: drivers/net: Use disable_irq_nosync() in 8139too -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use disable_irq_nosync() instead of disable_irq() as this might be called in atomic context with netpoll. @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c -@@ -2223,7 +2223,7 @@ static void rtl8139_poll_controller(stru +@@ -2224,7 +2224,7 @@ static void rtl8139_poll_controller(stru struct rtl8139_private *tp = netdev_priv(dev); const int irq = tp->pci_dev->irq; diff --git a/debian/patches/features/all/rt/drivers-net-vortex-fix-locking-issues.patch b/debian/patches/features/all/rt/drivers-net-vortex-fix-locking-issues.patch index 915317826..29dd22200 100644 --- a/debian/patches/features/all/rt/drivers-net-vortex-fix-locking-issues.patch +++ b/debian/patches/features/all/rt/drivers-net-vortex-fix-locking-issues.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Fri, 3 Jul 2009 08:30:00 -0500 Subject: drivers/net: vortex fix locking issues -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Argh, cut and paste wasn't enough... 
diff --git a/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch b/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch index 40d687c69..4071798f5 100644 --- a/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch +++ b/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:30 -0500 Subject: drivers: random: Reduce preempt disabled region -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz No need to keep preemption disabled across the whole function. diff --git a/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch b/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch index e6b5a3726..ed6bcd515 100644 --- a/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch +++ b/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch @@ -1,7 +1,7 @@ Subject: tty/serial/omap: Make the locking RT aware From: Thomas Gleixner Date: Thu, 28 Jul 2011 13:32:57 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c -@@ -1312,13 +1312,10 @@ serial_omap_console_write(struct console +@@ -1311,13 +1311,10 @@ serial_omap_console_write(struct console pm_runtime_get_sync(up->dev); @@ -31,7 +31,7 @@ Signed-off-by: Thomas Gleixner /* * First save the IER then disable the interrupts -@@ -1347,8 +1344,7 @@ serial_omap_console_write(struct console +@@ -1346,8 +1343,7 @@ serial_omap_console_write(struct console pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); if (locked) diff --git a/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch b/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch index 5f9fb16b7..7daac38b4 100644 --- a/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch +++ b/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch @@ -1,7 +1,7 @@ Subject: tty/serial/pl011: Make the locking work on RT From: Thomas Gleixner Date: Tue, 08 Jan 2013 21:36:51 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and non-RT. 
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2222,13 +2222,19 @@ pl011_console_write(struct console *co, +@@ -2220,13 +2220,19 @@ pl011_console_write(struct console *co, clk_enable(uap->clk); @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner /* * First save the CR then disable the interrupts -@@ -2254,8 +2260,7 @@ pl011_console_write(struct console *co, +@@ -2252,8 +2258,7 @@ pl011_console_write(struct console *co, pl011_write(old_cr, uap, REG_CR); if (locked) diff --git a/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch b/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch index 7107260fb..8097a4fd6 100644 --- a/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch +++ b/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch @@ -2,7 +2,7 @@ From: Mike Galbraith Date: Thu, 20 Oct 2016 11:15:22 +0200 Subject: [PATCH] drivers/zram: Don't disable preemption in zcomp_stream_get/put() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz In v4.7, the driver switched to percpu compression streams, disabling preemption via get/put_cpu_ptr(). Use a per-zcomp_strm lock here. We @@ -16,12 +16,12 @@ Signed-off-by: Sebastian Andrzej Siewior --- drivers/block/zram/zcomp.c | 12 ++++++++++-- drivers/block/zram/zcomp.h | 1 + - drivers/block/zram/zram_drv.c | 6 +++--- - 3 files changed, 14 insertions(+), 5 deletions(-) + drivers/block/zram/zram_drv.c | 5 +++-- + 3 files changed, 14 insertions(+), 4 deletions(-) --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c -@@ -118,12 +118,19 @@ ssize_t zcomp_available_show(const char +@@ -116,12 +116,19 @@ ssize_t zcomp_available_show(const char struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior } int zcomp_compress(struct zcomp_strm *zstrm, -@@ -173,6 +180,7 @@ int zcomp_cpu_up_prepare(unsigned int cp +@@ -171,6 +178,7 @@ int zcomp_cpu_up_prepare(unsigned int cp pr_err("Can't allocate a compression stream\n"); return -ENOMEM; } @@ -63,30 +63,35 @@ Signed-off-by: Sebastian Andrzej Siewior /* dynamic per-device compression frontend */ --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -512,6 +512,7 @@ static int zram_decompress_page(struct z - struct zram_meta *meta = zram->meta; +@@ -871,6 +871,7 @@ static int __zram_bvec_read(struct zram unsigned long handle; unsigned int size; + void *src, *dst; + struct zcomp_strm *zstrm; - zram_lock_table(&meta->table[index]); - handle = meta->table[index].handle; -@@ -523,16 +524,15 @@ static int zram_decompress_page(struct z - return 0; - } + if (zram_wb_enabled(zram)) { + zram_slot_lock(zram, index); +@@ -905,6 +906,7 @@ static int __zram_bvec_read(struct zram + + size = zram_get_obj_size(zram, index); + zstrm = zcomp_stream_get(zram->comp); - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); + src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { - memcpy(mem, cmem, PAGE_SIZE); + dst = kmap_atomic(page); +@@ -912,14 +914,13 @@ static int __zram_bvec_read(struct zram + kunmap_atomic(dst); + ret = 0; } else { - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); -- - ret = zcomp_decompress(zstrm, cmem, size, mem); + + 
dst = kmap_atomic(page); + ret = zcomp_decompress(zstrm, src, size, dst); + kunmap_atomic(dst); - zcomp_stream_put(zram->comp); } - zs_unmap_object(meta->mem_pool, handle); + zs_unmap_object(zram->mem_pool, handle); + zcomp_stream_put(zram->comp); - zram_unlock_table(&meta->table[index]); + zram_slot_unlock(zram, index); /* Should NEVER happen. Return bio error if it does. */ diff --git a/debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch b/debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch new file mode 100644 index 000000000..9e062126a --- /dev/null +++ b/debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch @@ -0,0 +1,38 @@ +From: Mike Galbraith +Date: Wed, 23 Aug 2017 11:57:29 +0200 +Subject: [PATCH] drivers/zram: fix zcomp_stream_get() smp_processor_id() use + in preemptible code +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Use get_local_ptr() instead this_cpu_ptr() to avoid a warning regarding +smp_processor_id() in preemptible code. +raw_cpu_ptr() would be fine, too because the per-CPU data structure is +protected with a spin lock so it does not matter much if we take the +other one. + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Mike Galbraith +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/block/zram/zcomp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/block/zram/zcomp.c ++++ b/drivers/block/zram/zcomp.c +@@ -118,7 +118,7 @@ struct zcomp_strm *zcomp_stream_get(stru + { + struct zcomp_strm *zstrm; + +- zstrm = *this_cpu_ptr(comp->stream); ++ zstrm = *get_local_ptr(comp->stream); + spin_lock(&zstrm->zcomp_lock); + return zstrm; + } +@@ -129,6 +129,7 @@ void zcomp_stream_put(struct zcomp *comp + + zstrm = *this_cpu_ptr(comp->stream); + spin_unlock(&zstrm->zcomp_lock); ++ put_local_ptr(zstrm); + } + + int zcomp_compress(struct zcomp_strm *zstrm, diff --git a/debian/patches/features/all/rt/drm-i915-drop-trace_i915_gem_ring_dispatch-onrt.patch b/debian/patches/features/all/rt/drm-i915-drop-trace_i915_gem_ring_dispatch-onrt.patch deleted file mode 100644 index fb0a6750e..000000000 --- a/debian/patches/features/all/rt/drm-i915-drop-trace_i915_gem_ring_dispatch-onrt.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 25 Apr 2013 18:12:52 +0200 -Subject: drm/i915: drop trace_i915_gem_ring_dispatch on rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -This tracepoint is responsible for: - -|[<814cc358>] __schedule_bug+0x4d/0x59 -|[<814d24cc>] __schedule+0x88c/0x930 -|[<814d3b90>] ? _raw_spin_unlock_irqrestore+0x40/0x50 -|[<814d3b95>] ? _raw_spin_unlock_irqrestore+0x45/0x50 -|[<810b57b5>] ? task_blocks_on_rt_mutex+0x1f5/0x250 -|[<814d27d9>] schedule+0x29/0x70 -|[<814d3423>] rt_spin_lock_slowlock+0x15b/0x278 -|[<814d3786>] rt_spin_lock+0x26/0x30 -|[] gen6_gt_force_wake_get+0x29/0x60 [i915] -|[] gen6_ring_get_irq+0x5f/0x100 [i915] -|[] ftrace_raw_event_i915_gem_ring_dispatch+0xe3/0x100 [i915] -|[] i915_gem_do_execbuffer.isra.13+0xbd3/0x1430 [i915] -|[<810f8943>] ? trace_buffer_unlock_commit+0x43/0x60 -|[<8113e8d2>] ? ftrace_raw_event_kmem_alloc+0xd2/0x180 -|[<8101d063>] ? native_sched_clock+0x13/0x80 -|[] i915_gem_execbuffer2+0x99/0x280 [i915] -|[] drm_ioctl+0x4c3/0x570 [drm] -|[<8101d0d9>] ? sched_clock+0x9/0x10 -|[] ? i915_gem_execbuffer+0x480/0x480 [i915] -|[<810f1c18>] ? 
rb_commit+0x68/0xa0 -|[<810f1c6c>] ? ring_buffer_unlock_commit+0x1c/0xa0 -|[<81197467>] do_vfs_ioctl+0x97/0x540 -|[<81021318>] ? ftrace_raw_event_sys_enter+0xd8/0x130 -|[<811979a1>] sys_ioctl+0x91/0xb0 -|[<814db931>] tracesys+0xe1/0xe6 - -Chris Wilson does not like to move i915_trace_irq_get() out of the macro - -|No. This enables the IRQ, as well as making a number of -|very expensively serialised read, unconditionally. - -so it is gone now on RT. - - -Reported-by: Joakim Hernberg -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c -+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c -@@ -1445,7 +1445,9 @@ execbuf_submit(struct i915_execbuffer_pa - if (ret) - return ret; - -+#ifndef CONFIG_PREEMPT_RT_BASE - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); -+#endif - - i915_gem_execbuffer_move_to_active(vmas, params->request); - diff --git a/debian/patches/features/all/rt/drm-i915-init-spinlock-properly-on-RT.patch b/debian/patches/features/all/rt/drm-i915-init-spinlock-properly-on-RT.patch index 507c0c172..e778daa79 100644 --- a/debian/patches/features/all/rt/drm-i915-init-spinlock-properly-on-RT.patch +++ b/debian/patches/features/all/rt/drm-i915-init-spinlock-properly-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 29 May 2017 15:33:52 +0200 Subject: [PATCH] drm/i915: init spinlock properly on -RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz THe lockinit is opencoded so need to fix it up… @@ -12,16 +12,16 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c -@@ -50,7 +50,12 @@ static int __i915_gem_timeline_init(stru - tl->fence_context = fences++; - tl->common = timeline; +@@ -34,7 +34,12 @@ static void __intel_timeline_init(struct + tl->fence_context = context; + tl->common = parent; #ifdef CONFIG_DEBUG_SPINLOCK +# ifdef CONFIG_PREEMPT_RT_FULL -+ rt_mutex_init(&tl->lock.lock); -+ __rt_spin_lock_init(&tl->lock, lockname, lockclass); ++ rt_mutex_init(&tl->lock.lock); ++ __rt_spin_lock_init(&tl->lock, lockname, lockclass); +# else - __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); + __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); +# endif #else - spin_lock_init(&tl->lock); + spin_lock_init(&tl->lock); #endif diff --git a/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch b/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch index 443c49578..5dda228e6 100644 --- a/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch +++ b/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch @@ -1,7 +1,7 @@ Subject: drm,i915: Use local_lock/unlock_irq() in intel_pipe_update_start/end() From: Mike Galbraith Date: Sat, 27 Feb 2016 09:01:42 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz [ 8.014039] BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:918 @@ -57,12 +57,12 @@ Cc: Sebastian Andrzej Siewior Cc: 
linux-rt-users Signed-off-by: Thomas Gleixner --- - drivers/gpu/drm/i915/intel_sprite.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) + drivers/gpu/drm/i915/intel_sprite.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c -@@ -35,6 +35,7 @@ +@@ -36,6 +36,7 @@ #include #include #include @@ -70,17 +70,17 @@ Signed-off-by: Thomas Gleixner #include "intel_drv.h" #include "intel_frontbuffer.h" #include -@@ -65,6 +66,8 @@ int intel_usecs_to_scanlines(const struc - 1000 * adjusted_mode->crtc_htotal); +@@ -67,7 +68,7 @@ int intel_usecs_to_scanlines(const struc } + #define VBLANK_EVASION_TIME_US 100 +- +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); -+ /** * intel_pipe_update_start() - start update of a set of display registers * @crtc: the crtc of which the registers are going to be updated -@@ -98,7 +101,7 @@ void intel_pipe_update_start(struct inte - min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); +@@ -102,7 +103,7 @@ void intel_pipe_update_start(struct inte + VBLANK_EVASION_TIME_US); max = vblank_start - 1; - local_irq_disable(); @@ -88,7 +88,7 @@ Signed-off-by: Thomas Gleixner if (min <= 0 || max <= 0) return; -@@ -128,11 +131,11 @@ void intel_pipe_update_start(struct inte +@@ -132,11 +133,11 @@ void intel_pipe_update_start(struct inte break; } @@ -102,12 +102,12 @@ Signed-off-by: Thomas Gleixner } finish_wait(wq, &wait); -@@ -202,7 +205,7 @@ void intel_pipe_update_end(struct intel_ +@@ -201,7 +202,7 @@ void intel_pipe_update_end(struct intel_ crtc->base.state->event = NULL; } - local_irq_enable(); + local_unlock_irq(pipe_update_lock); - if (crtc->debug.start_vbl_count && - crtc->debug.start_vbl_count != end_vbl_count) { + if (intel_vgpu_active(dev_priv)) + return; diff --git a/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch b/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch index 1ca8ba9b6..690dbeef8 100644 --- a/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch +++ b/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch @@ -1,7 +1,7 @@ Subject: drm,radeon,i915: Use preempt_disable/enable_rt() where recommended From: Mike Galbraith Date: Sat, 27 Feb 2016 08:09:11 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz DRM folks identified the spots, so use them. @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c -@@ -867,6 +867,7 @@ static int i915_get_crtc_scanoutpos(stru +@@ -867,6 +867,7 @@ static bool i915_get_crtc_scanoutpos(str spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner /* Get optional system timestamp before query. */ if (stime) -@@ -918,6 +919,7 @@ static int i915_get_crtc_scanoutpos(stru +@@ -918,6 +919,7 @@ static bool i915_get_crtc_scanoutpos(str *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. 
*/ @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c -@@ -1845,6 +1845,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1839,6 +1839,7 @@ int radeon_get_crtc_scanoutpos(struct dr struct radeon_device *rdev = dev->dev_private; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner /* Get optional system timestamp before query. */ if (stime) -@@ -1937,6 +1938,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1931,6 +1932,7 @@ int radeon_get_crtc_scanoutpos(struct dr *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ diff --git a/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch b/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch index 500df268e..7eb09dfde 100644 --- a/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch +++ b/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch @@ -1,7 +1,7 @@ Subject: fs/epoll: Do not disable preemption on RT From: Thomas Gleixner Date: Fri, 08 Jul 2011 16:35:35 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz ep_call_nested() takes a sleeping lock so we can't disable preemption. The light version is enough since ep_call_nested() doesn't mind beeing @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/fs/eventpoll.c +++ b/fs/eventpoll.c -@@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *pri +@@ -587,12 +587,12 @@ static int ep_poll_wakeup_proc(void *pri */ static void ep_poll_safewake(wait_queue_head_t *wq) { diff --git a/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch b/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch index 878111a29..273f10eca 100644 --- a/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch +++ b/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 16 Feb 2015 18:49:10 +0100 Subject: fs/aio: simple simple work -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768 |in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2 @@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); -@@ -581,9 +583,9 @@ static int kiocb_cancel(struct aio_kiocb +@@ -588,9 +590,9 @@ static int kiocb_cancel(struct aio_kiocb return cancel(&kiocb->common); } @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior pr_debug("freeing %p\n", ctx); -@@ -602,8 +604,8 @@ static void free_ioctx_reqs(struct percp +@@ -609,8 +611,8 @@ static void free_ioctx_reqs(struct percp if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) complete(&ctx->rq_wait->comp); @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -611,9 +613,9 @@ static void free_ioctx_reqs(struct percp +@@ -618,9 +620,9 @@ static void free_ioctx_reqs(struct percp * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. 
*/ @@ -90,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); -@@ -632,6 +634,14 @@ static void free_ioctx_users(struct perc +@@ -639,6 +641,14 @@ static void free_ioctx_users(struct perc percpu_ref_put(&ctx->reqs); } diff --git a/debian/patches/features/all/rt/fs-block-rt-support.patch b/debian/patches/features/all/rt/fs-block-rt-support.patch index b49980046..e9226b14b 100644 --- a/debian/patches/features/all/rt/fs-block-rt-support.patch +++ b/debian/patches/features/all/rt/fs-block-rt-support.patch @@ -1,7 +1,7 @@ Subject: block: Turn off warning which is bogus on RT From: Thomas Gleixner Date: Tue, 14 Jun 2011 17:05:09 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On -RT the context is always with IRQs enabled. Ignore this warning on -RT. @@ -12,12 +12,12 @@ Signed-off-by: Thomas Gleixner --- a/block/blk-core.c +++ b/block/blk-core.c -@@ -214,7 +214,7 @@ EXPORT_SYMBOL(blk_start_queue_async); - **/ +@@ -280,7 +280,7 @@ EXPORT_SYMBOL(blk_start_queue_async); void blk_start_queue(struct request_queue *q) { -- WARN_ON(!irqs_disabled()); -+ WARN_ON_NONRT(!irqs_disabled()); + lockdep_assert_held(q->queue_lock); +- WARN_ON(!in_interrupt() && !irqs_disabled()); ++ WARN_ON_NONRT(!in_interrupt() && !irqs_disabled()); + WARN_ON_ONCE(q->mq_ops); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - __blk_run_queue(q); diff --git a/debian/patches/features/all/rt/fs-dcache-bringt-back-explicit-INIT_HLIST_BL_HEAD-in.patch b/debian/patches/features/all/rt/fs-dcache-bringt-back-explicit-INIT_HLIST_BL_HEAD-in.patch new file mode 100644 index 000000000..461768acf --- /dev/null +++ b/debian/patches/features/all/rt/fs-dcache-bringt-back-explicit-INIT_HLIST_BL_HEAD-in.patch @@ -0,0 +1,52 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 13 Sep 2017 12:32:34 +0200 +Subject: [PATCH] fs/dcache: bringt back explicit INIT_HLIST_BL_HEAD init +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Commit 3d375d78593c ("mm: update callers to use HASH_ZERO flag") removed +INIT_HLIST_BL_HEAD and uses the ZERO flag instead for the init. However +on RT we have also a spinlock which needs an init call so we can't use +that. + +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -3578,6 +3578,8 @@ static int __init set_dhash_entries(char + + static void __init dcache_init_early(void) + { ++ unsigned int loop; ++ + /* If hashes are distributed across NUMA nodes, defer + * hash allocation until vmalloc space is available. 
+ */ +@@ -3594,10 +3596,14 @@ static void __init dcache_init_early(voi + &d_hash_mask, + 0, + 0); ++ ++ for (loop = 0; loop < (1U << d_hash_shift); loop++) ++ INIT_HLIST_BL_HEAD(dentry_hashtable + loop); + } + + static void __init dcache_init(void) + { ++ unsigned int loop; + /* + * A constructor could be added for stable state like the lists, + * but it is probably not worth it because of the cache nature +@@ -3620,6 +3626,10 @@ static void __init dcache_init(void) + &d_hash_mask, + 0, + 0); ++ ++ for (loop = 0; loop < (1U << d_hash_shift); loop++) ++ INIT_HLIST_BL_HEAD(dentry_hashtable + loop); ++ + } + + /* SLAB cache for __getname() consumers */ diff --git a/debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch b/debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch new file mode 100644 index 000000000..5f7e08da3 --- /dev/null +++ b/debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch @@ -0,0 +1,119 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 20 Oct 2017 11:29:53 +0200 +Subject: [PATCH] fs/dcache: disable preemption on i_dir_seq's write side +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +i_dir_seq is an opencoded seqcounter. Based on the code it looks like we +could have two writers in parallel despite the fact that the d_lock is +held. The problem is that during the write process on RT the preemption +is still enabled and if this process is interrupted by a reader with RT +priority then we lock up. +To avoid that lock up I am disabling the preemption during the update. +The rename of i_dir_seq is here to ensure to catch new write sides in +future. + +Cc: stable-rt@vger.kernel.org +Reported-by: Oleg.Karfich@wago.com +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 12 +++++++----- + fs/inode.c | 2 +- + fs/libfs.c | 6 ++++-- + include/linux/fs.h | 2 +- + 4 files changed, 13 insertions(+), 9 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2405,9 +2405,10 @@ EXPORT_SYMBOL(d_rehash); + static inline unsigned start_dir_add(struct inode *dir) + { + ++ preempt_disable_rt(); + for (;;) { +- unsigned n = dir->i_dir_seq; +- if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) ++ unsigned n = dir->__i_dir_seq; ++ if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n) + return n; + cpu_relax(); + } +@@ -2415,7 +2416,8 @@ static inline unsigned start_dir_add(str + + static inline void end_dir_add(struct inode *dir, unsigned n) + { +- smp_store_release(&dir->i_dir_seq, n + 2); ++ smp_store_release(&dir->__i_dir_seq, n + 2); ++ preempt_enable_rt(); + } + + static void d_wait_lookup(struct dentry *dentry) +@@ -2448,7 +2450,7 @@ struct dentry *d_alloc_parallel(struct d + + retry: + rcu_read_lock(); +- seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; ++ seq = smp_load_acquire(&parent->d_inode->__i_dir_seq) & ~1; + r_seq = read_seqbegin(&rename_lock); + dentry = __d_lookup_rcu(parent, name, &d_seq); + if (unlikely(dentry)) { +@@ -2470,7 +2472,7 @@ struct dentry *d_alloc_parallel(struct d + goto retry; + } + hlist_bl_lock(b); +- if (unlikely(parent->d_inode->i_dir_seq != seq)) { ++ if (unlikely(parent->d_inode->__i_dir_seq != seq)) { + hlist_bl_unlock(b); + rcu_read_unlock(); + goto retry; +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -154,7 +154,7 @@ int inode_init_always(struct super_block + inode->i_bdev = NULL; + inode->i_cdev = NULL; + inode->i_link = NULL; +- inode->i_dir_seq = 0; ++ inode->__i_dir_seq = 
0; + inode->i_rdev = 0; + inode->dirtied_when = 0; + +--- a/fs/libfs.c ++++ b/fs/libfs.c +@@ -90,7 +90,7 @@ static struct dentry *next_positive(stru + struct list_head *from, + int count) + { +- unsigned *seq = &parent->d_inode->i_dir_seq, n; ++ unsigned *seq = &parent->d_inode->__i_dir_seq, n; + struct dentry *res; + struct list_head *p; + bool skipped; +@@ -123,8 +123,9 @@ static struct dentry *next_positive(stru + static void move_cursor(struct dentry *cursor, struct list_head *after) + { + struct dentry *parent = cursor->d_parent; +- unsigned n, *seq = &parent->d_inode->i_dir_seq; ++ unsigned n, *seq = &parent->d_inode->__i_dir_seq; + spin_lock(&parent->d_lock); ++ preempt_disable_rt(); + for (;;) { + n = *seq; + if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) +@@ -137,6 +138,7 @@ static void move_cursor(struct dentry *c + else + list_add_tail(&cursor->d_child, &parent->d_subdirs); + smp_store_release(seq, n + 2); ++ preempt_enable_rt(); + spin_unlock(&parent->d_lock); + } + +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -655,7 +655,7 @@ struct inode { + struct block_device *i_bdev; + struct cdev *i_cdev; + char *i_link; +- unsigned i_dir_seq; ++ unsigned __i_dir_seq; + }; + + __u32 i_generation; diff --git a/debian/patches/features/all/rt/fs-dcache-init-in_lookup_hashtable.patch b/debian/patches/features/all/rt/fs-dcache-init-in_lookup_hashtable.patch deleted file mode 100644 index 165960886..000000000 --- a/debian/patches/features/all/rt/fs-dcache-init-in_lookup_hashtable.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 14 Sep 2016 17:57:03 +0200 -Subject: [PATCH] fs/dcache: init in_lookup_hashtable -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -in_lookup_hashtable was introduced in commit 94bdd655caba ("parallel -lookups machinery, part 3") and never initialized but since it is in -the data it is all zeros. But we need this for -RT. - -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/dcache.c | 5 +++++ - 1 file changed, 5 insertions(+) - ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -3610,6 +3610,11 @@ EXPORT_SYMBOL(d_genocide); - - void __init vfs_caches_init_early(void) - { -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) -+ INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); -+ - dcache_init_early(); - inode_init_early(); - } diff --git a/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch b/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch index 7bfd9a5d6..f39ca12c5 100644 --- a/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch +++ b/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch @@ -1,7 +1,7 @@ Subject: fs: dcache: Use cpu_chill() in trylock loops From: Thomas Gleixner Date: Wed, 07 Mar 2012 21:00:34 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. 
Use cpu_chill() instead of cpu_relax() to let the system @@ -18,14 +18,14 @@ Signed-off-by: Thomas Gleixner --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h -@@ -32,6 +32,7 @@ +@@ -20,6 +20,7 @@ #include #include #include +#include - #include #include - + #include + #include --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -148,7 +148,7 @@ static struct dentry *get_next_positive_ @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -750,6 +751,8 @@ static inline bool fast_dput(struct dent +@@ -784,6 +785,8 @@ static inline bool fast_dput(struct dent */ void dput(struct dentry *dentry) { @@ -56,7 +56,7 @@ Signed-off-by: Thomas Gleixner if (unlikely(!dentry)) return; -@@ -788,9 +791,18 @@ void dput(struct dentry *dentry) +@@ -820,9 +823,18 @@ void dput(struct dentry *dentry) return; kill_it: @@ -78,7 +78,7 @@ Signed-off-by: Thomas Gleixner goto repeat; } } -@@ -2330,7 +2342,7 @@ void d_delete(struct dentry * dentry) +@@ -2360,7 +2372,7 @@ void d_delete(struct dentry * dentry) if (dentry->d_lockref.count == 1) { if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); @@ -97,7 +97,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -357,7 +358,7 @@ int __mnt_want_write(struct vfsmount *m) +@@ -355,7 +356,7 @@ int __mnt_want_write(struct vfsmount *m) smp_mb(); while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { preempt_enable(); diff --git a/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch b/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch index 42542c7e0..477275f7f 100644 --- a/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch +++ b/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 14 Sep 2016 14:35:49 +0200 Subject: [PATCH] fs/dcache: use swait_queue instead of waitqueue -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz __d_lookup_done() invokes wake_up_all() while holding a hlist_bl_lock() which disables preemption. As a workaround convert it to swait. 
@@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2402,21 +2402,24 @@ static inline void end_dir_add(struct in +@@ -2434,21 +2434,24 @@ static inline void end_dir_add(struct in static void d_wait_lookup(struct dentry *dentry) { @@ -70,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); -@@ -2525,7 +2528,7 @@ void __d_lookup_done(struct dentry *dent +@@ -2557,7 +2560,7 @@ void __d_lookup_done(struct dentry *dent hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior INIT_HLIST_NODE(&dentry->d_u.d_alias); --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c -@@ -1191,7 +1191,7 @@ static int fuse_direntplus_link(struct f +@@ -1187,7 +1187,7 @@ static int fuse_direntplus_link(struct f struct inode *dir = d_inode(parent); struct fuse_conn *fc; struct inode *inode; @@ -92,7 +92,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/fs/namei.c +++ b/fs/namei.c -@@ -1628,7 +1628,7 @@ static struct dentry *lookup_slow(const +@@ -1637,7 +1637,7 @@ static struct dentry *lookup_slow(const { struct dentry *dentry = ERR_PTR(-ENOENT), *old; struct inode *inode = dir->d_inode; @@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior inode_lock_shared(inode); /* Don't go there if it's already dead */ -@@ -3069,7 +3069,7 @@ static int lookup_open(struct nameidata +@@ -3110,7 +3110,7 @@ static int lookup_open(struct nameidata struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior return -ENOENT; --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c -@@ -491,7 +491,7 @@ static +@@ -452,7 +452,7 @@ static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { struct qstr filename = QSTR_INIT(entry->name, entry->len); @@ -121,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct dentry *dentry; struct dentry *alias; struct inode *dir = d_inode(parent); -@@ -1493,7 +1493,7 @@ int nfs_atomic_open(struct inode *dir, s +@@ -1443,7 +1443,7 @@ int nfs_atomic_open(struct inode *dir, s struct file *file, unsigned open_flags, umode_t mode, int *opened) { @@ -132,7 +132,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct iattr attr = { .ia_valid = ATTR_OPEN }; --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c -@@ -12,7 +12,7 @@ +@@ -13,7 +13,7 @@ #include #include #include @@ -141,7 +141,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -205,7 +205,7 @@ nfs_async_unlink(struct dentry *dentry, +@@ -206,7 +206,7 @@ nfs_async_unlink(struct dentry *dentry, goto out_free_name; } data->res.dir_attr = &data->dir_attr; @@ -152,7 +152,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_lock(&dentry->d_lock); --- a/fs/proc/base.c +++ b/fs/proc/base.c -@@ -1836,7 +1836,7 @@ bool proc_fill_cache(struct file *file, +@@ -1878,7 +1878,7 @@ bool proc_fill_cache(struct file *file, child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -163,7 +163,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto end_instantiate; --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -665,7 +665,7 @@ static bool proc_sys_fill_cache(struct f +@@ -679,7 +679,7 @@ static bool proc_sys_fill_cache(struct f child = d_lookup(dir, &qname); if (!child) { @@ -174,7 +174,7 @@ Signed-off-by: Sebastian Andrzej Siewior return false; --- a/include/linux/dcache.h +++ b/include/linux/dcache.h -@@ -101,7 +101,7 @@ struct dentry { 
+@@ -107,7 +107,7 @@ struct dentry { union { struct list_head d_lru; /* LRU list */ @@ -183,7 +183,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ -@@ -231,7 +231,7 @@ extern void d_set_d_op(struct dentry *de +@@ -237,7 +237,7 @@ extern void d_set_d_op(struct dentry *de extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, @@ -194,7 +194,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern struct dentry * d_exact_alias(struct dentry *, struct inode *); --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h -@@ -1513,7 +1513,7 @@ struct nfs_unlinkdata { +@@ -1530,7 +1530,7 @@ struct nfs_unlinkdata { struct nfs_removeargs args; struct nfs_removeres res; struct dentry *dentry; @@ -205,9 +205,9 @@ Signed-off-by: Sebastian Andrzej Siewior long timeout; --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c -@@ -74,6 +74,7 @@ void swake_up_all(struct swait_queue_hea - if (!swait_active(q)) - return; +@@ -69,6 +69,7 @@ void swake_up_all(struct swait_queue_hea + struct swait_queue *curr; + LIST_HEAD(tmp); + WARN_ON(irqs_disabled()); raw_spin_lock_irq(&q->lock); diff --git a/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch b/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch index cc9831774..478169e63 100644 --- a/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch +++ b/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 18 Mar 2011 10:11:25 +0100 Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz bit_spin_locks break under RT. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h -@@ -77,6 +77,10 @@ struct buffer_head { +@@ -78,6 +78,10 @@ struct buffer_head { atomic_t b_count; /* users using this buffer_head */ #ifdef CONFIG_PREEMPT_RT_BASE spinlock_t b_uptodate_lock; @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner #endif }; -@@ -108,6 +112,10 @@ static inline void buffer_head_init_lock +@@ -109,6 +113,10 @@ static inline void buffer_head_init_lock { #ifdef CONFIG_PREEMPT_RT_BASE spin_lock_init(&bh->b_uptodate_lock); diff --git a/debian/patches/features/all/rt/fs-jbd2-pull-your-plug-when-waiting-for-space.patch b/debian/patches/features/all/rt/fs-jbd2-pull-your-plug-when-waiting-for-space.patch index 18e81f71c..53d50f6bb 100644 --- a/debian/patches/features/all/rt/fs-jbd2-pull-your-plug-when-waiting-for-space.patch +++ b/debian/patches/features/all/rt/fs-jbd2-pull-your-plug-when-waiting-for-space.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 17 Feb 2014 17:30:03 +0100 Subject: fs: jbd2: pull your plug when waiting for space -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Two cps in parallel managed to stall the the ext4 fs. 
It seems that journal code is either waiting for locks or sleeping waiting for diff --git a/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch b/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch index dbe8df308..cfeba7985 100644 --- a/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch +++ b/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sun, 19 Jul 2009 08:44:27 -0500 Subject: fs: namespace preemption fix -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On RT we cannot loop with preemption disabled here as mnt_make_readonly() might have been preempted. We can safely enable @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/fs/namespace.c +++ b/fs/namespace.c -@@ -355,8 +355,11 @@ int __mnt_want_write(struct vfsmount *m) +@@ -353,8 +353,11 @@ int __mnt_want_write(struct vfsmount *m) * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); diff --git a/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch b/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch index c224b8ebd..f9def46db 100644 --- a/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch +++ b/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 15 Sep 2016 10:51:27 +0200 Subject: [PATCH] fs/nfs: turn rmdir_sem into a semaphore -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The RW semaphore had a reader side which used the _non_owner version because it most likely took the reader lock in one thread and released it @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c -@@ -1813,7 +1813,11 @@ int nfs_rmdir(struct inode *dir, struct +@@ -1763,7 +1763,11 @@ int nfs_rmdir(struct inode *dir, struct trace_nfs_rmdir_enter(dir, dentry); if (d_really_is_positive(dentry)) { @@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ switch (error) { -@@ -1823,7 +1827,11 @@ int nfs_rmdir(struct inode *dir, struct +@@ -1773,7 +1777,11 @@ int nfs_rmdir(struct inode *dir, struct case -ENOENT: nfs_dentry_handle_enoent(dentry); } @@ -48,21 +48,21 @@ Signed-off-by: Sebastian Andrzej Siewior trace_nfs_rmdir_exit(dir, dentry, error); --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c -@@ -1984,7 +1984,11 @@ static void init_once(void *foo) - nfsi->nrequests = 0; - nfsi->commit_info.ncommit = 0; +@@ -2014,7 +2014,11 @@ static void init_once(void *foo) + atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); +#ifdef CONFIG_PREEMPT_RT_BASE + sema_init(&nfsi->rmdir_sem, 1); +#else init_rwsem(&nfsi->rmdir_sem); +#endif + mutex_init(&nfsi->commit_mutex); nfs4_init_once(nfsi); } - --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c -@@ -51,6 +51,29 @@ static void nfs_async_unlink_done(struct +@@ -52,6 +52,29 @@ static void nfs_async_unlink_done(struct rpc_restart_call_prepare(task); } @@ -92,7 +92,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * nfs_async_unlink_release - Release the sillydelete data. 
* @task: rpc_task of the sillydelete -@@ -64,7 +87,7 @@ static void nfs_async_unlink_release(voi +@@ -65,7 +88,7 @@ static void nfs_async_unlink_release(voi struct dentry *dentry = data->dentry; struct super_block *sb = dentry->d_sb; @@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior d_lookup_done(dentry); nfs_free_unlinkdata(data); dput(dentry); -@@ -117,10 +140,10 @@ static int nfs_call_unlink(struct dentry +@@ -118,10 +141,10 @@ static int nfs_call_unlink(struct dentry struct inode *dir = d_inode(dentry->d_parent); struct dentry *alias; @@ -114,7 +114,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; } if (!d_in_lookup(alias)) { -@@ -142,7 +165,7 @@ static int nfs_call_unlink(struct dentry +@@ -143,7 +166,7 @@ static int nfs_call_unlink(struct dentry ret = 0; spin_unlock(&alias->d_lock); dput(alias); @@ -125,15 +125,15 @@ Signed-off-by: Sebastian Andrzej Siewior * point dentry is definitely not a root, so we won't need --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h -@@ -161,7 +161,11 @@ struct nfs_inode { +@@ -162,7 +162,11 @@ struct nfs_inode { /* Readers: in-flight sillydelete RPC calls */ /* Writers: rmdir */ +#ifdef CONFIG_PREEMPT_RT_BASE -+ struct semaphore rmdir_sem; ++ struct semaphore rmdir_sem; +#else struct rw_semaphore rmdir_sem; +#endif + struct mutex commit_mutex; #if IS_ENABLED(CONFIG_NFS_V4) - struct nfs4_cached_acl *nfs4_acl; diff --git a/debian/patches/features/all/rt/fs-ntfs-disable-interrupt-non-rt.patch b/debian/patches/features/all/rt/fs-ntfs-disable-interrupt-non-rt.patch index fc05adf5c..9522dbd5f 100644 --- a/debian/patches/features/all/rt/fs-ntfs-disable-interrupt-non-rt.patch +++ b/debian/patches/features/all/rt/fs-ntfs-disable-interrupt-non-rt.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Fri, 3 Jul 2009 08:44:12 -0500 Subject: fs: ntfs: disable interrupt only on !RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On Sat, 2007-10-27 at 11:44 +0200, Ingo Molnar wrote: > * Nick Piggin wrote: diff --git a/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch b/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch index 3e2196260..4975d101f 100644 --- a/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch +++ b/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 18 Mar 2011 09:18:52 +0100 Subject: buffer_head: Replace bh_uptodate_lock for -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Wrap the bit_spin_lock calls into a separate inline and add the RT replacements with a real spinlock. @@ -9,13 +9,15 @@ replacements with a real spinlock. Signed-off-by: Thomas Gleixner --- fs/buffer.c | 21 +++++++-------------- + fs/ext4/page-io.c | 6 ++---- fs/ntfs/aops.c | 10 +++------- + fs/xfs/xfs_aops.c | 6 ++---- include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++ - 3 files changed, 44 insertions(+), 21 deletions(-) + 5 files changed, 48 insertions(+), 29 deletions(-) --- a/fs/buffer.c +++ b/fs/buffer.c -@@ -303,8 +303,7 @@ static void end_buffer_async_read(struct +@@ -302,8 +302,7 @@ static void end_buffer_async_read(struct * decide that the page is now completely done. 
*/ first = page_buffers(page); @@ -25,7 +27,7 @@ Signed-off-by: Thomas Gleixner clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; -@@ -317,8 +316,7 @@ static void end_buffer_async_read(struct +@@ -316,8 +315,7 @@ static void end_buffer_async_read(struct } tmp = tmp->b_this_page; } while (tmp != bh); @@ -35,7 +37,7 @@ Signed-off-by: Thomas Gleixner /* * If none of the buffers had errors and they are all -@@ -330,9 +328,7 @@ static void end_buffer_async_read(struct +@@ -329,9 +327,7 @@ static void end_buffer_async_read(struct return; still_busy: @@ -46,7 +48,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -360,8 +356,7 @@ void end_buffer_async_write(struct buffe +@@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffe } first = page_buffers(page); @@ -56,7 +58,7 @@ Signed-off-by: Thomas Gleixner clear_buffer_async_write(bh); unlock_buffer(bh); -@@ -373,15 +368,12 @@ void end_buffer_async_write(struct buffe +@@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffe } tmp = tmp->b_this_page; } @@ -74,7 +76,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(end_buffer_async_write); -@@ -3426,6 +3418,7 @@ struct buffer_head *alloc_buffer_head(gf +@@ -3409,6 +3401,7 @@ struct buffer_head *alloc_buffer_head(gf struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); @@ -82,6 +84,28 @@ Signed-off-by: Thomas Gleixner preempt_disable(); __this_cpu_inc(bh_accounting.nr); recalc_bh_state(); +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -95,8 +95,7 @@ static void ext4_finish_bio(struct bio * + * We check all buffers in the page under BH_Uptodate_Lock + * to avoid races with other end io clearing async_write flags + */ +- local_irq_save(flags); +- bit_spin_lock(BH_Uptodate_Lock, &head->b_state); ++ flags = bh_uptodate_lock_irqsave(head); + do { + if (bh_offset(bh) < bio_start || + bh_offset(bh) + bh->b_size > bio_end) { +@@ -108,8 +107,7 @@ static void ext4_finish_bio(struct bio * + if (bio->bi_status) + buffer_io_error(bh); + } while ((bh = bh->b_this_page) != head); +- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); +- local_irq_restore(flags); ++ bh_uptodate_unlock_irqrestore(head, flags); + if (!under_io) { + #ifdef CONFIG_EXT4_FS_ENCRYPTION + if (data_page) --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -108,8 +108,7 @@ static void ntfs_end_buffer_async_read(s @@ -115,9 +139,31 @@ Signed-off-by: Thomas Gleixner } /** +--- a/fs/xfs/xfs_aops.c ++++ b/fs/xfs/xfs_aops.c +@@ -120,8 +120,7 @@ xfs_finish_page_writeback( + ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE); + ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); + +- local_irq_save(flags); +- bit_spin_lock(BH_Uptodate_Lock, &head->b_state); ++ flags = bh_uptodate_lock_irqsave(head); + do { + if (off >= bvec->bv_offset && + off < bvec->bv_offset + bvec->bv_len) { +@@ -143,8 +142,7 @@ xfs_finish_page_writeback( + } + off += bh->b_size; + } while ((bh = bh->b_this_page) != head); +- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); +- local_irq_restore(flags); ++ bh_uptodate_unlock_irqrestore(head, flags); + + if (!busy) + end_page_writeback(bvec->bv_page); --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h -@@ -75,8 +75,42 @@ struct buffer_head { +@@ -76,8 +76,42 @@ struct buffer_head { struct address_space *b_assoc_map; /* mapping this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ diff --git a/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch 
b/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch index 7c14ed4c3..4a365575a 100644 --- a/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch +++ b/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Sun, 16 Oct 2016 05:08:30 +0200 Subject: [PATCH] ftrace: Fix trace header alignment -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Line up helper arrows to the right column. @@ -10,12 +10,12 @@ Signed-off-by: Mike Galbraith [bigeasy: fixup function tracer header] Signed-off-by: Sebastian Andrzej Siewior --- - kernel/trace/trace.c | 32 ++++++++++++++++---------------- - 1 file changed, 16 insertions(+), 16 deletions(-) + kernel/trace/trace.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -3121,17 +3121,17 @@ get_total_entries(struct trace_buffer *b +@@ -3343,17 +3343,17 @@ get_total_entries(struct trace_buffer *b static void print_lat_help_header(struct seq_file *m) { @@ -44,20 +44,3 @@ Signed-off-by: Sebastian Andrzej Siewior } static void print_event_info(struct trace_buffer *buf, struct seq_file *m) -@@ -3160,11 +3160,11 @@ static void print_func_help_header_irq(s - "# |/ _-----=> need-resched_lazy\n" - "# || / _---=> hardirq/softirq\n" - "# ||| / _--=> preempt-depth\n" -- "# |||| /_--=> preempt-lazy-depth\n" -- "# ||||| _-=> migrate-disable \n" -- "# ||||| / delay\n" -- "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n" -- "# | | | |||||| | |\n"); -+ "# |||| / _-=> preempt-lazy-depth\n" -+ "# ||||| / _-=> migrate-disable \n" -+ "# |||||| / delay\n" -+ "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n" -+ "# | | | ||||||| | |\n"); - } - - void diff --git a/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch b/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch index 5d5e749d4..b57cf27e7 100644 --- a/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch +++ b/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:56:42 +0200 Subject: trace: Add migrate-disabled counter to tracing output -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Signed-off-by: Thomas Gleixner --- @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h -@@ -61,6 +61,8 @@ struct trace_entry { +@@ -62,6 +62,8 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner #define TRACE_EVENT_TYPE_MAX \ --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -1954,6 +1954,8 @@ tracing_generic_entry_update(struct trac +@@ -2141,6 +2141,8 @@ tracing_generic_entry_update(struct trac ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | (test_preempt_need_resched() ? 
TRACE_FLAG_PREEMPT_RESCHED : 0); @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL_GPL(tracing_generic_entry_update); -@@ -3122,9 +3124,10 @@ static void print_lat_help_header(struct +@@ -3344,9 +3346,10 @@ static void print_lat_help_header(struct "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" @@ -60,7 +60,7 @@ Signed-off-by: Thomas Gleixner } --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c -@@ -484,6 +484,11 @@ int trace_print_lat_fmt(struct trace_seq +@@ -493,6 +493,11 @@ int trace_print_lat_fmt(struct trace_seq else trace_seq_putc(s, '.'); diff --git a/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch b/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch index 6380d4b2c..d5b8d5182 100644 --- a/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch +++ b/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 1 Mar 2013 11:17:42 +0100 Subject: futex: Ensure lock/unlock symetry versus pi_lock and hash bucket lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz In exit_pi_state_list() we have the following locking construct: @@ -31,13 +31,13 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -911,7 +911,9 @@ void exit_pi_state_list(struct task_stru - * task still owns the PI-state: - */ +@@ -936,7 +936,9 @@ void exit_pi_state_list(struct task_stru if (head->next != next) { + /* retain curr->pi_lock for the loop invariant */ + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); + raw_spin_unlock_irq(&curr->pi_lock); spin_unlock(&hb->lock); + raw_spin_lock_irq(&curr->pi_lock); + put_pi_state(pi_state); continue; } - diff --git a/debian/patches/features/all/rt/futex-requeue-pi-fix.patch b/debian/patches/features/all/rt/futex-requeue-pi-fix.patch index f0c92bd31..30841a0b5 100644 --- a/debian/patches/features/all/rt/futex-requeue-pi-fix.patch +++ b/debian/patches/features/all/rt/futex-requeue-pi-fix.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: futex: Fix bug on when a requeued RT task times out -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Requeue with timeout causes a bug with PREEMPT_RT_FULL. 
@@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -1722,6 +1723,35 @@ int __rt_mutex_start_proxy_lock(struct r +@@ -1730,6 +1731,35 @@ int __rt_mutex_start_proxy_lock(struct r if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -104,7 +104,7 @@ Signed-off-by: Thomas Gleixner RT_MUTEX_FULL_CHAINWALK); --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -100,6 +100,7 @@ enum rtmutex_chainwalk { +@@ -130,6 +130,7 @@ enum rtmutex_chainwalk { * PI-futex support (proxy locking functions, etc.): */ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) diff --git a/debian/patches/features/all/rt/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch b/debian/patches/features/all/rt/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch deleted file mode 100644 index b57be1d77..000000000 --- a/debian/patches/features/all/rt/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch +++ /dev/null @@ -1,126 +0,0 @@ -From: Peter Zijlstra -Date: Mon, 22 May 2017 13:04:50 -0700 -Subject: [PATCH] futex,rt_mutex: Fix rt_mutex_cleanup_proxy_lock() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Markus reported that the glibc/nptl/tst-robustpi8 test was failing after -commit: - - cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") - -The following trace shows the problem: - - ld-linux-x86-64-2161 [019] .... 410.760971: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_LOCK_PI - ld-linux-x86-64-2161 [019] ...1 410.760972: lock_pi_update_atomic: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000875 ret=0 - ld-linux-x86-64-2165 [011] .... 410.760978: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_UNLOCK_PI - ld-linux-x86-64-2165 [011] d..1 410.760979: do_futex: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000871 ret=0 - ld-linux-x86-64-2165 [011] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=0000 - ld-linux-x86-64-2161 [019] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=ETIMEDOUT - -Task 2165 does an UNLOCK_PI, assigning the lock to the waiter task 2161 -which then returns with -ETIMEDOUT. That wrecks the lock state, because now -the owner isn't aware it acquired the lock and removes the pending robust -list entry. - -If 2161 is killed, the robust list will not clear out this futex and the -subsequent acquire on this futex will then (correctly) result in -ESRCH -which is unexpected by glibc, triggers an internal assertion and dies. - -Task 2161 Task 2165 - -rt_mutex_wait_proxy_lock() - timeout(); - /* T2161 is still queued in the waiter list */ - return -ETIMEDOUT; - - futex_unlock_pi() - spin_lock(hb->lock); - rtmutex_unlock() - remove_rtmutex_waiter(T2161); - mark_lock_available(); - /* Make the next waiter owner of the user space side */ - futex_uval = 2161; - spin_unlock(hb->lock); -spin_lock(hb->lock); -rt_mutex_cleanup_proxy_lock() - if (rtmutex_owner() !== current) - ... - return FAIL; -.... -return -ETIMEOUT; - -This means that rt_mutex_cleanup_proxy_lock() needs to call -try_to_take_rt_mutex() so it can take over the rtmutex correctly which was -assigned by the waker. If the rtmutex is owned by some other task then this -call is harmless and just confirmes that the waiter is not able to acquire -it. - -While there, fix what looks like a merge error which resulted in -rt_mutex_cleanup_proxy_lock() having two calls to -fixup_rt_mutex_waiters() and rt_mutex_wait_proxy_lock() not having any. 
-Both should have one, since both potentially touch the waiter list. - -Fixes: 38d589f2fd08 ("futex,rt_mutex: Restructure rt_mutex_finish_proxy_lock()") -Reported-by: Markus Trippelsdorf -Bug-Spotted-by: Thomas Gleixner -Signed-off-by: Peter Zijlstra (Intel) -Cc: Florian Weimer -Cc: Darren Hart -Cc: Sebastian Andrzej Siewior -Cc: Markus Trippelsdorf -Link: http://lkml.kernel.org/r/20170519154850.mlomgdsd26drq5j6@hirez.programming.kicks-ass.net -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 24 ++++++++++++++++++------ - 1 file changed, 18 insertions(+), 6 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1785,12 +1785,14 @@ int rt_mutex_wait_proxy_lock(struct rt_m - int ret; - - raw_spin_lock_irq(&lock->wait_lock); -- -- set_current_state(TASK_INTERRUPTIBLE); -- - /* sleep on the mutex */ -+ set_current_state(TASK_INTERRUPTIBLE); - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); -- -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might -+ * have to fix that up. -+ */ -+ fixup_rt_mutex_waiters(lock); - raw_spin_unlock_irq(&lock->wait_lock); - - return ret; -@@ -1822,15 +1824,25 @@ bool rt_mutex_cleanup_proxy_lock(struct - - raw_spin_lock_irq(&lock->wait_lock); - /* -+ * Do an unconditional try-lock, this deals with the lock stealing -+ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() -+ * sets a NULL owner. -+ * -+ * We're not interested in the return value, because the subsequent -+ * test on rt_mutex_owner() will infer that. If the trylock succeeded, -+ * we will own the lock and it will have removed the waiter. If we -+ * failed the trylock, we're still not owner and we need to remove -+ * ourselves. -+ */ -+ try_to_take_rt_mutex(lock, current, waiter); -+ /* - * Unless we're the owner; we're still enqueued on the wait_list. - * So check if we became owner, if not, take us off the wait_list. - */ - if (rt_mutex_owner(lock) != current) { - remove_waiter(lock, waiter); -- fixup_rt_mutex_waiters(lock); - cleanup = true; - } -- - /* - * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might - * have to fix that up. diff --git a/debian/patches/features/all/rt/futex-rtmutex-Cure-RT-double-blocking-issue.patch b/debian/patches/features/all/rt/futex-rtmutex-Cure-RT-double-blocking-issue.patch deleted file mode 100644 index 0665abc21..000000000 --- a/debian/patches/features/all/rt/futex-rtmutex-Cure-RT-double-blocking-issue.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 9 May 2017 17:11:10 +0200 -Subject: [PATCH] futex/rtmutex: Cure RT double blocking issue -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -RT has a problem when the wait on a futex/rtmutex got interrupted by a -timeout or a signal. task->pi_blocked_on is still set when returning from -rt_mutex_wait_proxy_lock(). The task must acquire the hash bucket lock -after this. - -If the hash bucket lock is contended then the -BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in -task_blocks_on_rt_mutex() will trigger. - -This can be avoided by clearing task->pi_blocked_on in the return path of -rt_mutex_wait_proxy_lock() which removes the task from the boosting chain -of the rtmutex. That's correct because the task is not longer blocked on -it. 
- -Signed-off-by: Thomas Gleixner -Reported-by: Engleder Gerhard -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -2407,6 +2407,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter) - { -+ struct task_struct *tsk = current; - int ret; - - raw_spin_lock_irq(&lock->wait_lock); -@@ -2418,6 +2419,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m - * have to fix that up. - */ - fixup_rt_mutex_waiters(lock); -+ /* -+ * RT has a problem here when the wait got interrupted by a timeout -+ * or a signal. task->pi_blocked_on is still set. The task must -+ * acquire the hash bucket lock when returning from this function. -+ * -+ * If the hash bucket lock is contended then the -+ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in -+ * task_blocks_on_rt_mutex() will trigger. This can be avoided by -+ * clearing task->pi_blocked_on which removes the task from the -+ * boosting chain of the rtmutex. That's correct because the task -+ * is not longer blocked on it. -+ */ -+ if (ret) { -+ raw_spin_lock(&tsk->pi_lock); -+ tsk->pi_blocked_on = NULL; -+ raw_spin_unlock(&tsk->pi_lock); -+ } -+ - raw_spin_unlock_irq(&lock->wait_lock); - - return ret; diff --git a/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch b/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch index b0a5ab75e..1fcf7ddb2 100644 --- a/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch +++ b/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Wed, 8 Mar 2017 14:23:35 +0100 Subject: [PATCH] futex: workaround migrate_disable/enable in different context -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz migrate_disable()/migrate_enable() takes a different path in atomic() vs !atomic() context. These little hacks ensure that we don't underflow / overflow @@ -11,12 +11,12 @@ enabled and unlock it with interrupts disabled. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - kernel/futex.c | 20 ++++++++++++++++++++ - 1 file changed, 20 insertions(+) + kernel/futex.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2671,9 +2671,18 @@ static int futex_lock_pi(u32 __user *uad +@@ -2762,9 +2762,18 @@ static int futex_lock_pi(u32 __user *uad * lock handoff sequence. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (ret) { if (ret == 1) -@@ -2817,10 +2826,21 @@ static int futex_unlock_pi(u32 __user *u +@@ -2911,11 +2920,21 @@ static int futex_unlock_pi(u32 __user *u * observed. 
*/ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); @@ -49,7 +49,7 @@ Signed-off-by: Sebastian Andrzej Siewior + migrate_disable(); spin_unlock(&hb->lock); -+ /* Drops pi_state->pi_mutex.wait_lock */ + /* drops pi_state->pi_mutex.wait_lock */ ret = wake_futex_pi(uaddr, uval, pi_state); + migrate_enable(); diff --git a/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch b/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch index fe414f3d5..bf1275ba0 100644 --- a/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch +++ b/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:57 -0500 Subject: genirq: Disable irqpoll on -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Creates long latencies for no value @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c -@@ -444,6 +444,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir +@@ -445,6 +445,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir static int __init irqfixup_setup(char *str) { @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); -@@ -456,6 +460,10 @@ module_param(irqfixup, int, 0644); +@@ -457,6 +461,10 @@ module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { diff --git a/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch b/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch index ec49700b9..9d2f6995d 100644 --- a/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch +++ b/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 21 Aug 2013 17:48:46 +0200 Subject: genirq: Do not invoke the affinity callback via a workqueue on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Joe Korty reported, that __irq_set_affinity_locked() schedules a workqueue while holding a rawlock which results in a might_sleep() @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -14,6 +14,7 @@ +@@ -15,6 +15,7 @@ #include #include #include @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -218,6 +219,7 @@ extern void resume_device_irqs(void); +@@ -229,6 +230,7 @@ extern void resume_device_irqs(void); * struct irq_affinity_notify - context for notification of IRQ affinity changes * @irq: Interrupt to which notification applies * @kref: Reference count, for internal use @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior * @work: Work item, for internal use * @notify: Function to be called on change. This will be * called in process context. 
-@@ -229,7 +231,11 @@ extern void resume_device_irqs(void); +@@ -240,7 +242,11 @@ extern void resume_device_irqs(void); struct irq_affinity_notify { unsigned int irq; struct kref kref; @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -237,7 +237,12 @@ int irq_set_affinity_locked(struct irq_d +@@ -226,7 +226,12 @@ int irq_set_affinity_locked(struct irq_d if (desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior } irqd_set(data, IRQD_AFFINITY_SET); -@@ -275,10 +280,8 @@ int irq_set_affinity_hint(unsigned int i +@@ -264,10 +269,8 @@ int irq_set_affinity_hint(unsigned int i } EXPORT_SYMBOL_GPL(irq_set_affinity_hint); @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct irq_desc *desc = irq_to_desc(notify->irq); cpumask_var_t cpumask; unsigned long flags; -@@ -300,6 +303,35 @@ static void irq_affinity_notify(struct w +@@ -289,6 +292,35 @@ static void irq_affinity_notify(struct w kref_put(¬ify->kref, notify->release); } @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * irq_set_affinity_notifier - control notification of IRQ affinity changes * @irq: Interrupt for which to enable/disable notification -@@ -328,7 +360,12 @@ irq_set_affinity_notifier(unsigned int i +@@ -317,7 +349,12 @@ irq_set_affinity_notifier(unsigned int i if (notify) { notify->irq = irq; kref_init(¬ify->kref); diff --git a/debian/patches/features/all/rt/genirq-force-threading.patch b/debian/patches/features/all/rt/genirq-force-threading.patch index 8d71a4977..aa1de8967 100644 --- a/debian/patches/features/all/rt/genirq-force-threading.patch +++ b/debian/patches/features/all/rt/genirq-force-threading.patch @@ -1,7 +1,7 @@ Subject: genirq: Force interrupt thread on RT From: Thomas Gleixner Date: Sun, 03 Apr 2011 11:57:29 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Force threaded_irqs and optimize the code (force_irqthreads) in regard to this. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -418,9 +418,13 @@ extern int irq_set_irqchip_state(unsigne +@@ -429,9 +429,13 @@ extern int irq_set_irqchip_state(unsigne bool state); #ifdef CONFIG_IRQ_FORCED_THREADING diff --git a/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch b/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch index e9f819489..6f0197b14 100644 --- a/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch +++ b/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Thu, 11 Feb 2016 11:54:00 -0600 Subject: genirq: update irq_set_irqchip_state documentation -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On -rt kernels, the use of migrate_disable()/migrate_enable() is sufficient to guarantee a task isn't moved to another CPU. 
Update the @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -2115,7 +2115,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) +@@ -2191,7 +2191,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. * diff --git a/debian/patches/features/all/rt/greybus-audio-don-t-inclide-rwlock.h-directly.patch b/debian/patches/features/all/rt/greybus-audio-don-t-inclide-rwlock.h-directly.patch new file mode 100644 index 000000000..49e6ebcaf --- /dev/null +++ b/debian/patches/features/all/rt/greybus-audio-don-t-inclide-rwlock.h-directly.patch @@ -0,0 +1,31 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 5 Oct 2017 14:38:52 +0200 +Subject: [PATCH] greybus: audio: don't inclide rwlock.h directly. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +rwlock.h should not be included directly. Instead linux/splinlock.h +should be included. One thing it does is to break the RT build. + +Cc: Vaibhav Agarwal +Cc: Mark Greer +Cc: Johan Hovold +Cc: Alex Elder +Cc: Greg Kroah-Hartman +Cc: greybus-dev@lists.linaro.org +Cc: devel@driverdev.osuosl.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/staging/greybus/audio_manager.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/staging/greybus/audio_manager.c ++++ b/drivers/staging/greybus/audio_manager.c +@@ -10,7 +10,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include "audio_manager.h" diff --git a/debian/patches/features/all/rt/hotplug-Use-set_cpus_allowed_ptr-in-sync_unplug_thre.patch b/debian/patches/features/all/rt/hotplug-Use-set_cpus_allowed_ptr-in-sync_unplug_thre.patch deleted file mode 100644 index 835f88084..000000000 --- a/debian/patches/features/all/rt/hotplug-Use-set_cpus_allowed_ptr-in-sync_unplug_thre.patch +++ /dev/null @@ -1,47 +0,0 @@ -From: Mike Galbraith -Date: Tue, 24 Mar 2015 08:14:49 +0100 -Subject: hotplug: Use set_cpus_allowed_ptr() in sync_unplug_thread() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -do_set_cpus_allowed() is not safe vs ->sched_class change. 
- -crash> bt -PID: 11676 TASK: ffff88026f979da0 CPU: 22 COMMAND: "sync_unplug/22" - #0 [ffff880274d25bc8] machine_kexec at ffffffff8103b41c - #1 [ffff880274d25c18] crash_kexec at ffffffff810d881a - #2 [ffff880274d25cd8] oops_end at ffffffff81525818 - #3 [ffff880274d25cf8] do_invalid_op at ffffffff81003096 - #4 [ffff880274d25d90] invalid_op at ffffffff8152d3de - [exception RIP: set_cpus_allowed_rt+18] - RIP: ffffffff8109e012 RSP: ffff880274d25e48 RFLAGS: 00010202 - RAX: ffffffff8109e000 RBX: ffff88026f979da0 RCX: ffff8802770cb6e8 - RDX: 0000000000000000 RSI: ffffffff81add700 RDI: ffff88026f979da0 - RBP: ffff880274d25e78 R8: ffffffff816112e0 R9: 0000000000000001 - R10: 0000000000000001 R11: 0000000000011940 R12: ffff88026f979da0 - R13: ffff8802770cb6d0 R14: ffff880274d25fd8 R15: 0000000000000000 - ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 - #5 [ffff880274d25e60] do_set_cpus_allowed at ffffffff8108e65f - #6 [ffff880274d25e80] sync_unplug_thread at ffffffff81058c08 - #7 [ffff880274d25ed8] kthread at ffffffff8107cad6 - #8 [ffff880274d25f50] ret_from_fork at ffffffff8152bbbc -crash> task_struct ffff88026f979da0 | grep class - sched_class = 0xffffffff816111e0 , - -Signed-off-by: Mike Galbraith - -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -413,7 +413,7 @@ static int sync_unplug_thread(void *data - * we don't want any more work on this CPU. - */ - current->flags &= ~PF_NO_SETAFFINITY; -- do_set_cpus_allowed(current, cpu_present_mask); -+ set_cpus_allowed_ptr(current, cpu_present_mask); - migrate_me(); - return 0; - } diff --git a/debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch b/debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch new file mode 100644 index 000000000..3f7a5ecb4 --- /dev/null +++ b/debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch @@ -0,0 +1,96 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 4 Aug 2017 18:31:00 +0200 +Subject: [PATCH] hotplug: duct-tape RT-rwlock usage for non-RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +This type is only available on -RT. We need to craft something for +non-RT. Since the only migrate_disable() user is -RT only, there is no +damage. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/cpu.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -73,7 +73,7 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_s + .fail = CPUHP_INVALID, + }; + +-#ifdef CONFIG_HOTPLUG_CPU ++#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PREEMPT_RT_FULL) + static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \ + __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock); + #endif +@@ -296,6 +296,7 @@ static int cpu_hotplug_disabled; + */ + void pin_current_cpu(void) + { ++#ifdef CONFIG_PREEMPT_RT_FULL + struct rt_rw_lock *cpuhp_pin; + unsigned int cpu; + int ret; +@@ -320,6 +321,7 @@ void pin_current_cpu(void) + goto again; + } + current->pinned_on_cpu = cpu; ++#endif + } + + /** +@@ -327,6 +329,7 @@ void pin_current_cpu(void) + */ + void unpin_current_cpu(void) + { ++#ifdef CONFIG_PREEMPT_RT_FULL + struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock); + + if (WARN_ON(current->pinned_on_cpu != smp_processor_id())) +@@ -334,6 +337,7 @@ void unpin_current_cpu(void) + + current->pinned_on_cpu = -1; + __read_rt_unlock(cpuhp_pin); ++#endif + } + + DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); +@@ -803,7 +807,9 @@ static int take_cpu_down(void *_param) + + static int takedown_cpu(unsigned int cpu) + { ++#ifdef CONFIG_PREEMPT_RT_FULL + struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu); ++#endif + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + int err; + +@@ -817,14 +823,18 @@ static int takedown_cpu(unsigned int cpu + */ + irq_lock_sparse(); + ++#ifdef CONFIG_PREEMPT_RT_FULL + __write_rt_lock(cpuhp_pin); ++#endif + + /* + * So now all preempt/rcu users must observe !cpu_active(). + */ + err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); + if (err) { ++#ifdef CONFIG_PREEMPT_RT_FULL + __write_rt_unlock(cpuhp_pin); ++#endif + /* CPU refused to die */ + irq_unlock_sparse(); + /* Unpark the hotplug thread so we can rollback there */ +@@ -843,7 +853,9 @@ static int takedown_cpu(unsigned int cpu + wait_for_ap_thread(st, false); + BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); + ++#ifdef CONFIG_PREEMPT_RT_FULL + __write_rt_unlock(cpuhp_pin); ++#endif + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); + diff --git a/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch b/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch index 648f6e681..af2744748 100644 --- a/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch +++ b/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch @@ -1,7 +1,7 @@ Subject: hotplug: Lightweight get online cpus From: Thomas Gleixner Date: Wed, 15 Jun 2011 12:36:06 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz get_online_cpus() is a heavy weight function which involves a global mutex. migrate_disable() wants a simpler construct which prevents only @@ -13,10 +13,10 @@ tasks on the cpu which should be brought down. 
Signed-off-by: Thomas Gleixner --- - include/linux/cpu.h | 5 ++ - kernel/cpu.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - kernel/sched/core.c | 4 + - 3 files changed, 127 insertions(+) + include/linux/cpu.h | 5 +++++ + kernel/cpu.c | 15 +++++++++++++++ + kernel/sched/core.c | 4 ++++ + 3 files changed, 24 insertions(+) --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -27,178 +27,53 @@ Signed-off-by: Thomas Gleixner +extern void pin_current_cpu(void); +extern void unpin_current_cpu(void); - #else /* CONFIG_HOTPLUG_CPU */ + #else /* CONFIG_HOTPLUG_CPU */ -@@ -118,6 +120,9 @@ static inline void cpu_hotplug_done(void - #define put_online_cpus() do { } while (0) - #define cpu_hotplug_disable() do { } while (0) - #define cpu_hotplug_enable() do { } while (0) -+static inline void pin_current_cpu(void) { } -+static inline void unpin_current_cpu(void) { } +@@ -119,6 +121,9 @@ static inline void cpus_read_unlock(void + static inline void lockdep_assert_cpus_held(void) { } + static inline void cpu_hotplug_disable(void) { } + static inline void cpu_hotplug_enable(void) { } ++static inline void pin_current_cpu(void) { } ++static inline void unpin_current_cpu(void) { } + - #endif /* CONFIG_HOTPLUG_CPU */ + #endif /* !CONFIG_HOTPLUG_CPU */ - #ifdef CONFIG_PM_SLEEP_SMP + /* Wrappers which go away once all code is converted */ --- a/kernel/cpu.c +++ b/kernel/cpu.c -@@ -234,6 +234,100 @@ static struct { - #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) - #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) +@@ -286,6 +286,21 @@ static int cpu_hotplug_disabled; + + #ifdef CONFIG_HOTPLUG_CPU -+struct hotplug_pcp { -+ struct task_struct *unplug; -+ int refcount; -+ struct completion synced; -+}; -+ -+static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); -+ +/** + * pin_current_cpu - Prevent the current cpu from being unplugged -+ * -+ * Lightweight version of get_online_cpus() to prevent cpu from being -+ * unplugged when code runs in a migration disabled region. -+ * -+ * Must be called with preemption disabled (preempt_count = 1)! + */ +void pin_current_cpu(void) +{ -+ struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); + -+retry: -+ if (!hp->unplug || hp->refcount || preempt_count() > 1 || -+ hp->unplug == current) { -+ hp->refcount++; -+ return; -+ } -+ preempt_enable(); -+ mutex_lock(&cpu_hotplug.lock); -+ mutex_unlock(&cpu_hotplug.lock); -+ preempt_disable(); -+ goto retry; +} + +/** + * unpin_current_cpu - Allow unplug of current cpu -+ * -+ * Must be called with preemption or interrupts disabled! + */ +void unpin_current_cpu(void) +{ -+ struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); -+ -+ WARN_ON(hp->refcount <= 0); -+ -+ /* This is safe. sync_unplug_thread is pinned to this cpu */ -+ if (!--hp->refcount && hp->unplug && hp->unplug != current) -+ wake_up_process(hp->unplug); +} + -+/* -+ * FIXME: Is this really correct under all circumstances ? -+ */ -+static int sync_unplug_thread(void *data) -+{ -+ struct hotplug_pcp *hp = data; -+ -+ preempt_disable(); -+ hp->unplug = current; -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ while (hp->refcount) { -+ schedule_preempt_disabled(); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ } -+ set_current_state(TASK_RUNNING); -+ preempt_enable(); -+ complete(&hp->synced); -+ return 0; -+} -+ -+/* -+ * Start the sync_unplug_thread on the target cpu and wait for it to -+ * complete. 
-+ */ -+static int cpu_unplug_begin(unsigned int cpu) -+{ -+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); -+ struct task_struct *tsk; -+ -+ init_completion(&hp->synced); -+ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu); -+ if (IS_ERR(tsk)) -+ return (PTR_ERR(tsk)); -+ kthread_bind(tsk, cpu); -+ wake_up_process(tsk); -+ wait_for_completion(&hp->synced); -+ return 0; -+} -+ -+static void cpu_unplug_done(unsigned int cpu) -+{ -+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); -+ -+ hp->unplug = NULL; -+} + DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); - void get_online_cpus(void) - { -@@ -776,6 +870,8 @@ static int __ref _cpu_down(unsigned int - { - struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); - int prev_state, ret = 0; -+ int mycpu; -+ cpumask_var_t cpumask; - - if (num_online_cpus() == 1) - return -EBUSY; -@@ -783,7 +879,27 @@ static int __ref _cpu_down(unsigned int - if (!cpu_present(cpu)) - return -EINVAL; - -+ /* Move the downtaker off the unplug cpu */ -+ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) -+ return -ENOMEM; -+ cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); -+ set_cpus_allowed_ptr(current, cpumask); -+ free_cpumask_var(cpumask); -+ preempt_disable(); -+ mycpu = smp_processor_id(); -+ if (mycpu == cpu) { -+ printk(KERN_ERR "Yuck! Still on unplug CPU\n!"); -+ preempt_enable(); -+ return -EBUSY; -+ } -+ preempt_enable(); -+ - cpu_hotplug_begin(); -+ ret = cpu_unplug_begin(cpu); -+ if (ret) { -+ printk("cpu_unplug_begin(%d) failed\n", cpu); -+ goto out_cancel; -+ } - - cpuhp_tasks_frozen = tasks_frozen; - -@@ -821,6 +937,8 @@ static int __ref _cpu_down(unsigned int - } - - out: -+ cpu_unplug_done(cpu); -+out_cancel: - cpu_hotplug_done(); - return ret; - } + void cpus_read_lock(void) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7471,6 +7471,7 @@ void migrate_disable(void) - /* get_online_cpus(); */ +@@ -6912,6 +6912,7 @@ void migrate_disable(void) + } preempt_disable(); + pin_current_cpu(); - p->migrate_disable = 1; - p->cpus_ptr = cpumask_of(smp_processor_id()); -@@ -7535,13 +7536,16 @@ void migrate_enable(void) + migrate_disable_update_cpus_allowed(p); + p->migrate_disable = 1; +@@ -6974,12 +6975,15 @@ void migrate_enable(void) arg.task = p; arg.dest_cpu = dest_cpu; @@ -206,12 +81,11 @@ Signed-off-by: Thomas Gleixner preempt_enable(); stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); tlb_migrate_finish(p->mm); - /* put_online_cpus(); */ + return; } } + unpin_current_cpu(); - /* put_online_cpus(); */ preempt_enable(); } + EXPORT_SYMBOL(migrate_enable); diff --git a/debian/patches/features/all/rt/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch b/debian/patches/features/all/rt/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch deleted file mode 100644 index 8ae8f22a7..000000000 --- a/debian/patches/features/all/rt/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch +++ /dev/null @@ -1,25 +0,0 @@ -Subject: hotplug: sync_unplug: No "\n" in task name -From: Yong Zhang -Date: Sun, 16 Oct 2011 18:56:43 +0800 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Otherwise the output will look a little odd. 
- -Signed-off-by: Yong Zhang -Link: http://lkml.kernel.org/r/1318762607-2261-2-git-send-email-yong.zhang0@gmail.com -Signed-off-by: Thomas Gleixner ---- - kernel/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -313,7 +313,7 @@ static int cpu_unplug_begin(unsigned int - struct task_struct *tsk; - - init_completion(&hp->synced); -- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu); -+ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); - if (IS_ERR(tsk)) - return (PTR_ERR(tsk)); - kthread_bind(tsk, cpu); diff --git a/debian/patches/features/all/rt/hotplug-use-migrate-disable.patch b/debian/patches/features/all/rt/hotplug-use-migrate-disable.patch deleted file mode 100644 index f36a435ab..000000000 --- a/debian/patches/features/all/rt/hotplug-use-migrate-disable.patch +++ /dev/null @@ -1,40 +0,0 @@ -Subject: hotplug: Use migrate disable on unplug -From: Thomas Gleixner -Date: Sun, 17 Jul 2011 19:35:29 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Migration needs to be disabled accross the unplug handling to make -sure that the unplug thread is off the unplugged cpu. - -Signed-off-by: Thomas Gleixner ---- - kernel/cpu.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -887,14 +887,13 @@ static int __ref _cpu_down(unsigned int - cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); - set_cpus_allowed_ptr(current, cpumask); - free_cpumask_var(cpumask); -- preempt_disable(); -+ migrate_disable(); - mycpu = smp_processor_id(); - if (mycpu == cpu) { - printk(KERN_ERR "Yuck! Still on unplug CPU\n!"); -- preempt_enable(); -+ migrate_enable(); - return -EBUSY; - } -- preempt_enable(); - - cpu_hotplug_begin(); - ret = cpu_unplug_begin(cpu); -@@ -942,6 +941,7 @@ static int __ref _cpu_down(unsigned int - cpu_unplug_done(cpu); - out_cancel: - cpu_hotplug_done(); -+ migrate_enable(); - return ret; - } - diff --git a/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch b/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch index 6e4648980..41e2be975 100644 --- a/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch +++ b/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch @@ -1,7 +1,7 @@ From: Yang Shi Date: Mon, 16 Sep 2013 14:09:19 -0700 Subject: hrtimer: Move schedule_work call to helper thread -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz When run ltp leapsec_timer test, the following call trace is caught: @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -695,6 +695,29 @@ static void hrtimer_switch_to_hres(void) +@@ -728,6 +728,29 @@ static void hrtimer_switch_to_hres(void) retrigger_next_event(NULL); } @@ -82,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void clock_was_set_work(struct work_struct *work) { clock_was_set(); -@@ -710,6 +733,7 @@ void clock_was_set_delayed(void) +@@ -743,6 +766,7 @@ void clock_was_set_delayed(void) { schedule_work(&hrtimer_work); } diff --git a/debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch 
b/debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch new file mode 100644 index 000000000..cdd3576cf --- /dev/null +++ b/debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch @@ -0,0 +1,212 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 3 Jul 2009 08:44:31 -0500 +Subject: hrtimer: by timers by default into the softirq context +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +We can't have hrtimers callbacks running in hardirq context on RT. Therefore +the timers are deferred to the softirq context by default. +There are few timers which expect to be run in hardirq context even on RT. +Those are: +- very short running where low latency is critical (kvm lapic) +- timers which take raw locks and need run in hard-irq context (perf, sched) +- wake up related timer (kernel side of clock_nanosleep() and so on) + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/x86/kvm/lapic.c | 2 +- + include/linux/hrtimer.h | 6 ++++++ + kernel/events/core.c | 4 ++-- + kernel/sched/core.c | 2 +- + kernel/sched/deadline.c | 2 +- + kernel/sched/rt.c | 4 ++-- + kernel/time/hrtimer.c | 34 +++++++++++++++++++++++++++++++--- + kernel/time/tick-broadcast-hrtimer.c | 2 +- + kernel/time/tick-sched.c | 2 +- + kernel/watchdog.c | 2 +- + 10 files changed, 47 insertions(+), 13 deletions(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -2093,7 +2093,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc + apic->vcpu = vcpu; + + hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, +- HRTIMER_MODE_ABS_PINNED); ++ HRTIMER_MODE_ABS_PINNED_HARD); + apic->lapic_timer.timer.function = apic_timer_fn; + + /* +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -42,6 +42,7 @@ enum hrtimer_mode { + HRTIMER_MODE_REL = 0x01, + HRTIMER_MODE_PINNED = 0x02, + HRTIMER_MODE_SOFT = 0x04, ++ HRTIMER_MODE_HARD = 0x08, + + HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, + HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, +@@ -52,6 +53,11 @@ enum hrtimer_mode { + HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + ++ HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, ++ HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, ++ ++ HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, ++ HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, + }; + + /* +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -1042,7 +1042,7 @@ static void __perf_mux_hrtimer_init(stru + cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); + + raw_spin_lock_init(&cpuctx->hrtimer_lock); +- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); ++ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); + timer->function = perf_mux_hrtimer_handler; + } + +@@ -8705,7 +8705,7 @@ static void perf_swevent_init_hrtimer(st + if (!is_sampling_event(event)) + return; + +- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + hwc->hrtimer.function = perf_swevent_hrtimer; + + /* +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -341,7 +341,7 @@ static void init_rq_hrtick(struct rq *rq + rq->hrtick_csd.info = rq; + #endif + +- hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ 
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + rq->hrtick_timer.function = hrtick; + } + #else /* CONFIG_SCHED_HRTICK */ +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -1020,7 +1020,7 @@ void init_dl_task_timer(struct sched_dl_ + { + struct hrtimer *timer = &dl_se->dl_timer; + +- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + timer->function = dl_task_timer; + } + +--- a/kernel/sched/rt.c ++++ b/kernel/sched/rt.c +@@ -47,8 +47,8 @@ void init_rt_bandwidth(struct rt_bandwid + + raw_spin_lock_init(&rt_b->rt_runtime_lock); + +- hrtimer_init(&rt_b->rt_period_timer, +- CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL_HARD); + rt_b->rt_period_timer.function = sched_rt_period_timer; + } + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -423,7 +423,7 @@ static inline void debug_hrtimer_activat + * match, when a timer is started via__hrtimer_start_range_ns(). + */ + if (modecheck) +- WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); ++ WARN_ON_ONCE((mode & HRTIMER_MODE_SOFT) & !timer->is_soft); + + debug_object_activate(timer, &hrtimer_debug_descr); + } +@@ -1247,10 +1247,17 @@ static inline int hrtimer_clockid_to_bas + static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) + { +- bool softtimer = !!(mode & HRTIMER_MODE_SOFT); +- int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; ++ bool softtimer; ++ int base; + struct hrtimer_cpu_base *cpu_base; + ++ softtimer = !!(mode & HRTIMER_MODE_SOFT); ++#ifdef CONFIG_PREEMPT_RT_FULL ++ if (!softtimer && !(mode & HRTIMER_MODE_HARD)) ++ softtimer = true; ++#endif ++ base = softtimer ? 
HRTIMER_MAX_CLOCK_BASES / 2 : 0; ++ + memset(timer, 0, sizeof(struct hrtimer)); + + cpu_base = raw_cpu_ptr(&hrtimer_bases); +@@ -1630,11 +1637,32 @@ static enum hrtimer_restart hrtimer_wake + return HRTIMER_NORESTART; + } + ++#ifdef CONFIG_PREEMPT_RT_FULL ++static bool task_is_realtime(struct task_struct *tsk) ++{ ++ int policy = tsk->policy; ++ ++ if (policy == SCHED_FIFO || policy == SCHED_RR) ++ return true; ++ if (policy == SCHED_DEADLINE) ++ return true; ++ return false; ++} ++#endif ++ + static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, + clockid_t clock_id, + enum hrtimer_mode mode, + struct task_struct *task) + { ++#ifdef CONFIG_PREEMPT_RT_FULL ++ if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) { ++ if (task_is_realtime(current) || system_state != SYSTEM_RUNNING) ++ mode |= HRTIMER_MODE_HARD; ++ else ++ mode |= HRTIMER_MODE_SOFT; ++ } ++#endif + __hrtimer_init(&sl->timer, clock_id, mode); + sl->timer.function = hrtimer_wakeup; + sl->task = task; +--- a/kernel/time/tick-broadcast-hrtimer.c ++++ b/kernel/time/tick-broadcast-hrtimer.c +@@ -106,7 +106,7 @@ static enum hrtimer_restart bc_handler(s + + void tick_setup_hrtimer_broadcast(void) + { +- hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + bctimer.function = bc_handler; + clockevents_register_device(&ce_broadcast_hrtimer); + } +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -1226,7 +1226,7 @@ void tick_setup_sched_timer(void) + /* + * Emulate tick processing via per-CPU hrtimers: + */ +- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + ts->sched_timer.function = tick_sched_timer; + + /* Get the next period (per-CPU) */ +--- a/kernel/watchdog.c ++++ b/kernel/watchdog.c +@@ -462,7 +462,7 @@ static void watchdog_enable(unsigned int + * Start the timer first to prevent the NMI watchdog triggering + * before the timer has a chance to fire. + */ +- hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ns_to_ktime(sample_period), + HRTIMER_MODE_REL_PINNED); diff --git a/debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch b/debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch new file mode 100644 index 000000000..5e72ab173 --- /dev/null +++ b/debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch @@ -0,0 +1,247 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 4 Sep 2017 18:31:50 +0200 +Subject: [PATCH] hrtimer: consolidate hrtimer_init() + hrtimer_init_sleeper() + calls +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +hrtimer_init_sleeper() calls require a prior initialisation of the +hrtimer object with hrtimer_init(). Lets make the initialisation of the +hrtimer object part of hrtimer_init_sleeper(). 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-mq.c | 3 +-- + include/linux/hrtimer.h | 19 ++++++++++++++++--- + include/linux/wait.h | 4 ++-- + kernel/futex.c | 19 ++++++++----------- + kernel/time/hrtimer.c | 46 ++++++++++++++++++++++++++++++++++++---------- + net/core/pktgen.c | 4 ++-- + 6 files changed, 65 insertions(+), 30 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2800,10 +2800,9 @@ static bool blk_mq_poll_hybrid_sleep(str + kt = nsecs; + + mode = HRTIMER_MODE_REL; +- hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode); ++ hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode, current); + hrtimer_set_expires(&hs.timer, kt); + +- hrtimer_init_sleeper(&hs, current); + do { + if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) + break; +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -363,10 +363,17 @@ DECLARE_PER_CPU(struct tick_device, tick + /* Initialize timers: */ + extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, + enum hrtimer_mode mode); ++extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, ++ enum hrtimer_mode mode, ++ struct task_struct *task); + + #ifdef CONFIG_DEBUG_OBJECTS_TIMERS + extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, + enum hrtimer_mode mode); ++extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, ++ clockid_t clock_id, ++ enum hrtimer_mode mode, ++ struct task_struct *task); + + extern void destroy_hrtimer_on_stack(struct hrtimer *timer); + #else +@@ -376,6 +383,15 @@ static inline void hrtimer_init_on_stack + { + hrtimer_init(timer, which_clock, mode); + } ++ ++static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, ++ clockid_t clock_id, ++ enum hrtimer_mode mode, ++ struct task_struct *task) ++{ ++ hrtimer_init_sleeper(sl, clock_id, mode, task); ++} ++ + static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } + #endif + +@@ -478,9 +494,6 @@ extern long hrtimer_nanosleep(const stru + const enum hrtimer_mode mode, + const clockid_t clockid); + +-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, +- struct task_struct *tsk); +- + extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, + const enum hrtimer_mode mode); + extern int schedule_hrtimeout_range_clock(ktime_t *expires, +--- a/include/linux/wait.h ++++ b/include/linux/wait.h +@@ -486,8 +486,8 @@ do { \ + int __ret = 0; \ + struct hrtimer_sleeper __t; \ + \ +- hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \ +- hrtimer_init_sleeper(&__t, current); \ ++ hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, HRTIMER_MODE_REL, \ ++ current); \ + if ((timeout) != KTIME_MAX) \ + hrtimer_start_range_ns(&__t.timer, timeout, \ + current->timer_slack_ns, \ +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2588,10 +2588,9 @@ static int futex_wait(u32 __user *uaddr, + if (abs_time) { + to = &timeout; + +- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? +- CLOCK_REALTIME : CLOCK_MONOTONIC, +- HRTIMER_MODE_ABS); +- hrtimer_init_sleeper(to, current); ++ hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ? 
++ CLOCK_REALTIME : CLOCK_MONOTONIC, ++ HRTIMER_MODE_ABS, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } +@@ -2690,9 +2689,8 @@ static int futex_lock_pi(u32 __user *uad + + if (time) { + to = &timeout; +- hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, +- HRTIMER_MODE_ABS); +- hrtimer_init_sleeper(to, current); ++ hrtimer_init_sleeper_on_stack(to, CLOCK_REALTIME, ++ HRTIMER_MODE_ABS, current); + hrtimer_set_expires(&to->timer, *time); + } + +@@ -3108,10 +3106,9 @@ static int futex_wait_requeue_pi(u32 __u + + if (abs_time) { + to = &timeout; +- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? +- CLOCK_REALTIME : CLOCK_MONOTONIC, +- HRTIMER_MODE_ABS); +- hrtimer_init_sleeper(to, current); ++ hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ? ++ CLOCK_REALTIME : CLOCK_MONOTONIC, ++ HRTIMER_MODE_ABS, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1602,13 +1602,44 @@ static enum hrtimer_restart hrtimer_wake + return HRTIMER_NORESTART; + } + +-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) ++static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, ++ clockid_t clock_id, ++ enum hrtimer_mode mode, ++ struct task_struct *task) + { ++ __hrtimer_init(&sl->timer, clock_id, mode); + sl->timer.function = hrtimer_wakeup; + sl->task = task; + } ++ ++/** ++ * hrtimer_init_sleeper - initialize sleeper to the given clock ++ * @sl: sleeper to be initialized ++ * @clock_id: the clock to be used ++ * @mode: timer mode abs/rel ++ * @task: the task to wake up ++ */ ++void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, ++ enum hrtimer_mode mode, struct task_struct *task) ++{ ++ debug_init(&sl->timer, clock_id, mode); ++ __hrtimer_init_sleeper(sl, clock_id, mode, task); ++ ++} + EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); + ++#ifdef CONFIG_DEBUG_OBJECTS_TIMERS ++void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, ++ clockid_t clock_id, ++ enum hrtimer_mode mode, ++ struct task_struct *task) ++{ ++ debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr); ++ __hrtimer_init_sleeper(sl, clock_id, mode, task); ++} ++EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); ++#endif ++ + int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) + { + switch(restart->nanosleep.type) { +@@ -1632,8 +1663,6 @@ static int __sched do_nanosleep(struct h + { + struct restart_block *restart; + +- hrtimer_init_sleeper(t, current); +- + do { + set_current_state(TASK_INTERRUPTIBLE); + hrtimer_start_expires(&t->timer, mode); +@@ -1670,10 +1699,9 @@ static long __sched hrtimer_nanosleep_re + struct hrtimer_sleeper t; + int ret; + +- hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, +- HRTIMER_MODE_ABS); ++ hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, ++ HRTIMER_MODE_ABS, current); + hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); +- + ret = do_nanosleep(&t, HRTIMER_MODE_ABS); + destroy_hrtimer_on_stack(&t.timer); + return ret; +@@ -1691,7 +1719,7 @@ long hrtimer_nanosleep(const struct time + if (dl_task(current) || rt_task(current)) + slack = 0; + +- hrtimer_init_on_stack(&t.timer, clockid, mode); ++ hrtimer_init_sleeper_on_stack(&t, clockid, mode, current); + hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); + ret = do_nanosleep(&t, mode); + if (ret != -ERESTART_RESTARTBLOCK) +@@ -1876,11 +1904,9 @@ 
schedule_hrtimeout_range_clock(ktime_t * + return -EINTR; + } + +- hrtimer_init_on_stack(&t.timer, clock_id, mode); ++ hrtimer_init_sleeper_on_stack(&t, clock_id, mode, current); + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); + +- hrtimer_init_sleeper(&t, current); +- + hrtimer_start_expires(&t.timer, mode); + + if (likely(t.task)) +--- a/net/core/pktgen.c ++++ b/net/core/pktgen.c +@@ -2252,7 +2252,8 @@ static void spin(struct pktgen_dev *pkt_ + s64 remaining; + struct hrtimer_sleeper t; + +- hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS, ++ current); + hrtimer_set_expires(&t.timer, spin_until); + + remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); +@@ -2267,7 +2268,6 @@ static void spin(struct pktgen_dev *pkt_ + } while (ktime_compare(end_time, spin_until) < 0); + } else { + /* see do_nanosleep */ +- hrtimer_init_sleeper(&t, current); + do { + set_current_state(TASK_INTERRUPTIBLE); + hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); diff --git a/debian/patches/features/all/rt/hrtimer-enfore-64byte-alignment.patch b/debian/patches/features/all/rt/hrtimer-enfore-64byte-alignment.patch deleted file mode 100644 index e52d12e1d..000000000 --- a/debian/patches/features/all/rt/hrtimer-enfore-64byte-alignment.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 23 Dec 2015 20:57:41 +0100 -Subject: hrtimer: enfore 64byte alignment -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The patch "hrtimer: Fixup hrtimer callback changes for preempt-rt" adds -a list_head expired to struct hrtimer_clock_base and with it we run into -BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); - -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/hrtimer.h | 4 ---- - 1 file changed, 4 deletions(-) - ---- a/include/linux/hrtimer.h -+++ b/include/linux/hrtimer.h -@@ -112,11 +112,7 @@ struct hrtimer_sleeper { - struct task_struct *task; - }; - --#ifdef CONFIG_64BIT - # define HRTIMER_CLOCK_BASE_ALIGN 64 --#else --# define HRTIMER_CLOCK_BASE_ALIGN 32 --#endif - - /** - * struct hrtimer_clock_base - the timer base for a specific clock diff --git a/debian/patches/features/all/rt/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch b/debian/patches/features/all/rt/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch deleted file mode 100644 index d1f76671d..000000000 --- a/debian/patches/features/all/rt/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch +++ /dev/null @@ -1,338 +0,0 @@ -From: Thomas Gleixner -Date: Fri, 3 Jul 2009 08:44:31 -0500 -Subject: hrtimer: Fixup hrtimer callback changes for preempt-rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -In preempt-rt we can not call the callbacks which take sleeping locks -from the timer interrupt context. - -Bring back the softirq split for now, until we fixed the signal -delivery problem for real. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Ingo Molnar - ---- - include/linux/hrtimer.h | 7 ++ - kernel/sched/core.c | 1 - kernel/sched/rt.c | 1 - kernel/time/hrtimer.c | 143 ++++++++++++++++++++++++++++++++++++++++++++--- - kernel/time/tick-sched.c | 1 - kernel/watchdog.c | 1 - 6 files changed, 145 insertions(+), 9 deletions(-) - ---- a/include/linux/hrtimer.h -+++ b/include/linux/hrtimer.h -@@ -86,6 +86,8 @@ enum hrtimer_restart { - * was armed. 
- * @function: timer expiry callback function - * @base: pointer to the timer base (per cpu and per clock) -+ * @cb_entry: list entry to defer timers from hardirq context -+ * @irqsafe: timer can run in hardirq context - * @state: state information (See bit values above) - * @is_rel: Set if the timer was armed relative - * -@@ -96,6 +98,8 @@ struct hrtimer { - ktime_t _softexpires; - enum hrtimer_restart (*function)(struct hrtimer *); - struct hrtimer_clock_base *base; -+ struct list_head cb_entry; -+ int irqsafe; - u8 state; - u8 is_rel; - }; -@@ -121,6 +125,7 @@ struct hrtimer_sleeper { - * timer to a base on another cpu. - * @clockid: clock id for per_cpu support - * @active: red black tree root node for the active timers -+ * @expired: list head for deferred timers. - * @get_time: function to retrieve the current time of the clock - * @offset: offset of this clock to the monotonic base - */ -@@ -129,6 +134,7 @@ struct hrtimer_clock_base { - int index; - clockid_t clockid; - struct timerqueue_head active; -+ struct list_head expired; - ktime_t (*get_time)(void); - ktime_t offset; - } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); -@@ -172,6 +178,7 @@ struct hrtimer_cpu_base { - raw_spinlock_t lock; - seqcount_t seq; - struct hrtimer *running; -+ struct hrtimer *running_soft; - unsigned int cpu; - unsigned int active_bases; - unsigned int clock_was_set_seq; ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -352,6 +352,7 @@ static void init_rq_hrtick(struct rq *rq - - hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - rq->hrtick_timer.function = hrtick; -+ rq->hrtick_timer.irqsafe = 1; - } - #else /* CONFIG_SCHED_HRTICK */ - static inline void hrtick_clear(struct rq *rq) ---- a/kernel/sched/rt.c -+++ b/kernel/sched/rt.c -@@ -48,6 +48,7 @@ void init_rt_bandwidth(struct rt_bandwid - - hrtimer_init(&rt_b->rt_period_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ rt_b->rt_period_timer.irqsafe = 1; - rt_b->rt_period_timer.function = sched_rt_period_timer; - } - ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -719,11 +719,8 @@ static inline int hrtimer_is_hres_enable - static inline void hrtimer_switch_to_hres(void) { } - static inline void - hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } --static inline int hrtimer_reprogram(struct hrtimer *timer, -- struct hrtimer_clock_base *base) --{ -- return 0; --} -+static inline void hrtimer_reprogram(struct hrtimer *timer, -+ struct hrtimer_clock_base *base) { } - static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } - static inline void retrigger_next_event(void *arg) { } - -@@ -844,7 +841,7 @@ void hrtimer_wait_for_timer(const struct - { - struct hrtimer_clock_base *base = timer->base; - -- if (base && base->cpu_base && !hrtimer_hres_active()) -+ if (base && base->cpu_base && !timer->irqsafe) - wait_event(base->cpu_base->wait, - !(hrtimer_callback_running(timer))); - } -@@ -894,6 +891,11 @@ static void __remove_hrtimer(struct hrti - if (!(state & HRTIMER_STATE_ENQUEUED)) - return; - -+ if (unlikely(!list_empty(&timer->cb_entry))) { -+ list_del_init(&timer->cb_entry); -+ return; -+ } -+ - if (!timerqueue_del(&base->active, &timer->node)) - cpu_base->active_bases &= ~(1 << base->index); - -@@ -1134,6 +1136,7 @@ static void __hrtimer_init(struct hrtime - - base = hrtimer_clockid_to_base(clock_id); - timer->base = &cpu_base->clock_base[base]; -+ INIT_LIST_HEAD(&timer->cb_entry); - timerqueue_init(&timer->node); - } - -@@ -1168,6 +1171,7 @@ bool 
hrtimer_active(const struct hrtimer - seq = raw_read_seqcount_begin(&cpu_base->seq); - - if (timer->state != HRTIMER_STATE_INACTIVE || -+ cpu_base->running_soft == timer || - cpu_base->running == timer) - return true; - -@@ -1265,10 +1269,109 @@ static void __run_hrtimer(struct hrtimer - cpu_base->running = NULL; - } - -+#ifdef CONFIG_PREEMPT_RT_BASE -+static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, -+ struct hrtimer_clock_base *base) -+{ -+ int leftmost; -+ -+ if (restart != HRTIMER_NORESTART && -+ !(timer->state & HRTIMER_STATE_ENQUEUED)) { -+ -+ leftmost = enqueue_hrtimer(timer, base); -+ if (!leftmost) -+ return; -+#ifdef CONFIG_HIGH_RES_TIMERS -+ if (!hrtimer_is_hres_active(timer)) { -+ /* -+ * Kick to reschedule the next tick to handle the new timer -+ * on dynticks target. -+ */ -+ if (base->cpu_base->nohz_active) -+ wake_up_nohz_cpu(base->cpu_base->cpu); -+ } else { -+ -+ hrtimer_reprogram(timer, base); -+ } -+#endif -+ } -+} -+ -+/* -+ * The changes in mainline which removed the callback modes from -+ * hrtimer are not yet working with -rt. The non wakeup_process() -+ * based callbacks which involve sleeping locks need to be treated -+ * seperately. -+ */ -+static void hrtimer_rt_run_pending(void) -+{ -+ enum hrtimer_restart (*fn)(struct hrtimer *); -+ struct hrtimer_cpu_base *cpu_base; -+ struct hrtimer_clock_base *base; -+ struct hrtimer *timer; -+ int index, restart; -+ -+ local_irq_disable(); -+ cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); -+ -+ raw_spin_lock(&cpu_base->lock); -+ -+ for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { -+ base = &cpu_base->clock_base[index]; -+ -+ while (!list_empty(&base->expired)) { -+ timer = list_first_entry(&base->expired, -+ struct hrtimer, cb_entry); -+ -+ /* -+ * Same as the above __run_hrtimer function -+ * just we run with interrupts enabled. 
-+ */ -+ debug_deactivate(timer); -+ cpu_base->running_soft = timer; -+ raw_write_seqcount_barrier(&cpu_base->seq); -+ -+ __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); -+ fn = timer->function; -+ -+ raw_spin_unlock_irq(&cpu_base->lock); -+ restart = fn(timer); -+ raw_spin_lock_irq(&cpu_base->lock); -+ -+ hrtimer_rt_reprogram(restart, timer, base); -+ raw_write_seqcount_barrier(&cpu_base->seq); -+ -+ WARN_ON_ONCE(cpu_base->running_soft != timer); -+ cpu_base->running_soft = NULL; -+ } -+ } -+ -+ raw_spin_unlock_irq(&cpu_base->lock); -+ -+ wake_up_timer_waiters(cpu_base); -+} -+ -+static int hrtimer_rt_defer(struct hrtimer *timer) -+{ -+ if (timer->irqsafe) -+ return 0; -+ -+ __remove_hrtimer(timer, timer->base, timer->state, 0); -+ list_add_tail(&timer->cb_entry, &timer->base->expired); -+ return 1; -+} -+ -+#else -+ -+static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } -+ -+#endif -+ - static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) - { - struct hrtimer_clock_base *base = cpu_base->clock_base; - unsigned int active = cpu_base->active_bases; -+ int raise = 0; - - for (; active; base++, active >>= 1) { - struct timerqueue_node *node; -@@ -1299,9 +1402,14 @@ static void __hrtimer_run_queues(struct - if (basenow < hrtimer_get_softexpires_tv64(timer)) - break; - -- __run_hrtimer(cpu_base, base, timer, &basenow); -+ if (!hrtimer_rt_defer(timer)) -+ __run_hrtimer(cpu_base, base, timer, &basenow); -+ else -+ raise = 1; - } - } -+ if (raise) -+ raise_softirq_irqoff(HRTIMER_SOFTIRQ); - } - - #ifdef CONFIG_HIGH_RES_TIMERS -@@ -1443,8 +1551,6 @@ void hrtimer_run_queues(void) - now = hrtimer_update_base(cpu_base); - __hrtimer_run_queues(cpu_base, now); - raw_spin_unlock(&cpu_base->lock); -- -- wake_up_timer_waiters(cpu_base); - } - - /* -@@ -1466,6 +1572,7 @@ static enum hrtimer_restart hrtimer_wake - void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) - { - sl->timer.function = hrtimer_wakeup; -+ sl->timer.irqsafe = 1; - sl->task = task; - } - EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); -@@ -1600,6 +1707,7 @@ int hrtimers_prepare_cpu(unsigned int cp - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - cpu_base->clock_base[i].cpu_base = cpu_base; - timerqueue_init_head(&cpu_base->clock_base[i].active); -+ INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); - } - - cpu_base->cpu = cpu; -@@ -1676,9 +1784,26 @@ int hrtimers_dead_cpu(unsigned int scpu) - - #endif /* CONFIG_HOTPLUG_CPU */ - -+#ifdef CONFIG_PREEMPT_RT_BASE -+ -+static void run_hrtimer_softirq(struct softirq_action *h) -+{ -+ hrtimer_rt_run_pending(); -+} -+ -+static void hrtimers_open_softirq(void) -+{ -+ open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); -+} -+ -+#else -+static void hrtimers_open_softirq(void) { } -+#endif -+ - void __init hrtimers_init(void) - { - hrtimers_prepare_cpu(smp_processor_id()); -+ hrtimers_open_softirq(); - } - - /** ---- a/kernel/time/tick-sched.c -+++ b/kernel/time/tick-sched.c -@@ -1197,6 +1197,7 @@ void tick_setup_sched_timer(void) - * Emulate tick processing via per-CPU hrtimers: - */ - hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); -+ ts->sched_timer.irqsafe = 1; - ts->sched_timer.function = tick_sched_timer; - - /* Get the next period (per-CPU) */ ---- a/kernel/watchdog.c -+++ b/kernel/watchdog.c -@@ -384,6 +384,7 @@ static void watchdog_enable(unsigned int - /* kick off the timer for the hardlockup detector */ - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = 
watchdog_timer_fn; -+ hrtimer->irqsafe = 1; - - /* Enable the perf event */ - watchdog_nmi_enable(cpu); diff --git a/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch b/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch index 6e5659c6e..a7b8d1948 100644 --- a/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch +++ b/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:34 -0500 Subject: hrtimers: Prepare full preemption -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Make cancellation of a running callback in softirq context safe against preemption. @@ -26,17 +26,17 @@ Signed-off-by: Thomas Gleixner struct hrtimer_clock_base; struct hrtimer_cpu_base; -@@ -191,6 +192,9 @@ struct hrtimer_cpu_base { - unsigned int nr_hangs; - unsigned int max_hang_time; - #endif +@@ -215,6 +216,9 @@ struct hrtimer_cpu_base { + ktime_t expires_next; + struct hrtimer *next_timer; + ktime_t softirq_expires_next; +#ifdef CONFIG_PREEMPT_RT_BASE + wait_queue_head_t wait; +#endif + struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; - -@@ -400,6 +404,13 @@ static inline void hrtimer_restart(struc +@@ -432,6 +436,13 @@ static inline void hrtimer_restart(struc hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } @@ -50,18 +50,18 @@ Signed-off-by: Thomas Gleixner /* Query timers: */ extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); -@@ -424,7 +435,7 @@ static inline int hrtimer_is_queued(stru +@@ -456,7 +467,7 @@ static inline int hrtimer_is_queued(stru * Helper function to check, whether the timer is running the callback * function */ -static inline int hrtimer_callback_running(struct hrtimer *timer) +static inline int hrtimer_callback_running(const struct hrtimer *timer) { - return timer->base->cpu_base->running == timer; + return timer->base->running == timer; } --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -827,6 +827,32 @@ u64 hrtimer_forward(struct hrtimer *time +@@ -930,6 +930,33 @@ u64 hrtimer_forward(struct hrtimer *time } EXPORT_SYMBOL_GPL(hrtimer_forward); @@ -82,7 +82,8 @@ Signed-off-by: Thomas Gleixner +{ + struct hrtimer_clock_base *base = timer->base; + -+ if (base && base->cpu_base && !hrtimer_hres_active()) ++ if (base && base->cpu_base && ++ base->index >= HRTIMER_BASE_MONOTONIC_SOFT) + wait_event(base->cpu_base->wait, + !(hrtimer_callback_running(timer))); +} @@ -94,7 +95,7 @@ Signed-off-by: Thomas Gleixner /* * enqueue_hrtimer - internal function to (re)start a timer * -@@ -1032,7 +1058,7 @@ int hrtimer_cancel(struct hrtimer *timer +@@ -1156,7 +1183,7 @@ int hrtimer_cancel(struct hrtimer *timer if (ret >= 0) return ret; @@ -103,19 +104,18 @@ Signed-off-by: Thomas Gleixner } } EXPORT_SYMBOL_GPL(hrtimer_cancel); -@@ -1417,6 +1443,8 @@ void hrtimer_run_queues(void) - now = hrtimer_update_base(cpu_base); - __hrtimer_run_queues(cpu_base, now); - raw_spin_unlock(&cpu_base->lock); -+ +@@ -1430,6 +1457,7 @@ static __latent_entropy void hrtimer_run + hrtimer_update_softirq_timer(cpu_base, true); + + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + wake_up_timer_waiters(cpu_base); } - /* -@@ -1576,6 +1604,9 @@ int hrtimers_prepare_cpu(unsigned int cp - - cpu_base->cpu = cpu; - hrtimer_init_hres(cpu_base); + #ifdef 
CONFIG_HIGH_RES_TIMERS +@@ -1792,6 +1820,9 @@ int hrtimers_prepare_cpu(unsigned int cp + cpu_base->hres_active = 0; + cpu_base->expires_next = KTIME_MAX; + cpu_base->softirq_expires_next = KTIME_MAX; +#ifdef CONFIG_PREEMPT_RT_BASE + init_waitqueue_head(&cpu_base->wait); +#endif @@ -124,7 +124,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c -@@ -195,6 +195,7 @@ int do_setitimer(int which, struct itime +@@ -214,6 +214,7 @@ int do_setitimer(int which, struct itime /* We are sharing ->siglock with it_real_fn() */ if (hrtimer_try_to_cancel(timer) < 0) { spin_unlock_irq(&tsk->sighand->siglock); @@ -134,14 +134,14 @@ Signed-off-by: Thomas Gleixner expires = timeval_to_ktime(value->it_value); --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c -@@ -829,6 +829,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_ +@@ -791,6 +791,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_ return overrun; } +/* + * Protected by RCU! + */ -+static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr) ++static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr) +{ +#ifdef CONFIG_PREEMPT_RT_FULL + if (kc->timer_set == common_timer_set) @@ -152,31 +152,31 @@ Signed-off-by: Thomas Gleixner +#endif +} + - /* Set a POSIX.1b interval timer. */ - /* timr->it_lock is taken. */ - static int -@@ -906,6 +920,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, + static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, + bool absolute, bool sigev_none) + { +@@ -885,6 +899,7 @@ static int do_timer_settime(timer_t time if (!timr) return -EINVAL; + rcu_read_lock(); - kc = clockid_to_kclock(timr->it_clock); + kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; -@@ -914,9 +929,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t, +@@ -893,9 +908,12 @@ static int do_timer_settime(timer_t time unlock_timer(timr, flag); if (error == TIMER_RETRY) { + timer_wait_for_callback(kc, timr); - rtn = NULL; // We already got the old time... + old_spec64 = NULL; // We already got the old time... + rcu_read_unlock(); goto retry; } + rcu_read_unlock(); - if (old_setting && !error && - copy_to_user(old_setting, &old_spec, sizeof (old_spec))) -@@ -954,10 +972,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t + return error; + } +@@ -977,10 +995,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t if (!timer) return -EINVAL; @@ -192,7 +192,7 @@ Signed-off-by: Thomas Gleixner spin_lock(¤t->sighand->siglock); list_del(&timer->list); -@@ -983,8 +1006,18 @@ static void itimer_delete(struct k_itime +@@ -1006,8 +1029,18 @@ static void itimer_delete(struct k_itime retry_delete: spin_lock_irqsave(&timer->it_lock, flags); diff --git a/debian/patches/features/all/rt/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch b/debian/patches/features/all/rt/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch deleted file mode 100644 index 4bed7ed5e..000000000 --- a/debian/patches/features/all/rt/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: Clark Williams -Date: Tue, 26 May 2015 10:43:43 -0500 -Subject: i915: bogus warning from i915 when running on PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The i915 driver has a 'WARN_ON(!in_interrupt())' in the display -handler, which whines constanly on the RT kernel (since the interrupt -is actually handled in a threaded handler and not actual interrupt -context). 
- -Change the WARN_ON to WARN_ON_NORT - -Tested-by: Joakim Hernberg -Signed-off-by: Clark Williams -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/gpu/drm/i915/intel_display.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/gpu/drm/i915/intel_display.c -+++ b/drivers/gpu/drm/i915/intel_display.c -@@ -12115,7 +12115,7 @@ void intel_check_page_flip(struct drm_i9 - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - struct intel_flip_work *work; - -- WARN_ON(!in_interrupt()); -+ WARN_ON_NONRT(!in_interrupt()); - - if (crtc == NULL) - return; diff --git a/debian/patches/features/all/rt/ide-use-nort-local-irq-variants.patch b/debian/patches/features/all/rt/ide-use-nort-local-irq-variants.patch index 6a67c106b..354f09e28 100644 --- a/debian/patches/features/all/rt/ide-use-nort-local-irq-variants.patch +++ b/debian/patches/features/all/rt/ide-use-nort-local-irq-variants.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:30:16 -0500 Subject: ide: Do not disable interrupts for PREEMPT-RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use the local_irq_*_nort variants. diff --git a/debian/patches/features/all/rt/infiniband-mellanox-ib-use-nort-irq.patch b/debian/patches/features/all/rt/infiniband-mellanox-ib-use-nort-irq.patch index 6700dafe8..c0ab94b2f 100644 --- a/debian/patches/features/all/rt/infiniband-mellanox-ib-use-nort-irq.patch +++ b/debian/patches/features/all/rt/infiniband-mellanox-ib-use-nort-irq.patch @@ -1,7 +1,7 @@ From: Sven-Thorsten Dietrich Date: Fri, 3 Jul 2009 08:30:35 -0500 Subject: infiniband: Mellanox IB driver patch use _nort() primitives -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Fixes in_atomic stack-dump, when Mellanox module is loaded into the RT Kernel. @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c -@@ -902,7 +902,7 @@ void ipoib_mcast_restart_task(struct wor +@@ -895,7 +895,7 @@ void ipoib_mcast_restart_task(struct wor ipoib_dbg_mcast(priv, "restarting multicast task\n"); @@ -30,12 +30,12 @@ Signed-off-by: Thomas Gleixner netif_addr_lock(dev); spin_lock(&priv->lock); -@@ -984,7 +984,7 @@ void ipoib_mcast_restart_task(struct wor +@@ -977,7 +977,7 @@ void ipoib_mcast_restart_task(struct wor spin_unlock(&priv->lock); netif_addr_unlock(dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); - /* - * make sure the in-flight joins have finished before we attempt + ipoib_mcast_remove_list(&remove_list); + diff --git a/debian/patches/features/all/rt/inpt-gameport-use-local-irq-nort.patch b/debian/patches/features/all/rt/inpt-gameport-use-local-irq-nort.patch index f4d8c831f..bf8b86193 100644 --- a/debian/patches/features/all/rt/inpt-gameport-use-local-irq-nort.patch +++ b/debian/patches/features/all/rt/inpt-gameport-use-local-irq-nort.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:30:16 -0500 Subject: input: gameport: Do not disable interrupts on PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use the _nort() primitives. 
diff --git a/debian/patches/features/all/rt/iommu-amd--Use-WARN_ON_NORT.patch b/debian/patches/features/all/rt/iommu-amd--Use-WARN_ON_NORT.patch index e43e7a1b4..9893ddf79 100644 --- a/debian/patches/features/all/rt/iommu-amd--Use-WARN_ON_NORT.patch +++ b/debian/patches/features/all/rt/iommu-amd--Use-WARN_ON_NORT.patch @@ -1,7 +1,7 @@ Subject: iommu/amd: Use WARN_ON_NORT in __attach_device() From: Thomas Gleixner Date: Sat, 27 Feb 2016 10:22:23 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RT does not disable interrupts here, but the protection is still correct. Fixup the WARN_ON so it won't yell on RT. @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c -@@ -1929,10 +1929,10 @@ static int __attach_device(struct iommu_ +@@ -1944,10 +1944,10 @@ static int __attach_device(struct iommu_ int ret; /* @@ -31,7 +31,7 @@ Signed-off-by: Thomas Gleixner /* lock domain */ spin_lock(&domain->lock); -@@ -2100,10 +2100,10 @@ static void __detach_device(struct iommu +@@ -2115,10 +2115,10 @@ static void __detach_device(struct iommu struct protection_domain *domain; /* diff --git a/debian/patches/features/all/rt/iommu-iova-Use-raw_cpu_ptr-instead-of-get_cpu_ptr-fo.patch b/debian/patches/features/all/rt/iommu-iova-Use-raw_cpu_ptr-instead-of-get_cpu_ptr-fo.patch new file mode 100644 index 000000000..b0a403bba --- /dev/null +++ b/debian/patches/features/all/rt/iommu-iova-Use-raw_cpu_ptr-instead-of-get_cpu_ptr-fo.patch @@ -0,0 +1,43 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 21 Sep 2017 17:21:40 +0200 +Subject: [PATCH] iommu/iova: Use raw_cpu_ptr() instead of get_cpu_ptr() for + ->fq +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +get_cpu_ptr() disabled preemption and returns the ->fq object of the +current CPU. raw_cpu_ptr() does the same except that it not disable +preemption which means the scheduler can move it to another CPU after it +obtained the per-CPU object. +In this case this is not bad because the data structure itself is +protected with a spin_lock. This change shouldn't matter however on RT +it does because the sleeping lock can't be accessed with disabled +preemption. 
+ +Cc: Joerg Roedel +Cc: iommu@lists.linux-foundation.org +Reported-by: vinadhy@gmail.com +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/iommu/iova.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/iommu/iova.c ++++ b/drivers/iommu/iova.c +@@ -570,7 +570,7 @@ void queue_iova(struct iova_domain *iova + unsigned long pfn, unsigned long pages, + unsigned long data) + { +- struct iova_fq *fq = get_cpu_ptr(iovad->fq); ++ struct iova_fq *fq = raw_cpu_ptr(iovad->fq); + unsigned long flags; + unsigned idx; + +@@ -600,8 +600,6 @@ void queue_iova(struct iova_domain *iova + if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) + mod_timer(&iovad->fq_timer, + jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); +- +- put_cpu_ptr(iovad->fq); + } + EXPORT_SYMBOL_GPL(queue_iova); + diff --git a/debian/patches/features/all/rt/iommu-iova-don-t-disable-preempt-around-this_cpu_ptr.patch b/debian/patches/features/all/rt/iommu-iova-don-t-disable-preempt-around-this_cpu_ptr.patch deleted file mode 100644 index 279156102..000000000 --- a/debian/patches/features/all/rt/iommu-iova-don-t-disable-preempt-around-this_cpu_ptr.patch +++ /dev/null @@ -1,82 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 15 Sep 2016 16:58:19 +0200 -Subject: [PATCH] iommu/iova: don't disable preempt around this_cpu_ptr() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Commit 583248e6620a ("iommu/iova: Disable preemption around use of -this_cpu_ptr()") disables preemption while accessing a per-CPU variable. -This does keep lockdep quiet. However I don't see the point why it is -bad if we get migrated after its access to another CPU. -__iova_rcache_insert() and __iova_rcache_get() immediately locks the -variable after obtaining it - before accessing its members. -_If_ we get migrated away after retrieving the address of cpu_rcache -before taking the lock then the *other* task on the same CPU will -retrieve the same address of cpu_rcache and will spin on the lock. - -alloc_iova_fast() disables preemption while invoking -free_cpu_cached_iovas() on each CPU. The function itself uses -per_cpu_ptr() which does not trigger a warning (like this_cpu_ptr() -does) because it assumes the caller knows what he does because he might -access the data structure from a different CPU (which means he needs -protection against concurrent access). - -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/iommu/iova.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - ---- a/drivers/iommu/iova.c -+++ b/drivers/iommu/iova.c -@@ -22,6 +22,7 @@ - #include - #include - #include -+#include - - static bool iova_rcache_insert(struct iova_domain *iovad, - unsigned long pfn, -@@ -419,10 +420,8 @@ alloc_iova_fast(struct iova_domain *iova - - /* Try replenishing IOVAs by flushing rcache. 
*/ - flushed_rcache = true; -- preempt_disable(); - for_each_online_cpu(cpu) - free_cpu_cached_iovas(cpu, iovad); -- preempt_enable(); - goto retry; - } - -@@ -750,7 +749,7 @@ static bool __iova_rcache_insert(struct - bool can_insert = false; - unsigned long flags; - -- cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); -+ cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); - spin_lock_irqsave(&cpu_rcache->lock, flags); - - if (!iova_magazine_full(cpu_rcache->loaded)) { -@@ -780,7 +779,6 @@ static bool __iova_rcache_insert(struct - iova_magazine_push(cpu_rcache->loaded, iova_pfn); - - spin_unlock_irqrestore(&cpu_rcache->lock, flags); -- put_cpu_ptr(rcache->cpu_rcaches); - - if (mag_to_free) { - iova_magazine_free_pfns(mag_to_free, iovad); -@@ -814,7 +812,7 @@ static unsigned long __iova_rcache_get(s - bool has_pfn = false; - unsigned long flags; - -- cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); -+ cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); - spin_lock_irqsave(&cpu_rcache->lock, flags); - - if (!iova_magazine_empty(cpu_rcache->loaded)) { -@@ -836,7 +834,6 @@ static unsigned long __iova_rcache_get(s - iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); - - spin_unlock_irqrestore(&cpu_rcache->lock, flags); -- put_cpu_ptr(rcache->cpu_rcaches); - - return iova_pfn; - } diff --git a/debian/patches/features/all/rt/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch b/debian/patches/features/all/rt/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch deleted file mode 100644 index 31728e3ab..000000000 --- a/debian/patches/features/all/rt/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 15 Sep 2016 17:16:44 +0200 -Subject: [PATCH] iommu/vt-d: don't disable preemption while accessing - deferred_flush() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -get_cpu() disables preemption and returns the current CPU number. The -CPU number is later only used once while retrieving the address of the -local's CPU deferred_flush pointer. -We can instead use raw_cpu_ptr() while we remain preemptible. The worst -thing that can happen is that flush_unmaps_timeout() is invoked multiple -times: once by taskA after seeing HIGH_WATER_MARK and then preempted to -another CPU and then by taskB which saw HIGH_WATER_MARK on the same CPU -as taskA. It is also likely that ->size got from HIGH_WATER_MARK to 0 -right after its read because another CPU invoked flush_unmaps_timeout() -for this CPU. -The access to flush_data is protected by a spinlock so even if we get -migrated to another CPU or preempted - the data structure is protected. - -While at it, I marked deferred_flush static since I can't find a -reference to it outside of this file. 
- -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/iommu/intel-iommu.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - ---- a/drivers/iommu/intel-iommu.c -+++ b/drivers/iommu/intel-iommu.c -@@ -480,7 +480,7 @@ struct deferred_flush_data { - struct deferred_flush_table *tables; - }; - --DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); -+static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); - - /* bitmap for indexing intel_iommus */ - static int g_num_of_iommus; -@@ -3720,10 +3720,8 @@ static void add_unmap(struct dmar_domain - struct intel_iommu *iommu; - struct deferred_flush_entry *entry; - struct deferred_flush_data *flush_data; -- unsigned int cpuid; - -- cpuid = get_cpu(); -- flush_data = per_cpu_ptr(&deferred_flush, cpuid); -+ flush_data = raw_cpu_ptr(&deferred_flush); - - /* Flush all CPUs' entries to avoid deferring too much. If - * this becomes a bottleneck, can just flush us, and rely on -@@ -3756,8 +3754,6 @@ static void add_unmap(struct dmar_domain - } - flush_data->size++; - spin_unlock_irqrestore(&flush_data->lock, flags); -- -- put_cpu(); - } - - static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) diff --git a/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch b/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch index 6074fc50f..85c7a9684 100644 --- a/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch +++ b/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch @@ -1,7 +1,7 @@ Subject: genirq: Allow disabling of softirq processing in irq thread context From: Thomas Gleixner Date: Tue, 31 Jan 2012 13:01:27 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The processing of softirqs in irq thread context is a performance gain for the non-rt workloads of a system, but it's counterproductive for @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -61,6 +61,7 @@ +@@ -63,6 +63,7 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 -@@ -74,6 +75,7 @@ +@@ -76,6 +77,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/irq.h +++ b/include/linux/irq.h -@@ -72,6 +72,7 @@ enum irqchip_irq_state; +@@ -74,6 +74,7 @@ enum irqchip_irq_state; * IRQ_IS_POLLED - Always polled by another interrupt. Exclude * it from the spurious interrupt detection * mechanism and from core side polling. 
@@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner * IRQ_DISABLE_UNLAZY - Disable lazy irq disable */ enum { -@@ -99,13 +100,14 @@ enum { +@@ -101,13 +102,14 @@ enum { IRQ_PER_CPU_DEVID = (1 << 17), IRQ_IS_POLLED = (1 << 18), IRQ_DISABLE_UNLAZY = (1 << 19), @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -883,7 +883,15 @@ irq_forced_thread_fn(struct irq_desc *de +@@ -885,7 +885,15 @@ irq_forced_thread_fn(struct irq_desc *de local_bh_disable(); ret = action->thread_fn(action->irq, action->dev_id); irq_finalize_oneshot(desc, action); @@ -82,19 +82,19 @@ Signed-off-by: Thomas Gleixner return ret; } -@@ -1342,6 +1350,9 @@ static int +@@ -1362,6 +1370,9 @@ static int irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } + if (new->flags & IRQF_NO_SOFTIRQ_CALL) + irq_settings_set_no_softirq_call(desc); + - /* Set default affinity mask once everything is setup */ - setup_affinity(desc, mask); - + if (irq_settings_can_autoenable(desc)) { + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + } else { --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h -@@ -16,6 +16,7 @@ enum { +@@ -17,6 +17,7 @@ enum { _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, @@ -102,7 +102,7 @@ Signed-off-by: Thomas Gleixner _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; -@@ -30,6 +31,7 @@ enum { +@@ -31,6 +32,7 @@ enum { #define IRQ_PER_CPU_DEVID GOT_YOU_MORON #define IRQ_IS_POLLED GOT_YOU_MORON #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON @@ -110,7 +110,7 @@ Signed-off-by: Thomas Gleixner #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON -@@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc +@@ -41,6 +43,16 @@ irq_settings_clr_and_set(struct irq_desc desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); } diff --git a/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch b/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch index ffb7c44f1..22dbaa222 100644 --- a/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch +++ b/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch @@ -1,7 +1,7 @@ Subject: irqwork: Move irq safe work to irq context From: Thomas Gleixner Date: Sun, 15 Nov 2015 18:40:17 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On architectures where arch_irq_work_has_interrupt() returns false, we end up running the irq safe work from the softirq context. 
That @@ -23,7 +23,7 @@ Cc: stable-rt@vger.kernel.org --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h -@@ -52,4 +52,10 @@ static inline bool irq_work_needs_cpu(vo +@@ -53,4 +53,10 @@ static inline bool irq_work_needs_cpu(vo static inline void irq_work_run(void) { } #endif @@ -56,7 +56,7 @@ Cc: stable-rt@vger.kernel.org * Synchronize against the irq_work @entry, ensures the entry is not --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1604,7 +1604,7 @@ void update_process_times(int user_tick) +@@ -1646,7 +1646,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -65,7 +65,7 @@ Cc: stable-rt@vger.kernel.org if (in_irq()) irq_work_tick(); #endif -@@ -1645,9 +1645,7 @@ static __latent_entropy void run_timer_s +@@ -1687,9 +1687,7 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); @@ -73,6 +73,6 @@ Cc: stable-rt@vger.kernel.org - irq_work_tick(); -#endif + irq_work_tick_soft(); - - __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + /* + * must_forward_clk must be cleared before running timers so that any + * timer functions that call mod_timer will not try to forward the diff --git a/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch b/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch index 8e0dfb7d0..726744a36 100644 --- a/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch +++ b/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch @@ -1,7 +1,7 @@ Subject: irqwork: push most work into softirq context From: Sebastian Andrzej Siewior Date: Tue, 23 Jun 2015 15:32:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Initially we defered all irqwork into softirq because we didn't want the latency spikes if perf or another user was busy and delayed the RT task. 
@@ -24,12 +24,12 @@ Signed-off-by: Sebastian Andrzej Siewior kernel/irq_work.c | 47 ++++++++++++++++++++++++++++++++++------------- kernel/sched/rt.c | 1 + kernel/time/tick-sched.c | 1 + - kernel/time/timer.c | 6 +++++- - 5 files changed, 42 insertions(+), 14 deletions(-) + kernel/time/timer.c | 5 ++++- + 5 files changed, 41 insertions(+), 14 deletions(-) --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h -@@ -16,6 +16,7 @@ +@@ -17,6 +17,7 @@ #define IRQ_WORK_BUSY 2UL #define IRQ_WORK_FLAGS 3UL #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ @@ -154,7 +154,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* We start is dequeued state, because no RT tasks are queued */ --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c -@@ -224,6 +224,7 @@ static void nohz_full_kick_func(struct i +@@ -230,6 +230,7 @@ static void nohz_full_kick_func(struct i static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { .func = nohz_full_kick_func, @@ -164,7 +164,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1604,7 +1604,7 @@ void update_process_times(int user_tick) +@@ -1646,7 +1646,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -173,14 +173,13 @@ Signed-off-by: Sebastian Andrzej Siewior if (in_irq()) irq_work_tick(); #endif -@@ -1645,6 +1645,10 @@ static __latent_entropy void run_timer_s +@@ -1687,6 +1687,9 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) + irq_work_tick(); +#endif -+ - __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) - __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); + /* + * must_forward_clk must be cleared before running timers so that any + * timer functions that call mod_timer will not try to forward the diff --git a/debian/patches/features/all/rt/jump-label-rt.patch b/debian/patches/features/all/rt/jump-label-rt.patch index 5772e0ce9..e15151af1 100644 --- a/debian/patches/features/all/rt/jump-label-rt.patch +++ b/debian/patches/features/all/rt/jump-label-rt.patch @@ -1,7 +1,7 @@ Subject: jump-label: disable if stop_machine() is used From: Thomas Gleixner Date: Wed, 08 Jul 2015 17:14:48 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Some architectures are using stop_machine() while switching the opcode which leads to latency spikes. 
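(Illustrative aside, not part of the patch: a jump label / static key compiles a
rarely-toggled condition down to a patched NOP on the hot path, which is why the
toggle has to rewrite an opcode at runtime; the key and function names below are
hypothetical.)

	#include <linux/jump_label.h>
	#include <linux/printk.h>

	DEFINE_STATIC_KEY_FALSE(my_feature_key);

	void hot_path(void)
	{
		/* emitted as a NOP and live-patched when the key flips; arches
		   that rewrite the opcode via stop_machine() stall every CPU
		   while doing so, hence the !PREEMPT_RT_BASE dependency added
		   in the Kconfig hunk below */
		if (static_branch_unlikely(&my_feature_key))
			pr_info("rare path taken\n");
	}

	void enable_feature(void)	/* slow path, never from the hot path */
	{
		static_branch_enable(&my_feature_key);
	}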
@@ -25,10 +25,10 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -42,7 +42,7 @@ config ARM +@@ -45,7 +45,7 @@ config ARM + select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU diff --git a/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch b/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch index 65dc66cb3..631a91f9e 100644 --- a/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch +++ b/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch @@ -1,7 +1,7 @@ Subject: kconfig: Disable config options which are not RT compatible From: Thomas Gleixner Date: Sun, 24 Jul 2011 12:11:43 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Disable stuff which is known to have issues on RT @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/Kconfig +++ b/arch/Kconfig -@@ -12,6 +12,7 @@ config OPROFILE +@@ -17,6 +17,7 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner help --- a/mm/Kconfig +++ b/mm/Kconfig -@@ -410,7 +410,7 @@ config NOMMU_INITIAL_TRIM_EXCESS +@@ -385,7 +385,7 @@ config NOMMU_INITIAL_TRIM_EXCESS config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" diff --git a/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch b/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch index 23389bb5c..e4c41f879 100644 --- a/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch +++ b/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch @@ -1,7 +1,7 @@ Subject: kconfig: Add PREEMPT_RT_FULL From: Thomas Gleixner Date: Wed, 29 Jun 2011 14:58:57 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Introduce the final symbol for PREEMPT_RT_FULL. 
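(Illustrative aside, not part of the patch: once the symbol exists, RT-only code
paths elsewhere in the series are gated on it with the usual Kconfig idioms, as
in the timer.c hunks quoted earlier; sketch only, the function is hypothetical.)

	#include <linux/kconfig.h>
	#include <linux/printk.h>

	static void report_preempt_model(void)
	{
		if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
			pr_info("fully preemptible (RT) kernel\n");
		/* code that must not even compile on !RT kernels is wrapped
		   in #ifdef CONFIG_PREEMPT_RT_FULL instead */
	}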
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/init/Makefile +++ b/init/Makefile -@@ -35,4 +35,4 @@ mounts-$(CONFIG_BLK_DEV_MD) += do_mounts +@@ -36,4 +36,4 @@ mounts-$(CONFIG_BLK_DEV_MD) += do_mounts include/generated/compile.h: FORCE @$($(quiet)chk_compile.h) $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner config PREEMPT_COUNT --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h -@@ -4,7 +4,8 @@ TARGET=$1 +@@ -5,7 +5,8 @@ TARGET=$1 ARCH=$2 SMP=$3 PREEMPT=$4 @@ -49,7 +49,7 @@ Signed-off-by: Thomas Gleixner vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } -@@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION" +@@ -58,6 +59,7 @@ UTS_VERSION="#$VERSION" CONFIG_FLAGS="" if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi diff --git a/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch b/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch index d0660e168..4e5b78110 100644 --- a/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch +++ b/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Tue, 19 Mar 2013 14:44:30 +0100 Subject: kernel/SRCU: provide a static initializer -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz There are macros for static initializer for the three out of four possible notifier types, that are: @@ -14,13 +14,14 @@ complete. Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/notifier.h | 34 +++++++++++++++++++++++++--------- - include/linux/srcu.h | 6 +++--- - 2 files changed, 28 insertions(+), 12 deletions(-) + include/linux/notifier.h | 42 +++++++++++++++++++++++++++++++++--------- + include/linux/srcutiny.h | 6 +++--- + include/linux/srcutree.h | 6 +++--- + 3 files changed, 39 insertions(+), 15 deletions(-) --- a/include/linux/notifier.h +++ b/include/linux/notifier.h -@@ -6,7 +6,7 @@ +@@ -7,7 +7,7 @@ * * Alan Cox */ @@ -29,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifndef _LINUX_NOTIFIER_H #define _LINUX_NOTIFIER_H #include -@@ -42,9 +42,7 @@ +@@ -43,9 +43,7 @@ * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. * As compensation, srcu_notifier_chain_unregister() is rather expensive. 
* SRCU notifier chains should be used when the chain will be called very @@ -40,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ struct notifier_block; -@@ -90,7 +88,7 @@ struct srcu_notifier_head { +@@ -91,7 +89,7 @@ struct srcu_notifier_head { (name)->head = NULL; \ } while (0) @@ -49,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define srcu_cleanup_notifier_head(name) \ cleanup_srcu_struct(&(name)->srcu); -@@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(stru +@@ -104,7 +102,13 @@ extern void srcu_init_notifier_head(stru .head = NULL } #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } @@ -64,15 +65,23 @@ Signed-off-by: Sebastian Andrzej Siewior #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ -@@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(stru +@@ -116,6 +120,26 @@ extern void srcu_init_notifier_head(stru struct raw_notifier_head name = \ RAW_NOTIFIER_INIT(name) ++#ifdef CONFIG_TREE_SRCU +#define _SRCU_NOTIFIER_HEAD(name, mod) \ -+ static DEFINE_PER_CPU(struct srcu_array, \ -+ name##_head_srcu_array); \ ++ static DEFINE_PER_CPU(struct srcu_data, \ ++ name##_head_srcu_data); \ + mod struct srcu_notifier_head name = \ -+ SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) ++ SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) ++ ++#else ++#define _SRCU_NOTIFIER_HEAD(name, mod) \ ++ mod struct srcu_notifier_head name = \ ++ SRCU_NOTIFIER_INIT(name, name) ++ ++#endif + +#define SRCU_NOTIFIER_HEAD(name) \ + _SRCU_NOTIFIER_HEAD(name, ) @@ -83,7 +92,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef __KERNEL__ extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, -@@ -184,12 +200,12 @@ static inline int notifier_to_errno(int +@@ -185,12 +209,12 @@ static inline int notifier_to_errno(int /* * Declared notifiers so far. I can imagine quite a few more chains @@ -99,27 +108,49 @@ Signed-off-by: Sebastian Andrzej Siewior /* CPU notfiers are defined in include/linux/cpu.h. */ /* netdevice notifiers are defined in include/linux/netdevice.h */ ---- a/include/linux/srcu.h -+++ b/include/linux/srcu.h -@@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct +--- a/include/linux/srcutiny.h ++++ b/include/linux/srcutiny.h +@@ -43,7 +43,7 @@ struct srcu_struct { - void process_srcu(struct work_struct *work); + void srcu_drive_gp(struct work_struct *wp); + +-#define __SRCU_STRUCT_INIT(name) \ ++#define __SRCU_STRUCT_INIT(name, __ignored) \ + { \ + .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ + .srcu_cb_tail = &name.srcu_cb_head, \ +@@ -56,9 +56,9 @@ void srcu_drive_gp(struct work_struct *w + * Tree SRCU, which needs some per-CPU data. 
+ */ + #define DEFINE_SRCU(name) \ +- struct srcu_struct name = __SRCU_STRUCT_INIT(name) ++ struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) + #define DEFINE_STATIC_SRCU(name) \ +- static struct srcu_struct name = __SRCU_STRUCT_INIT(name) ++ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) + + void synchronize_srcu(struct srcu_struct *sp); + +--- a/include/linux/srcutree.h ++++ b/include/linux/srcutree.h +@@ -104,9 +104,9 @@ struct srcu_struct { + #define SRCU_STATE_SCAN1 1 + #define SRCU_STATE_SCAN2 2 -#define __SRCU_STRUCT_INIT(name) \ +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ { \ - .completed = -300, \ -- .per_cpu_ref = &name##_srcu_array, \ -+ .per_cpu_ref = &pcpu_name, \ - .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .running = false, \ - .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ -@@ -119,7 +119,7 @@ void process_srcu(struct work_struct *wo +- .sda = &name##_srcu_data, \ ++ .sda = &pcpu_name, \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .srcu_gp_seq_needed = 0 - 1, \ + __SRCU_DEP_MAP_INIT(name) \ +@@ -133,7 +133,7 @@ struct srcu_struct { */ #define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ + static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) -+ is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array) ++ is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data) #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) diff --git a/debian/patches/features/all/rt/kernel-cpu-fix-cpu-down-problem-if-kthread-s-cpu-is-.patch b/debian/patches/features/all/rt/kernel-cpu-fix-cpu-down-problem-if-kthread-s-cpu-is-.patch deleted file mode 100644 index 585f5b513..000000000 --- a/debian/patches/features/all/rt/kernel-cpu-fix-cpu-down-problem-if-kthread-s-cpu-is-.patch +++ /dev/null @@ -1,86 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 7 Jun 2013 22:37:06 +0200 -Subject: kernel/cpu: fix cpu down problem if kthread's cpu is going down -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -If kthread is pinned to CPUx and CPUx is going down then we get into -trouble: -- first the unplug thread is created -- it will set itself to hp->unplug. As a result, every task that is - going to take a lock, has to leave the CPU. -- the CPU_DOWN_PREPARE notifier are started. The worker thread will - start a new process for the "high priority worker". - Now kthread would like to take a lock but since it can't leave the CPU - it will never complete its task. - -We could fire the unplug thread after the notifier but then the cpu is -no longer marked "online" and the unplug thread will run on CPU0 which -was fixed before :) - -So instead the unplug thread is started and kept waiting until the -notfier complete their work. 
- -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/cpu.c | 15 +++++++++++++-- - 1 file changed, 13 insertions(+), 2 deletions(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -254,6 +254,7 @@ struct hotplug_pcp { - int refcount; - int grab_lock; - struct completion synced; -+ struct completion unplug_wait; - #ifdef CONFIG_PREEMPT_RT_FULL - /* - * Note, on PREEMPT_RT, the hotplug lock must save the state of -@@ -357,6 +358,7 @@ static int sync_unplug_thread(void *data - { - struct hotplug_pcp *hp = data; - -+ wait_for_completion(&hp->unplug_wait); - preempt_disable(); - hp->unplug = current; - wait_for_pinned_cpus(hp); -@@ -422,6 +424,14 @@ static void __cpu_unplug_sync(struct hot - wait_for_completion(&hp->synced); - } - -+static void __cpu_unplug_wait(unsigned int cpu) -+{ -+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); -+ -+ complete(&hp->unplug_wait); -+ wait_for_completion(&hp->synced); -+} -+ - /* - * Start the sync_unplug_thread on the target cpu and wait for it to - * complete. -@@ -445,6 +455,7 @@ static int cpu_unplug_begin(unsigned int - tell_sched_cpu_down_begin(cpu); - - init_completion(&hp->synced); -+ init_completion(&hp->unplug_wait); - - hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); - if (IS_ERR(hp->sync_tsk)) { -@@ -460,8 +471,7 @@ static int cpu_unplug_begin(unsigned int - * wait for tasks that are going to enter these sections and - * we must not have them block. - */ -- __cpu_unplug_sync(hp); -- -+ wake_up_process(hp->sync_tsk); - return 0; - } - -@@ -971,6 +981,7 @@ static int takedown_cpu(unsigned int cpu - struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); - int err; - -+ __cpu_unplug_wait(cpu); - /* Park the smpboot threads */ - kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); - smpboot_park_threads(cpu); diff --git a/debian/patches/features/all/rt/kernel-hotplug-restore-original-cpu-mask-oncpu-down.patch b/debian/patches/features/all/rt/kernel-hotplug-restore-original-cpu-mask-oncpu-down.patch deleted file mode 100644 index e89872d44..000000000 --- a/debian/patches/features/all/rt/kernel-hotplug-restore-original-cpu-mask-oncpu-down.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 14 Jun 2013 17:16:35 +0200 -Subject: kernel/hotplug: restore original cpu mask oncpu/down -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -If a task which is allowed to run only on CPU X puts CPU Y down then it -will be allowed on all CPUs but the on CPU Y after it comes back from -kernel. This patch ensures that we don't lose the initial setting unless -the CPU the task is running is going down. 
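(Illustrative aside, not part of the series: this patch file is removed here; its
core was the save/restore-affinity pattern sketched below. The cpus_mask field
name follows the RT series (mainline 4.14 calls it cpus_allowed) and the
surrounding work is hypothetical.)

	#include <linux/cpumask.h>
	#include <linux/gfp.h>
	#include <linux/sched.h>

	static int run_off_dying_cpu(const struct cpumask *allowed_now)
	{
		cpumask_var_t saved;

		if (!alloc_cpumask_var(&saved, GFP_KERNEL))
			return -ENOMEM;

		/* remember the caller's original affinity */
		cpumask_copy(saved, &current->cpus_mask);
		set_cpus_allowed_ptr(current, allowed_now);

		/* ... take the CPU down ... */

		/* restore the original setting instead of leaking the new one */
		set_cpus_allowed_ptr(current, saved);
		free_cpumask_var(saved);
		return 0;
	}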
- - -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/cpu.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -1065,6 +1065,7 @@ static int __ref _cpu_down(unsigned int - int prev_state, ret = 0; - int mycpu; - cpumask_var_t cpumask; -+ cpumask_var_t cpumask_org; - - if (num_online_cpus() == 1) - return -EBUSY; -@@ -1075,6 +1076,12 @@ static int __ref _cpu_down(unsigned int - /* Move the downtaker off the unplug cpu */ - if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) - return -ENOMEM; -+ if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) { -+ free_cpumask_var(cpumask); -+ return -ENOMEM; -+ } -+ -+ cpumask_copy(cpumask_org, ¤t->cpus_mask); - cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); - set_cpus_allowed_ptr(current, cpumask); - free_cpumask_var(cpumask); -@@ -1083,7 +1090,8 @@ static int __ref _cpu_down(unsigned int - if (mycpu == cpu) { - printk(KERN_ERR "Yuck! Still on unplug CPU\n!"); - migrate_enable(); -- return -EBUSY; -+ ret = -EBUSY; -+ goto restore_cpus; - } - - cpu_hotplug_begin(); -@@ -1133,6 +1141,9 @@ static int __ref _cpu_down(unsigned int - out_cancel: - cpu_hotplug_done(); - migrate_enable(); -+restore_cpus: -+ set_cpus_allowed_ptr(current, cpumask_org); -+ free_cpumask_var(cpumask_org); - return ret; - } - diff --git a/debian/patches/features/all/rt/kernel-locking-use-an-exclusive-wait_q-for-sleeper.patch b/debian/patches/features/all/rt/kernel-locking-use-an-exclusive-wait_q-for-sleeper.patch deleted file mode 100644 index 05a371c63..000000000 --- a/debian/patches/features/all/rt/kernel-locking-use-an-exclusive-wait_q-for-sleeper.patch +++ /dev/null @@ -1,142 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 22 Jun 2017 17:53:34 +0200 -Subject: [PATCH] kernel/locking: use an exclusive wait_q for sleepers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -If a task is queued as a sleeper for a wakeup and never goes to -schedule() (because it just obtained the lock) then it will receive a -spurious wake up which is not "bad", it is considered. Until that wake -up happens this task can no be enqueued for any wake ups handled by the -WAKE_Q infrastructure (because a task can only be enqueued once). This -wouldn't be bad if we would use the same wakeup mechanism for the wake -up of sleepers as we do for "normal" wake ups. But we don't… - -So. - T1 T2 T3 - spin_lock(x) spin_unlock(x); - wake_q_add_sleeper(q1, T1) - spin_unlock(x) - set_state(TASK_INTERRUPTIBLE) - if (!condition) - schedule() - condition = true - wake_q_add(q2, T1) - // T1 not added, still enqueued - wake_up_q(q2) - wake_up_q_sleeper(q1) - // T1 not woken up, wrong task state - -In order to solve this race this patch adds a wake_q_node for the -sleeper case. 
- -Reported-by: Mike Galbraith -Cc: stable-rt@vger.kernel.org -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/sched.h | 1 + - include/linux/sched/wake_q.h | 16 ++++++++++++++-- - kernel/fork.c | 1 + - kernel/locking/rtmutex.c | 2 +- - kernel/sched/core.c | 21 ++++++++++++++++----- - 5 files changed, 33 insertions(+), 8 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -800,6 +800,7 @@ struct task_struct { - raw_spinlock_t pi_lock; - - struct wake_q_node wake_q; -+ struct wake_q_node wake_q_sleeper; - - #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task: */ ---- a/include/linux/sched/wake_q.h -+++ b/include/linux/sched/wake_q.h -@@ -46,8 +46,20 @@ static inline void wake_q_init(struct wa - head->lastp = &head->first; - } - --extern void wake_q_add(struct wake_q_head *head, -- struct task_struct *task); -+extern void __wake_q_add(struct wake_q_head *head, -+ struct task_struct *task, bool sleeper); -+static inline void wake_q_add(struct wake_q_head *head, -+ struct task_struct *task) -+{ -+ __wake_q_add(head, task, false); -+} -+ -+static inline void wake_q_add_sleeper(struct wake_q_head *head, -+ struct task_struct *task) -+{ -+ __wake_q_add(head, task, true); -+} -+ - extern void __wake_up_q(struct wake_q_head *head, bool sleeper); - static inline void wake_up_q(struct wake_q_head *head) - { ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -575,6 +575,7 @@ static struct task_struct *dup_task_stru - tsk->splice_pipe = NULL; - tsk->task_frag.page = NULL; - tsk->wake_q.next = NULL; -+ tsk->wake_q_sleeper.next = NULL; - - account_kernel_stack(tsk, 1); - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1463,7 +1463,7 @@ static void mark_wakeup_next_waiter(stru - */ - preempt_disable(); - if (waiter->savestate) -- wake_q_add(wake_sleeper_q, waiter->task); -+ wake_q_add_sleeper(wake_sleeper_q, waiter->task); - else - wake_q_add(wake_q, waiter->task); - raw_spin_unlock(¤t->pi_lock); ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -437,9 +437,15 @@ static bool set_nr_if_polling(struct tas - #endif - #endif - --void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+void __wake_q_add(struct wake_q_head *head, struct task_struct *task, -+ bool sleeper) - { -- struct wake_q_node *node = &task->wake_q; -+ struct wake_q_node *node; -+ -+ if (sleeper) -+ node = &task->wake_q_sleeper; -+ else -+ node = &task->wake_q; - - /* - * Atomically grab the task, if ->wake_q is !nil already it means -@@ -468,12 +474,17 @@ void __wake_up_q(struct wake_q_head *hea - while (node != WAKE_Q_TAIL) { - struct task_struct *task; - -- task = container_of(node, struct task_struct, wake_q); -+ if (sleeper) -+ task = container_of(node, struct task_struct, wake_q_sleeper); -+ else -+ task = container_of(node, struct task_struct, wake_q); - BUG_ON(!task); - /* Task can safely be re-inserted now: */ - node = node->next; -- task->wake_q.next = NULL; -- -+ if (sleeper) -+ task->wake_q_sleeper.next = NULL; -+ else -+ task->wake_q.next = NULL; - /* - * wake_up_process() implies a wmb() to pair with the queueing - * in wake_q_add() so as not to miss wakeups. 
diff --git a/debian/patches/features/all/rt/kernel-perf-mark-perf_cpu_context-s-timer-as-irqsafe.patch b/debian/patches/features/all/rt/kernel-perf-mark-perf_cpu_context-s-timer-as-irqsafe.patch deleted file mode 100644 index ea918fd4f..000000000 --- a/debian/patches/features/all/rt/kernel-perf-mark-perf_cpu_context-s-timer-as-irqsafe.patch +++ /dev/null @@ -1,25 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 4 Feb 2016 16:38:10 +0100 -Subject: [PATCH] kernel/perf: mark perf_cpu_context's timer as irqsafe -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Otherwise we get a WARN_ON() backtrace and some events are reported as -"not counted". - -Cc: stable-rt@vger.kernel.org -Reported-by: Yang Shi -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/events/core.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/kernel/events/core.c -+++ b/kernel/events/core.c -@@ -1043,6 +1043,7 @@ static void __perf_mux_hrtimer_init(stru - raw_spin_lock_init(&cpuctx->hrtimer_lock); - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); - timer->function = perf_mux_hrtimer_handler; -+ timer->irqsafe = 1; - } - - static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) diff --git a/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch b/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch index a635e502f..cf7578530 100644 --- a/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch +++ b/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 19 May 2016 17:45:27 +0200 Subject: [PATCH] kernel/printk: Don't try to print from IRQ/NMI region -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On -RT we try to acquire sleeping locks which might lead to warnings from lockdep or a warn_on() from spin_try_lock() (which is a rtmutex on @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1630,6 +1630,11 @@ static void call_console_drivers(const c +@@ -1617,6 +1617,11 @@ static void call_console_drivers(const c if (!console_drivers) return; @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior migrate_disable(); for_each_console(con) { if (exclusive_console && con != exclusive_console) -@@ -2357,6 +2362,11 @@ void console_unblank(void) +@@ -2349,6 +2354,11 @@ void console_unblank(void) { struct console *c; diff --git a/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch b/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch index d46c35fae..a16385bbd 100644 --- a/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch +++ b/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch @@ -4,7 +4,7 @@ Subject: [PATCH] kernel: sched: Provide a pointer to the valid CPU mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz In commit 4b53a3412d66 ("sched/core: Remove the 
tsk_nr_cpus_allowed() wrapper") the tsk_nr_cpus_allowed() wrapper was removed. There was not @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior include/linux/init_task.h | 3 +- include/linux/sched.h | 5 ++- kernel/cgroup/cpuset.c | 2 - - kernel/fork.c | 2 + + kernel/fork.c | 3 +- kernel/sched/core.c | 42 ++++++++++++++--------------- kernel/sched/cpudeadline.c | 4 +- kernel/sched/cpupri.c | 4 +- @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior kernel/trace/trace_hwlat.c | 2 - lib/smp_processor_id.c | 2 - samples/trace_events/trace-events-sample.c | 2 - - 24 files changed, 78 insertions(+), 76 deletions(-) + 24 files changed, 78 insertions(+), 77 deletions(-) --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior } while(0) --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c -@@ -176,7 +176,7 @@ asmlinkage long mipsmt_sys_sched_getaffi +@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffi if (retval) goto out_unlock; @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c -@@ -1193,12 +1193,12 @@ static void mt_ase_fp_affinity(void) +@@ -1192,12 +1192,12 @@ static void mt_ase_fp_affinity(void) * restricted the allowed set to exclude any CPUs with FPUs, * we'll skip the procedure. */ @@ -275,7 +275,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!find_hca(cpu, &unit) && unit >= 0) --- a/fs/proc/array.c +++ b/fs/proc/array.c -@@ -364,9 +364,9 @@ static inline void task_context_switch_c +@@ -361,9 +361,9 @@ static inline void task_context_switch_c static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t%*pb\n", @@ -289,7 +289,7 @@ Signed-off-by: Sebastian Andrzej Siewior int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, --- a/include/linux/init_task.h +++ b/include/linux/init_task.h -@@ -226,7 +226,8 @@ extern struct cred init_cred; +@@ -234,7 +234,8 @@ extern struct cred init_cred; .static_prio = MAX_PRIO-20, \ .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ @@ -301,7 +301,7 @@ Signed-off-by: Sebastian Andrzej Siewior .active_mm = &init_mm, \ --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -535,7 +535,8 @@ struct task_struct { +@@ -578,7 +578,8 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; @@ -311,7 +311,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -@@ -1224,7 +1225,7 @@ extern struct pid *cad_pid; +@@ -1315,7 +1316,7 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ @@ -322,7 +322,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c -@@ -2092,7 +2092,7 @@ static void cpuset_fork(struct task_stru +@@ -2094,7 +2094,7 @@ static void cpuset_fork(struct task_stru if (task_css_is_root(task, cpuset_cgrp_id)) return; @@ -333,10 +333,11 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -539,6 +539,8 @@ static struct task_struct *dup_task_stru - tsk->stack_canary = get_random_long(); +@@ -564,7 +564,8 @@ static struct task_struct *dup_task_stru + #ifdef CONFIG_CC_STACKPROTECTOR + tsk->stack_canary = 
get_random_canary(); #endif - +- + if (orig->cpus_ptr == &orig->cpus_mask) + tsk->cpus_ptr = &tsk->cpus_mask; /* @@ -344,16 +345,16 @@ Signed-off-by: Sebastian Andrzej Siewior * parent) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -986,7 +986,7 @@ static struct rq *__migrate_task(struct - return rq; +@@ -960,7 +960,7 @@ static struct rq *__migrate_task(struct + } /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(dest_cpu, p->cpus_ptr)) return rq; - rq = move_queued_task(rq, p, dest_cpu); -@@ -1012,7 +1012,7 @@ static int migration_cpu_stop(void *data + update_rq_clock(rq); +@@ -988,7 +988,7 @@ static int migration_cpu_stop(void *data local_irq_disable(); /* * We need to explicitly wake pending tasks before running @@ -362,7 +363,7 @@ Signed-off-by: Sebastian Andrzej Siewior * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. */ sched_ttwu_pending(); -@@ -1043,7 +1043,7 @@ static int migration_cpu_stop(void *data +@@ -1019,7 +1019,7 @@ static int migration_cpu_stop(void *data */ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) { @@ -371,7 +372,7 @@ Signed-off-by: Sebastian Andrzej Siewior p->nr_cpus_allowed = cpumask_weight(new_mask); } -@@ -1113,7 +1113,7 @@ static int __set_cpus_allowed_ptr(struct +@@ -1089,7 +1089,7 @@ static int __set_cpus_allowed_ptr(struct goto out; } @@ -380,7 +381,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; if (!cpumask_intersects(new_mask, cpu_valid_mask)) { -@@ -1264,10 +1264,10 @@ static int migrate_swap_stop(void *data) +@@ -1250,10 +1250,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; @@ -393,7 +394,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); -@@ -1308,10 +1308,10 @@ int migrate_swap(struct task_struct *cur +@@ -1294,10 +1294,10 @@ int migrate_swap(struct task_struct *cur if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; @@ -406,7 +407,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); -@@ -1455,7 +1455,7 @@ void kick_process(struct task_struct *p) +@@ -1441,7 +1441,7 @@ void kick_process(struct task_struct *p) EXPORT_SYMBOL_GPL(kick_process); /* @@ -415,7 +416,7 @@ Signed-off-by: Sebastian Andrzej Siewior * * A few notes on cpu_active vs cpu_online: * -@@ -1495,14 +1495,14 @@ static int select_fallback_rq(int cpu, s +@@ -1481,14 +1481,14 @@ static int select_fallback_rq(int cpu, s for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; @@ -432,7 +433,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) continue; if (!cpu_online(dest_cpu)) -@@ -1547,7 +1547,7 @@ static int select_fallback_rq(int cpu, s +@@ -1533,7 +1533,7 @@ static int select_fallback_rq(int cpu, s } /* @@ -441,7 +442,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ static inline int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) -@@ -1557,11 +1557,11 @@ int select_task_rq(struct task_struct *p +@@ -1543,11 +1543,11 @@ int select_task_rq(struct task_struct *p if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else @@ -455,7 +456,7 @@ Signed-off-by: Sebastian Andrzej Siewior * CPU. * * Since this is common to all placement strategies, this lives here. 
-@@ -1569,7 +1569,7 @@ int select_task_rq(struct task_struct *p +@@ -1555,7 +1555,7 @@ int select_task_rq(struct task_struct *p * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ @@ -464,7 +465,7 @@ Signed-off-by: Sebastian Andrzej Siewior !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); -@@ -2543,7 +2543,7 @@ void wake_up_new_task(struct task_struct +@@ -2445,7 +2445,7 @@ void wake_up_new_task(struct task_struct #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: @@ -473,7 +474,7 @@ Signed-off-by: Sebastian Andrzej Siewior * - any previously selected CPU might disappear through hotplug * * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -@@ -4315,7 +4315,7 @@ static int __sched_setscheduler(struct t +@@ -4162,7 +4162,7 @@ static int __sched_setscheduler(struct t * the entire root_domain to become SCHED_DEADLINE. We * will also fail if there's no bandwidth available. */ @@ -482,7 +483,7 @@ Signed-off-by: Sebastian Andrzej Siewior rq->rd->dl_bw.bw == 0) { task_rq_unlock(rq, p, &rf); return -EPERM; -@@ -4909,7 +4909,7 @@ long sched_getaffinity(pid_t pid, struct +@@ -4756,7 +4756,7 @@ long sched_getaffinity(pid_t pid, struct goto out_unlock; raw_spin_lock_irqsave(&p->pi_lock, flags); @@ -491,7 +492,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irqrestore(&p->pi_lock, flags); out_unlock: -@@ -5469,7 +5469,7 @@ int task_can_attach(struct task_struct * +@@ -5321,7 +5321,7 @@ int task_can_attach(struct task_struct * * allowed nodes is unnecessary. Thus, cpusets are not * applicable for such threads. This prevents checking for * success of set_cpus_allowed_ptr() on all attached tasks @@ -500,7 +501,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ if (p->flags & PF_NO_SETAFFINITY) { ret = -EINVAL; -@@ -5525,7 +5525,7 @@ int migrate_task_to(struct task_struct * +@@ -5348,7 +5348,7 @@ int migrate_task_to(struct task_struct * if (curr_cpu == target_cpu) return 0; @@ -509,8 +510,8 @@ Signed-off-by: Sebastian Andrzej Siewior return -EINVAL; /* TODO: This is not properly updating schedstats */ -@@ -5665,7 +5665,7 @@ static void migrate_tasks(struct rq *dea - next->sched_class->put_prev_task(rq, next); +@@ -5485,7 +5485,7 @@ static void migrate_tasks(struct rq *dea + put_prev_task(rq, next); /* - * Rules for changing task_struct::cpus_allowed are holding @@ -520,19 +521,22 @@ Signed-off-by: Sebastian Andrzej Siewior * --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c -@@ -128,10 +128,10 @@ int cpudl_find(struct cpudl *cp, struct +@@ -127,13 +127,13 @@ int cpudl_find(struct cpudl *cp, struct const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { + cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) { - best_cpu = cpumask_any(later_mask); - goto out; -- } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && -+ } else if (cpumask_test_cpu(cpudl_maximum(cp), p->cpus_ptr) && - dl_time_before(dl_se->deadline, cp->elements[0].dl)) { - best_cpu = cpudl_maximum(cp); - if (later_mask) + return 1; + } else { + int best_cpu = cpudl_maximum(cp); + WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); + +- if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) && ++ if (cpumask_test_cpu(best_cpu, p->cpus_ptr) && + dl_time_before(dl_se->deadline, cp->elements[0].dl)) { + if (later_mask) + cpumask_set_cpu(best_cpu, later_mask); --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -103,11 +103,11 
@@ int cpupri_find(struct cpupri *cp, struc @@ -551,7 +555,7 @@ Signed-off-by: Sebastian Andrzej Siewior * We have to ensure that we have at least one bit --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -252,7 +252,7 @@ static struct rq *dl_task_offline_migrat +@@ -504,7 +504,7 @@ static struct rq *dl_task_offline_migrat * If we cannot preempt any rq, fall back to pick any * online cpu. */ @@ -560,7 +564,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (cpu >= nr_cpu_ids) { /* * Fail to find any suitable cpu. -@@ -1286,7 +1286,7 @@ static void set_curr_task_dl(struct rq * +@@ -1749,7 +1749,7 @@ static void set_curr_task_dl(struct rq * static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && @@ -569,7 +573,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 1; return 0; } -@@ -1435,7 +1435,7 @@ static struct rq *find_lock_later_rq(str +@@ -1899,7 +1899,7 @@ static struct rq *find_lock_later_rq(str /* Retry if something changed. */ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || @@ -580,7 +584,7 @@ Signed-off-by: Sebastian Andrzej Siewior !task_on_rq_queued(task))) { --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -1553,7 +1553,7 @@ static void task_numa_compare(struct tas +@@ -1596,7 +1596,7 @@ static void task_numa_compare(struct tas */ if (cur) { /* Skip this swap candidate if cannot move to the source cpu */ @@ -589,7 +593,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto unlock; /* -@@ -1663,7 +1663,7 @@ static void task_numa_find_cpu(struct ta +@@ -1706,7 +1706,7 @@ static void task_numa_find_cpu(struct ta for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ @@ -598,34 +602,34 @@ Signed-off-by: Sebastian Andrzej Siewior continue; env->dst_cpu = cpu; -@@ -5460,7 +5460,7 @@ find_idlest_group(struct sched_domain *s +@@ -5475,7 +5475,7 @@ find_idlest_group(struct sched_domain *s /* Skip over this group if it has no CPUs allowed */ - if (!cpumask_intersects(sched_group_cpus(group), + if (!cpumask_intersects(sched_group_span(group), - &p->cpus_allowed)) + p->cpus_ptr)) continue; local_group = cpumask_test_cpu(this_cpu, -@@ -5580,7 +5580,7 @@ find_idlest_cpu(struct sched_group *grou - return cpumask_first(sched_group_cpus(group)); +@@ -5595,7 +5595,7 @@ find_idlest_cpu(struct sched_group *grou + return cpumask_first(sched_group_span(group)); /* Traverse only the allowed CPUs */ -- for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { -+ for_each_cpu_and(i, sched_group_cpus(group), p->cpus_ptr) { +- for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { ++ for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { if (idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); -@@ -5719,7 +5719,7 @@ static int select_idle_core(struct task_ +@@ -5697,7 +5697,7 @@ static int select_idle_core(struct task_ if (!test_idle_cores(target, false)) return -1; - cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); - for_each_cpu_wrap(core, cpus, target, wrap) { + for_each_cpu_wrap(core, cpus, target) { bool idle = true; -@@ -5753,7 +5753,7 @@ static int select_idle_smt(struct task_s +@@ -5731,7 +5731,7 @@ static int select_idle_smt(struct task_s return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { @@ -634,16 +638,16 @@ Signed-off-by: Sebastian Andrzej Siewior continue; if (idle_cpu(cpu)) return cpu; -@@ -5805,7 +5805,7 @@ static int 
select_idle_cpu(struct task_s - time = local_clock(); - - for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { +@@ -5794,7 +5794,7 @@ static int select_idle_cpu(struct task_s + for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { + if (!--nr) + return -1; - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; if (idle_cpu(cpu)) break; -@@ -5960,7 +5960,7 @@ select_task_rq_fair(struct task_struct * +@@ -5949,7 +5949,7 @@ select_task_rq_fair(struct task_struct * if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) @@ -652,7 +656,7 @@ Signed-off-by: Sebastian Andrzej Siewior } rcu_read_lock(); -@@ -6693,14 +6693,14 @@ int can_migrate_task(struct task_struct +@@ -6698,14 +6698,14 @@ int can_migrate_task(struct task_struct /* * We do not migrate tasks that are: * 1) throttled_lb_pair, or @@ -669,7 +673,7 @@ Signed-off-by: Sebastian Andrzej Siewior int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); -@@ -6720,7 +6720,7 @@ int can_migrate_task(struct task_struct +@@ -6725,7 +6725,7 @@ int can_migrate_task(struct task_struct /* Prevent to re-select dst_cpu via env's cpus */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { @@ -678,7 +682,7 @@ Signed-off-by: Sebastian Andrzej Siewior env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; -@@ -7254,7 +7254,7 @@ check_cpu_capacity(struct rq *rq, struct +@@ -7294,7 +7294,7 @@ check_cpu_capacity(struct rq *rq, struct /* * Group imbalance indicates (and tries to solve) the problem where balancing @@ -687,7 +691,7 @@ Signed-off-by: Sebastian Andrzej Siewior * * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a * cpumask covering 1 cpu of the first group and 3 cpus of the second group. -@@ -7828,7 +7828,7 @@ static struct sched_group *find_busiest_ +@@ -7870,7 +7870,7 @@ static struct sched_group *find_busiest_ /* * If the busiest group is imbalanced the below checks don't * work because they assume all things are equal, which typically @@ -696,7 +700,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ if (busiest->group_type == group_imbalanced) goto force_balance; -@@ -8213,7 +8213,7 @@ static int load_balance(int this_cpu, st +@@ -8262,7 +8262,7 @@ static int load_balance(int this_cpu, st * if the curr task on busiest cpu can't be * moved to this_cpu */ @@ -707,7 +711,7 @@ Signed-off-by: Sebastian Andrzej Siewior env.flags |= LBF_ALL_PINNED; --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c -@@ -1591,7 +1591,7 @@ static void put_prev_task_rt(struct rq * +@@ -1603,7 +1603,7 @@ static void put_prev_task_rt(struct rq * static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && @@ -716,7 +720,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 1; return 0; } -@@ -1726,7 +1726,7 @@ static struct rq *find_lock_lowest_rq(st +@@ -1738,7 +1738,7 @@ static struct rq *find_lock_lowest_rq(st * Also make sure that it wasn't scheduled on its rq. 
*/ if (unlikely(task_rq(task) != rq || @@ -738,7 +742,7 @@ Signed-off-by: Sebastian Andrzej Siewior get_online_cpus(); --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c -@@ -22,7 +22,7 @@ notrace static unsigned int check_preemp +@@ -23,7 +23,7 @@ notrace static unsigned int check_preemp * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ diff --git a/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch b/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch index 98789a11c..97b86122b 100644 --- a/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch +++ b/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 21 Nov 2016 19:31:08 +0100 Subject: [PATCH] kernel/sched: move stack + kprobe clean up to __put_task_struct() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz There is no need to free the stack before the task struct. This also comes handy on -RT because we can't free memory in preempt disabled @@ -17,15 +17,15 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -87,6 +87,7 @@ - #include - #include - #include +@@ -40,6 +40,7 @@ + #include + #include + #include +#include - - #include - #include -@@ -398,6 +399,15 @@ void __put_task_struct(struct task_struc + #include + #include + #include +@@ -417,6 +418,15 @@ void __put_task_struct(struct task_struc WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior security_task_free(tsk); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2795,15 +2795,6 @@ static struct rq *finish_task_switch(str +@@ -2706,15 +2706,6 @@ static struct rq *finish_task_switch(str if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch b/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch index 8e516b138..c872bfd29 100644 --- a/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch +++ b/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Tue, 9 Feb 2016 18:17:18 +0100 Subject: kernel: softirq: unlock with irqs on -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz We unlock the lock while the interrupts are off. 
This isn't a problem now but will get because the migrate_disable() + enable are not diff --git a/debian/patches/features/all/rt/kgb-serial-hackaround.patch b/debian/patches/features/all/rt/kgb-serial-hackaround.patch index 4af829437..bf73831c8 100644 --- a/debian/patches/features/all/rt/kgb-serial-hackaround.patch +++ b/debian/patches/features/all/rt/kgb-serial-hackaround.patch @@ -1,7 +1,7 @@ From: Jason Wessel Date: Thu, 28 Jul 2011 12:42:23 -0500 Subject: kgdb/serial: Short term workaround -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On 07/27/2011 04:37 PM, Thomas Gleixner wrote: > - KGDB (not yet disabled) is reportedly unusable on -rt right now due @@ -33,8 +33,8 @@ Jason. +#include #include #include - #include -@@ -3181,6 +3182,8 @@ void serial8250_console_write(struct uar + #include +@@ -3216,6 +3217,8 @@ void serial8250_console_write(struct uar if (port->sysrq || oops_in_progress) locked = 0; diff --git a/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch b/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch index c574daf97..f14d24545 100644 --- a/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch +++ b/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 23 Jan 2014 14:45:59 +0100 Subject: leds: trigger: disable CPU trigger on -RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz as it triggers: |CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.8-rt10 #141 diff --git a/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch b/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch index 2e901257a..944bbac4e 100644 --- a/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch +++ b/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Thu, 31 Mar 2016 00:04:25 -0500 Subject: [PATCH] list_bl: fixup bogus lockdep warning -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz At first glance, the use of 'static inline' seems appropriate for INIT_HLIST_BL_HEAD(). 
@@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h -@@ -42,13 +42,15 @@ struct hlist_bl_node { +@@ -43,13 +43,15 @@ struct hlist_bl_node { struct hlist_bl_node *next, **pprev; }; diff --git a/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch b/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch index d6094c276..576229479 100644 --- a/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch +++ b/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch @@ -1,7 +1,7 @@ From: Paul Gortmaker Date: Fri, 21 Jun 2013 15:07:25 -0400 Subject: list_bl: Make list head locking RT safe -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz As per changes in include/linux/jbd_common.h for avoiding the bit_spin_locks on RT ("fs: jbd/jbd2: Make state lock and journal @@ -53,7 +53,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h -@@ -2,6 +2,7 @@ +@@ -3,6 +3,7 @@ #define _LINUX_LIST_BL_H #include @@ -61,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include /* -@@ -32,13 +33,22 @@ +@@ -33,13 +34,22 @@ struct hlist_bl_head { struct hlist_bl_node *first; @@ -86,7 +86,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) { -@@ -118,12 +128,26 @@ static inline void hlist_bl_del_init(str +@@ -119,12 +129,26 @@ static inline void hlist_bl_del_init(str static inline void hlist_bl_lock(struct hlist_bl_head *b) { diff --git a/debian/patches/features/all/rt/local-irq-rt-depending-variants.patch b/debian/patches/features/all/rt/local-irq-rt-depending-variants.patch index 58dfa95b8..a8a29e4f1 100644 --- a/debian/patches/features/all/rt/local-irq-rt-depending-variants.patch +++ b/debian/patches/features/all/rt/local-irq-rt-depending-variants.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 21 Jul 2009 22:34:14 +0200 Subject: rt: local_irq_* variants depending on RT/!RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add local_irq_*_(no)rt variant which are mainly used to break interrupt disabled sections on PREEMPT_RT or to explicitely disable @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -196,7 +196,7 @@ extern void devm_free_irq(struct device +@@ -207,7 +207,7 @@ extern void devm_free_irq(struct device #ifdef CONFIG_LOCKDEP # define local_irq_enable_in_hardirq() do { } while (0) #else @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner extern void disable_irq_nosync(unsigned int irq); --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h -@@ -148,4 +148,23 @@ +@@ -165,4 +165,23 @@ do { \ #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) diff --git a/debian/patches/features/all/rt/locallock-add-local_lock_on.patch b/debian/patches/features/all/rt/locallock-add-local_lock_on.patch deleted file mode 100644 index accc3ff3f..000000000 --- a/debian/patches/features/all/rt/locallock-add-local_lock_on.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 27 May 2016 15:11:51 +0200 -Subject: [PATCH] locallock: add local_lock_on() -Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/locallock.h | 6 ++++++ - 1 file changed, 6 insertions(+) - ---- a/include/linux/locallock.h -+++ b/include/linux/locallock.h -@@ -60,6 +60,9 @@ static inline void __local_lock(struct l - #define local_lock(lvar) \ - do { __local_lock(&get_local_var(lvar)); } while (0) - -+#define local_lock_on(lvar, cpu) \ -+ do { __local_lock(&per_cpu(lvar, cpu)); } while (0) -+ - static inline int __local_trylock(struct local_irq_lock *lv) - { - if (lv->owner != current && spin_trylock_local(&lv->lock)) { -@@ -98,6 +101,9 @@ static inline void __local_unlock(struct - put_local_var(lvar); \ - } while (0) - -+#define local_unlock_on(lvar, cpu) \ -+ do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) -+ - static inline void __local_lock_irq(struct local_irq_lock *lv) - { - spin_lock_irqsave(&lv->lock, lv->flags); diff --git a/debian/patches/features/all/rt/localversion.patch b/debian/patches/features/all/rt/localversion.patch index 1999d5ce9..ffbbdbe3e 100644 --- a/debian/patches/features/all/rt/localversion.patch +++ b/debian/patches/features/all/rt/localversion.patch @@ -1,7 +1,7 @@ Subject: Add localversion for -RT release From: Thomas Gleixner Date: Fri, 08 Jul 2011 20:25:16 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Signed-off-by: Thomas Gleixner --- @@ -11,4 +11,4 @@ Signed-off-by: Thomas Gleixner --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt7 ++-rt1 diff --git a/debian/patches/features/all/rt/lockdep-Fix-compilation-error-for-CONFIG_MODULES-and.patch b/debian/patches/features/all/rt/lockdep-Fix-compilation-error-for-CONFIG_MODULES-and.patch deleted file mode 100644 index 324e3e3fd..000000000 --- a/debian/patches/features/all/rt/lockdep-Fix-compilation-error-for-CONFIG_MODULES-and.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: Dan Murphy -Date: Fri, 24 Feb 2017 08:41:49 -0600 -Subject: [PATCH] lockdep: Fix compilation error for !CONFIG_MODULES and - !CONFIG_SMP -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -When CONFIG_MODULES is not set then it fails to compile in lockdep: - -|kernel/locking/lockdep.c: In function 'look_up_lock_class': -|kernel/locking/lockdep.c:684:12: error: implicit declaration of function -| '__is_module_percpu_address' [-Werror=implicit-function-declaration] - -If CONFIG_MODULES is set but CONFIG_SMP is not, then it compiles but -fails link at the end: - -|kernel/locking/lockdep.c:684: undefined reference to `__is_module_percpu_address' -|kernel/built-in.o:(.debug_addr+0x1e674): undefined reference to `__is_module_percpu_address' - -This patch adds the function for both cases. 
- -Signed-off-by: Dan Murphy -[bigeasy: merge the two patches from Dan into one, adapt changelog] -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/module.h | 5 +++++ - kernel/module.c | 5 +++++ - 2 files changed, 10 insertions(+) - ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -661,6 +661,11 @@ static inline bool is_module_percpu_addr - return false; - } - -+static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) -+{ -+ return false; -+} -+ - static inline bool is_module_text_address(unsigned long addr) - { - return false; ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -739,6 +739,11 @@ bool is_module_percpu_address(unsigned l - return false; - } - -+bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) -+{ -+ return false; -+} -+ - #endif /* CONFIG_SMP */ - - #define MODINFO_ATTR(field) \ diff --git a/debian/patches/features/all/rt/lockdep-Fix-per-cpu-static-objects.patch b/debian/patches/features/all/rt/lockdep-Fix-per-cpu-static-objects.patch deleted file mode 100644 index 0ef2784cc..000000000 --- a/debian/patches/features/all/rt/lockdep-Fix-per-cpu-static-objects.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: Peter Zijlstra -Date: Mon, 20 Mar 2017 12:26:55 +0100 -Subject: [PATCH] lockdep: Fix per-cpu static objects -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Since commit 383776fa7527 ("locking/lockdep: Handle statically initialized -PER_CPU locks properly") we try to collapse per-cpu locks into a single -class by giving them all the same key. For this key we choose the canonical -address of the per-cpu object, which would be the offset into the per-cpu -area. - -This has two problems: - - - there is a case where we run !0 lock->key through static_obj() and - expect this to pass; it doesn't for canonical pointers. - - - 0 is a valid canonical address. - -Cure both issues by redefining the canonical address as the address of the -per-cpu variable on the boot CPU. - -Since I didn't want to rely on CPU0 being the boot-cpu, or even existing at -all, track the boot CPU in a variable. 
- -Fixes: 383776fa7527 ("locking/lockdep: Handle statically initialized PER_CPU locks properly") -Reported-by: kernel test robot -Signed-off-by: Peter Zijlstra (Intel) -Tested-by: Borislav Petkov -Cc: Sebastian Andrzej Siewior -Cc: linux-mm@kvack.org -Cc: wfg@linux.intel.com -Cc: kernel test robot -Cc: LKP -Link: http://lkml.kernel.org/r/20170320114108.kbvcsuepem45j5cr@hirez.programming.kicks-ass.net -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/smp.h | 12 ++++++++++++ - kernel/cpu.c | 6 ++++++ - kernel/module.c | 6 +++++- - mm/percpu.c | 5 ++++- - 4 files changed, 27 insertions(+), 2 deletions(-) - ---- a/include/linux/smp.h -+++ b/include/linux/smp.h -@@ -120,6 +120,13 @@ extern unsigned int setup_max_cpus; - extern void __init setup_nr_cpu_ids(void); - extern void __init smp_init(void); - -+extern int __boot_cpu_id; -+ -+static inline int get_boot_cpu_id(void) -+{ -+ return __boot_cpu_id; -+} -+ - #else /* !SMP */ - - static inline void smp_send_stop(void) { } -@@ -158,6 +165,11 @@ static inline void smp_init(void) { up_l - static inline void smp_init(void) { } - #endif - -+static inline int get_boot_cpu_id(void) -+{ -+ return 0; -+} -+ - #endif /* !SMP */ - - /* ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -1125,6 +1125,8 @@ core_initcall(cpu_hotplug_pm_sync_init); - - #endif /* CONFIG_PM_SLEEP_SMP */ - -+int __boot_cpu_id; -+ - #endif /* CONFIG_SMP */ - - /* Boot processor state steps */ -@@ -1815,6 +1817,10 @@ void __init boot_cpu_init(void) - set_cpu_active(cpu, true); - set_cpu_present(cpu, true); - set_cpu_possible(cpu, true); -+ -+#ifdef CONFIG_SMP -+ __boot_cpu_id = cpu; -+#endif - } - - /* ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -682,8 +682,12 @@ bool __is_module_percpu_address(unsigned - void *va = (void *)addr; - - if (va >= start && va < start + mod->percpu_size) { -- if (can_addr) -+ if (can_addr) { - *can_addr = (unsigned long) (va - start); -+ *can_addr += (unsigned long) -+ per_cpu_ptr(mod->percpu, -+ get_boot_cpu_id()); -+ } - preempt_enable(); - return true; - } ---- a/mm/percpu.c -+++ b/mm/percpu.c -@@ -1296,8 +1296,11 @@ bool __is_kernel_percpu_address(unsigned - void *va = (void *)addr; - - if (va >= start && va < start + static_size) { -- if (can_addr) -+ if (can_addr) { - *can_addr = (unsigned long) (va - start); -+ *can_addr += (unsigned long) -+ per_cpu_ptr(base, get_boot_cpu_id()); -+ } - return true; - } - } diff --git a/debian/patches/features/all/rt/lockdep-Handle-statically-initialized-PER_CPU-locks-.patch b/debian/patches/features/all/rt/lockdep-Handle-statically-initialized-PER_CPU-locks-.patch deleted file mode 100644 index 593017059..000000000 --- a/debian/patches/features/all/rt/lockdep-Handle-statically-initialized-PER_CPU-locks-.patch +++ /dev/null @@ -1,269 +0,0 @@ -From: Thomas Gleixner -Date: Fri, 17 Feb 2017 19:44:39 +0100 -Subject: [PATCH] lockdep: Handle statically initialized PER_CPU locks proper -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -If a PER_CPU struct which contains a spin_lock is statically initialized -via: - -DEFINE_PER_CPU(struct foo, bla) = { - .lock = __SPIN_LOCK_UNLOCKED(bla.lock) -}; - -then lockdep assigns a seperate key to each lock because the logic for -assigning a key to statically initialized locks is to use the address as -the key. With per CPU locks the address is obvioulsy different on each CPU. - -That's wrong, because all locks should have the same key. 
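(Illustrative aside, not part of the removed patch: the canonical key it ends up
using for a statically initialized per-CPU lock can be sketched as below; struct
foo and bla are the hypothetical names from the quoted message, and
get_boot_cpu_id() is the helper added by the patch quoted just above.)

	#include <linux/percpu.h>
	#include <linux/smp.h>
	#include <linux/spinlock.h>

	struct foo {
		spinlock_t lock;
	};

	DEFINE_PER_CPU(struct foo, bla) = {
		.lock = __SPIN_LOCK_UNLOCKED(bla.lock),
	};

	/* same value for every cpu: the address bla.lock has on the boot CPU,
	   so all per-CPU copies collapse into a single lock class */
	static void *canonical_key(int cpu)
	{
		struct foo *f = &per_cpu(bla, cpu);
		unsigned long addr = (unsigned long)&f->lock;

		return (void *)(addr - per_cpu_offset(cpu)
				     + per_cpu_offset(get_boot_cpu_id()));
	}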
- -To solve this the following modifications are required: - - 1) Extend the is_kernel/module_percpu_addr() functions to hand back the - canonical address of the per CPU address, i.e. the per CPU address - minus the per CPU offset. - - 2) Check the lock address with these functions and if the per CPU check - matches use the returned canonical address as the lock key, so all per - CPU locks have the same key. - - 3) Move the static_obj(key) check into look_up_lock_class() so this check - can be avoided for statically initialized per CPU locks. That's - required because the canonical address fails the static_obj(key) check - for obvious reasons. - -Reported-by: Mike Galbraith -Cc: stable-rt@vger.kernel.org -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/module.h | 1 + - include/linux/percpu.h | 1 + - kernel/locking/lockdep.c | 35 ++++++++++++++++++++++++----------- - kernel/module.c | 31 +++++++++++++++++++------------ - mm/percpu.c | 37 +++++++++++++++++++++++-------------- - 5 files changed, 68 insertions(+), 37 deletions(-) - ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -493,6 +493,7 @@ static inline int module_is_live(struct - struct module *__module_text_address(unsigned long addr); - struct module *__module_address(unsigned long addr); - bool is_module_address(unsigned long addr); -+bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); - bool is_module_percpu_address(unsigned long addr); - bool is_module_text_address(unsigned long addr); - ---- a/include/linux/percpu.h -+++ b/include/linux/percpu.h -@@ -110,6 +110,7 @@ extern int __init pcpu_page_first_chunk( - #endif - - extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); -+extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); - extern bool is_kernel_percpu_address(unsigned long addr); - - #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) ---- a/kernel/locking/lockdep.c -+++ b/kernel/locking/lockdep.c -@@ -660,6 +660,7 @@ look_up_lock_class(struct lockdep_map *l - struct lockdep_subclass_key *key; - struct hlist_head *hash_head; - struct lock_class *class; -+ bool is_static = false; - - if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { - debug_locks_off(); -@@ -673,10 +674,23 @@ look_up_lock_class(struct lockdep_map *l - - /* - * Static locks do not have their class-keys yet - for them the key -- * is the lock object itself: -- */ -- if (unlikely(!lock->key)) -- lock->key = (void *)lock; -+ * is the lock object itself. If the lock is in the per cpu area, -+ * the canonical address of the lock (per cpu offset removed) is -+ * used. -+ */ -+ if (unlikely(!lock->key)) { -+ unsigned long can_addr, addr = (unsigned long)lock; -+ -+ if (__is_kernel_percpu_address(addr, &can_addr)) -+ lock->key = (void *)can_addr; -+ else if (__is_module_percpu_address(addr, &can_addr)) -+ lock->key = (void *)can_addr; -+ else if (static_obj(lock)) -+ lock->key = (void *)lock; -+ else -+ return ERR_PTR(-EINVAL); -+ is_static = true; -+ } - - /* - * NOTE: the class-key must be unique. For dynamic locks, a static -@@ -708,7 +722,7 @@ look_up_lock_class(struct lockdep_map *l - } - } - -- return NULL; -+ return is_static || static_obj(lock->key) ? 
NULL : ERR_PTR(-EINVAL); - } - - /* -@@ -726,19 +740,18 @@ register_lock_class(struct lockdep_map * - DEBUG_LOCKS_WARN_ON(!irqs_disabled()); - - class = look_up_lock_class(lock, subclass); -- if (likely(class)) -+ if (likely(!IS_ERR_OR_NULL(class))) - goto out_set_class_cache; - - /* - * Debug-check: all keys must be persistent! -- */ -- if (!static_obj(lock->key)) { -+ */ -+ if (IS_ERR(class)) { - debug_locks_off(); - printk("INFO: trying to register non-static key.\n"); - printk("the code is fine but needs lockdep annotation.\n"); - printk("turning off the locking correctness validator.\n"); - dump_stack(); -- - return NULL; - } - -@@ -3419,7 +3432,7 @@ static int match_held_lock(struct held_l - * Clearly if the lock hasn't been acquired _ever_, we're not - * holding it either, so report failure. - */ -- if (!class) -+ if (IS_ERR_OR_NULL(class)) - return 0; - - /* -@@ -4172,7 +4185,7 @@ void lockdep_reset_lock(struct lockdep_m - * If the class exists we look it up and zap it: - */ - class = look_up_lock_class(lock, j); -- if (class) -+ if (!IS_ERR_OR_NULL(class)) - zap_class(class); - } - /* ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -665,16 +665,7 @@ static void percpu_modcopy(struct module - memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); - } - --/** -- * is_module_percpu_address - test whether address is from module static percpu -- * @addr: address to test -- * -- * Test whether @addr belongs to module static percpu area. -- * -- * RETURNS: -- * %true if @addr is from module static percpu area -- */ --bool is_module_percpu_address(unsigned long addr) -+bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) - { - struct module *mod; - unsigned int cpu; -@@ -688,9 +679,11 @@ bool is_module_percpu_address(unsigned l - continue; - for_each_possible_cpu(cpu) { - void *start = per_cpu_ptr(mod->percpu, cpu); -+ void *va = (void *)addr; - -- if ((void *)addr >= start && -- (void *)addr < start + mod->percpu_size) { -+ if (va >= start && va < start + mod->percpu_size) { -+ if (can_addr) -+ *can_addr = (unsigned long) (va - start); - preempt_enable(); - return true; - } -@@ -701,6 +694,20 @@ bool is_module_percpu_address(unsigned l - return false; - } - -+/** -+ * is_module_percpu_address - test whether address is from module static percpu -+ * @addr: address to test -+ * -+ * Test whether @addr belongs to module static percpu area. -+ * -+ * RETURNS: -+ * %true if @addr is from module static percpu area -+ */ -+bool is_module_percpu_address(unsigned long addr) -+{ -+ return __is_module_percpu_address(addr, NULL); -+} -+ - #else /* ... !CONFIG_SMP */ - - static inline void __percpu *mod_percpu(struct module *mod) ---- a/mm/percpu.c -+++ b/mm/percpu.c -@@ -1284,18 +1284,7 @@ void free_percpu(void __percpu *ptr) - } - EXPORT_SYMBOL_GPL(free_percpu); - --/** -- * is_kernel_percpu_address - test whether address is from static percpu area -- * @addr: address to test -- * -- * Test whether @addr belongs to in-kernel static percpu area. Module -- * static percpu areas are not considered. For those, use -- * is_module_percpu_address(). -- * -- * RETURNS: -- * %true if @addr is from in-kernel static percpu area, %false otherwise. 
-- */ --bool is_kernel_percpu_address(unsigned long addr) -+bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr) - { - #ifdef CONFIG_SMP - const size_t static_size = __per_cpu_end - __per_cpu_start; -@@ -1304,16 +1293,36 @@ bool is_kernel_percpu_address(unsigned l - - for_each_possible_cpu(cpu) { - void *start = per_cpu_ptr(base, cpu); -+ void *va = (void *)addr; - -- if ((void *)addr >= start && (void *)addr < start + static_size) -+ if (va >= start && va < start + static_size) { -+ if (can_addr) -+ *can_addr = (unsigned long) (va - start); - return true; -- } -+ } -+ } - #endif - /* on UP, can't distinguish from other static vars, always false */ - return false; - } - - /** -+ * is_kernel_percpu_address - test whether address is from static percpu area -+ * @addr: address to test -+ * -+ * Test whether @addr belongs to in-kernel static percpu area. Module -+ * static percpu areas are not considered. For those, use -+ * is_module_percpu_address(). -+ * -+ * RETURNS: -+ * %true if @addr is from in-kernel static percpu area, %false otherwise. -+ */ -+bool is_kernel_percpu_address(unsigned long addr) -+{ -+ return __is_kernel_percpu_address(addr, NULL); -+} -+ -+/** - * per_cpu_ptr_to_phys - convert translated percpu address to physical address - * @addr: the address to be converted to physical address - * diff --git a/debian/patches/features/all/rt/lockdep-disable-self-test.patch b/debian/patches/features/all/rt/lockdep-disable-self-test.patch new file mode 100644 index 000000000..2dadffb65 --- /dev/null +++ b/debian/patches/features/all/rt/lockdep-disable-self-test.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 17 Oct 2017 16:36:18 +0200 +Subject: [PATCH] lockdep: disable self-test +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The self-test wasn't always 100% accurate for RT. We disabled a few +tests which failed because they had a different semantic for RT. Some +still reported false positives. Now the selftest locks up the system +during boot and it needs to be investigated… + +Signed-off-by: Sebastian Andrzej Siewior +--- + lib/Kconfig.debug | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -1199,7 +1199,7 @@ config DEBUG_ATOMIC_SLEEP + + config DEBUG_LOCKING_API_SELFTESTS + bool "Locking API boot-time self-tests" +- depends on DEBUG_KERNEL ++ depends on DEBUG_KERNEL && !PREEMPT_RT_FULL + help + Say Y here if you want the kernel to run a short self-test during + bootup. The self-test checks whether common types of locking bugs diff --git a/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch b/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch index 80fd5d03f..ea34da364 100644 --- a/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch +++ b/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch @@ -1,28 +1,36 @@ Subject: lockdep: Make it RT aware From: Thomas Gleixner Date: Sun, 17 Jul 2011 18:51:23 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz teach lockdep that we don't really do softirqs on -RT. 
Signed-off-by: Thomas Gleixner --- - include/linux/irqflags.h | 10 +++++++--- + include/linux/irqflags.h | 26 +++++++++++++++----------- kernel/locking/lockdep.c | 2 ++ - 2 files changed, 9 insertions(+), 3 deletions(-) + 2 files changed, 17 insertions(+), 11 deletions(-) --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h -@@ -25,8 +25,6 @@ - # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) - # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) - # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) --# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) --# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) +@@ -34,16 +34,6 @@ do { \ + current->hardirq_context--; \ + crossrelease_hist_end(XHLOCK_HARD); \ + } while (0) +-# define lockdep_softirq_enter() \ +-do { \ +- current->softirq_context++; \ +- crossrelease_hist_start(XHLOCK_SOFT); \ +-} while (0) +-# define lockdep_softirq_exit() \ +-do { \ +- current->softirq_context--; \ +- crossrelease_hist_end(XHLOCK_SOFT); \ +-} while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) -@@ -39,9 +37,15 @@ +@@ -56,9 +46,23 @@ do { \ # define trace_softirqs_enabled(p) 0 # define trace_hardirq_enter() do { } while (0) # define trace_hardirq_exit() do { } while (0) @@ -30,8 +38,16 @@ Signed-off-by: Thomas Gleixner +#endif + +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) -+# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) -+# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) ++# define lockdep_softirq_enter() \ ++do { \ ++ current->softirq_context++; \ ++ crossrelease_hist_start(XHLOCK_SOFT); \ ++} while (0) ++# define lockdep_softirq_exit() \ ++do { \ ++ current->softirq_context--; \ ++ crossrelease_hist_end(XHLOCK_SOFT); \ ++} while (0) +#else # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) @@ -41,7 +57,7 @@ Signed-off-by: Thomas Gleixner #if defined(CONFIG_IRQSOFF_TRACER) || \ --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -3715,6 +3715,7 @@ static void check_flags(unsigned long fl +@@ -3917,6 +3917,7 @@ static void check_flags(unsigned long fl } } @@ -49,7 +65,7 @@ Signed-off-by: Thomas Gleixner /* * We dont accurately track softirq state in e.g. 
* hardirq contexts (such as on 4KSTACKS), so only -@@ -3729,6 +3730,7 @@ static void check_flags(unsigned long fl +@@ -3931,6 +3932,7 @@ static void check_flags(unsigned long fl DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } diff --git a/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch b/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch index 89baca3d1..4a308ab9e 100644 --- a/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch +++ b/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Wed, 28 Jan 2015 13:08:45 -0600 Subject: lockdep: selftest: fix warnings due to missing PREEMPT_RT conditionals -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz "lockdep: Selftest: Only do hardirq context test for raw spinlock" disabled the execution of certain tests with PREEMPT_RT_FULL, but did @@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) +@@ -742,6 +742,8 @@ GENERATE_TESTCASE(init_held_rtmutex); #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) @@ -38,7 +38,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) -@@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_ +@@ -757,9 +759,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Enabling hardirqs with a softirq-safe lock held: */ -@@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -792,6 +797,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A #undef E1 #undef E2 @@ -60,7 +60,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Enabling irqs with an irq-safe lock held: */ -@@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -815,6 +822,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) @@ -69,7 +69,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) -@@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -830,6 +839,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior #undef E1 #undef E2 -@@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -861,6 +872,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) @@ -87,7 +87,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) -@@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -876,6 +889,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) @@ -96,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior #undef E1 #undef 
E2 #undef E3 -@@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -909,6 +924,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) @@ -105,7 +105,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) -@@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_ +@@ -924,10 +941,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * read-lock / write-lock irq inversion. * -@@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inver +@@ -990,6 +1011,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inver #undef E2 #undef E3 @@ -131,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * read-lock / write-lock recursion that is actually safe. */ -@@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ +@@ -1028,6 +1053,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ #undef E2 #undef E3 diff --git a/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch b/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch index 5ecc3b28f..0be0e4bcb 100644 --- a/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch +++ b/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch @@ -1,7 +1,7 @@ Subject: lockdep: selftest: Only do hardirq context test for raw spinlock From: Yong Zhang Date: Mon, 16 Apr 2012 15:01:56 +0800 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz From: Yong Zhang @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -1858,6 +1858,7 @@ void locking_selftest(void) +@@ -2057,6 +2057,7 @@ void locking_selftest(void) printk(" --------------------------------------------------------------------------\n"); @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner /* * irq-context testcases: */ -@@ -1870,6 +1871,28 @@ void locking_selftest(void) +@@ -2069,6 +2070,28 @@ void locking_selftest(void) DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); diff --git a/debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch b/debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch new file mode 100644 index 000000000..d4f44c242 --- /dev/null +++ b/debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch @@ -0,0 +1,228 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 4 Aug 2017 17:40:42 +0200 +Subject: [PATCH 1/2] locking: don't check for __LINUX_SPINLOCK_TYPES_H on -RT + archs +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Upstream uses arch_spinlock_t within spinlock_t and requests that +spinlock_types.h header file is included first. +On -RT we have the rt_mutex with its raw_lock wait_lock which needs +architectures' spinlock_types.h header file for its definition. However +we need rt_mutex first because it is used to build the spinlock_t so +that check does not work for us. 
+Therefore I am dropping that check. + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/alpha/include/asm/spinlock_types.h | 4 ---- + arch/arm/include/asm/spinlock_types.h | 4 ---- + arch/arm64/include/asm/spinlock_types.h | 4 ---- + arch/blackfin/include/asm/spinlock_types.h | 4 ---- + arch/hexagon/include/asm/spinlock_types.h | 4 ---- + arch/ia64/include/asm/spinlock_types.h | 4 ---- + arch/m32r/include/asm/spinlock_types.h | 4 ---- + arch/metag/include/asm/spinlock_types.h | 4 ---- + arch/mn10300/include/asm/spinlock_types.h | 4 ---- + arch/powerpc/include/asm/spinlock_types.h | 4 ---- + arch/s390/include/asm/spinlock_types.h | 4 ---- + arch/sh/include/asm/spinlock_types.h | 4 ---- + arch/tile/include/asm/spinlock_types.h | 4 ---- + arch/xtensa/include/asm/spinlock_types.h | 4 ---- + include/linux/spinlock_types_up.h | 4 ---- + 15 files changed, 60 deletions(-) + +--- a/arch/alpha/include/asm/spinlock_types.h ++++ b/arch/alpha/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef _ALPHA_SPINLOCK_TYPES_H + #define _ALPHA_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + volatile unsigned int lock; + } arch_spinlock_t; +--- a/arch/arm/include/asm/spinlock_types.h ++++ b/arch/arm/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef __ASM_SPINLOCK_TYPES_H + #define __ASM_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + #define TICKET_SHIFT 16 + + typedef struct { +--- a/arch/arm64/include/asm/spinlock_types.h ++++ b/arch/arm64/include/asm/spinlock_types.h +@@ -16,10 +16,6 @@ + #ifndef __ASM_SPINLOCK_TYPES_H + #define __ASM_SPINLOCK_TYPES_H + +-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H) +-# error "please don't include this file directly" +-#endif +- + #include + + #define TICKET_SHIFT 16 +--- a/arch/blackfin/include/asm/spinlock_types.h ++++ b/arch/blackfin/include/asm/spinlock_types.h +@@ -7,10 +7,6 @@ + #ifndef __ASM_SPINLOCK_TYPES_H + #define __ASM_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + #include + + typedef struct { +--- a/arch/hexagon/include/asm/spinlock_types.h ++++ b/arch/hexagon/include/asm/spinlock_types.h +@@ -21,10 +21,6 @@ + #ifndef _ASM_SPINLOCK_TYPES_H + #define _ASM_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + volatile unsigned int lock; + } arch_spinlock_t; +--- a/arch/ia64/include/asm/spinlock_types.h ++++ b/arch/ia64/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef _ASM_IA64_SPINLOCK_TYPES_H + #define _ASM_IA64_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + volatile unsigned int lock; + } arch_spinlock_t; +--- a/arch/m32r/include/asm/spinlock_types.h ++++ b/arch/m32r/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef _ASM_M32R_SPINLOCK_TYPES_H + #define _ASM_M32R_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + volatile int slock; + } arch_spinlock_t; +--- a/arch/metag/include/asm/spinlock_types.h ++++ b/arch/metag/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef _ASM_METAG_SPINLOCK_TYPES_H + #define _ASM_METAG_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file 
directly" +-#endif +- + typedef struct { + volatile unsigned int lock; + } arch_spinlock_t; +--- a/arch/mn10300/include/asm/spinlock_types.h ++++ b/arch/mn10300/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef _ASM_SPINLOCK_TYPES_H + #define _ASM_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct arch_spinlock { + unsigned int slock; + } arch_spinlock_t; +--- a/arch/powerpc/include/asm/spinlock_types.h ++++ b/arch/powerpc/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef _ASM_POWERPC_SPINLOCK_TYPES_H + #define _ASM_POWERPC_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + volatile unsigned int slock; + } arch_spinlock_t; +--- a/arch/s390/include/asm/spinlock_types.h ++++ b/arch/s390/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef __ASM_SPINLOCK_TYPES_H + #define __ASM_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + int lock; + } __attribute__ ((aligned (4))) arch_spinlock_t; +--- a/arch/sh/include/asm/spinlock_types.h ++++ b/arch/sh/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef __ASM_SH_SPINLOCK_TYPES_H + #define __ASM_SH_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + volatile unsigned int lock; + } arch_spinlock_t; +--- a/arch/tile/include/asm/spinlock_types.h ++++ b/arch/tile/include/asm/spinlock_types.h +@@ -15,10 +15,6 @@ + #ifndef _ASM_TILE_SPINLOCK_TYPES_H + #define _ASM_TILE_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + #ifdef __tilegx__ + + /* Low 15 bits are "next"; high 15 bits are "current". */ +--- a/arch/xtensa/include/asm/spinlock_types.h ++++ b/arch/xtensa/include/asm/spinlock_types.h +@@ -2,10 +2,6 @@ + #ifndef __ASM_SPINLOCK_TYPES_H + #define __ASM_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + typedef struct { + volatile unsigned int slock; + } arch_spinlock_t; +--- a/include/linux/spinlock_types_up.h ++++ b/include/linux/spinlock_types_up.h +@@ -1,10 +1,6 @@ + #ifndef __LINUX_SPINLOCK_TYPES_UP_H + #define __LINUX_SPINLOCK_TYPES_UP_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H +-# error "please don't include this file directly" +-#endif +- + /* + * include/linux/spinlock_types_up.h - spinlock type definitions for UP + * diff --git a/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch b/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch index fdb8563b2..ff770fa1f 100644 --- a/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch +++ b/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch @@ -1,7 +1,7 @@ From: "Wolfgang M. Reimer" Date: Tue, 21 Jul 2015 16:20:07 +0200 Subject: locking: locktorture: Do NOT include rwlock.h directly -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Including rwlock.h directly will cause kernel builds to fail if CONFIG_PREEMPT_RT_FULL is defined. 
The correct header file diff --git a/debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch b/debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch new file mode 100644 index 000000000..45ab7ef4c --- /dev/null +++ b/debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch @@ -0,0 +1,74 @@ +From: Mikulas Patocka +Date: Mon, 13 Nov 2017 12:56:53 -0500 +Subject: [PATCH] locking/rt-mutex: fix deadlock in device mapper / block-IO +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +When some block device driver creates a bio and submits it to another +block device driver, the bio is added to current->bio_list (in order to +avoid unbounded recursion). + +However, this queuing of bios can cause deadlocks, in order to avoid them, +device mapper registers a function flush_current_bio_list. This function +is called when device mapper driver blocks. It redirects bios queued on +current->bio_list to helper workqueues, so that these bios can proceed +even if the driver is blocked. + +The problem with CONFIG_PREEMPT_RT_FULL is that when the device mapper +driver blocks, it won't call flush_current_bio_list (because +tsk_is_pi_blocked returns true in sched_submit_work), so deadlocks in +block device stack can happen. + +Note that we can't call blk_schedule_flush_plug if tsk_is_pi_blocked +returns true - that would cause +BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in +task_blocks_on_rt_mutex when flush_current_bio_list attempts to take a +spinlock. + +So the proper fix is to call blk_schedule_flush_plug in rt_mutex_fastlock, +when fast acquire failed and when the task is about to block. + +CC: stable-rt@vger.kernel.org +[bigeasy: The deadlock is not device-mapper specific, it can also occur + in plain EXT4] +Signed-off-by: Mikulas Patocka +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include "rtmutex_common.h" + +@@ -1926,6 +1927,15 @@ rt_mutex_fastlock(struct rt_mutex *lock, + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + return 0; + ++ /* ++ * If rt_mutex blocks, the function sched_submit_work will not call ++ * blk_schedule_flush_plug (because tsk_is_pi_blocked would be true). ++ * We must call blk_schedule_flush_plug here, if we don't call it, ++ * a deadlock in device mapper may happen. 
++ */ ++ if (unlikely(blk_needs_flush_plug(current))) ++ blk_schedule_flush_plug(current); ++ + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); + } + +@@ -1943,6 +1953,9 @@ rt_mutex_timed_fastlock(struct rt_mutex + likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + return 0; + ++ if (unlikely(blk_needs_flush_plug(current))) ++ blk_schedule_flush_plug(current); ++ + return slowfn(lock, state, timeout, chwalk, ww_ctx); + } + diff --git a/debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch b/debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch new file mode 100644 index 000000000..e604198d4 --- /dev/null +++ b/debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch @@ -0,0 +1,30 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 7 Sep 2017 12:38:47 +0200 +Subject: locking/rtmutex: don't drop the wait_lock twice +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Since the futex rework, __rt_mutex_start_proxy_lock() does no longer +acquire the wait_lock so it must not drop it. Otherwise the lock is not +only unlocked twice but also the preemption counter is underflown. + +It is okay to remove that line because this function does not disable +interrupts nor does it acquire the ->wait_lock. The caller does this so it is +wrong do it here (after the futex rework). + +Cc: rt-stable@vger.kernel.org #v4.9.18-rt14+ +Reported-by: Gusenleitner Klaus +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1753,7 +1753,6 @@ int __rt_mutex_start_proxy_lock(struct r + raw_spin_lock(&task->pi_lock); + if (task->pi_blocked_on) { + raw_spin_unlock(&task->pi_lock); +- raw_spin_unlock_irq(&lock->wait_lock); + return -EAGAIN; + } + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; diff --git a/debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch b/debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch new file mode 100644 index 000000000..5c34db0ba --- /dev/null +++ b/debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch @@ -0,0 +1,33 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 16 Nov 2017 16:48:48 +0100 +Subject: [PATCH] locking/rtmutex: re-init the wait_lock in + rt_mutex_init_proxy_locked() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +We could provide a key-class for the lockdep (and fixup all callers) or +move the init to all callers (like it was) in order to avoid lockdep +seeing a double-lock of the wait_lock. + +Reported-by: Fernando Lopez-Lezcano +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -2260,6 +2260,14 @@ void rt_mutex_init_proxy_locked(struct r + struct task_struct *proxy_owner) + { + __rt_mutex_init(lock, NULL, NULL); ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* ++ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is ++ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping ++ * lock. 
++ */ ++ raw_spin_lock_init(&lock->wait_lock); ++#endif + debug_rt_mutex_proxy_lock(lock, proxy_owner); + rt_mutex_set_owner(lock, proxy_owner); + } diff --git a/debian/patches/features/all/rt/md-disable-bcache.patch b/debian/patches/features/all/rt/md-disable-bcache.patch index ae31a7046..815553164 100644 --- a/debian/patches/features/all/rt/md-disable-bcache.patch +++ b/debian/patches/features/all/rt/md-disable-bcache.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Aug 2013 11:48:57 +0200 Subject: md: disable bcache -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz It uses anon semaphores |drivers/md/bcache/request.c: In function ‘cached_dev_write_complete’: diff --git a/debian/patches/features/all/rt/md-raid5-do-not-disable-interrupts.patch b/debian/patches/features/all/rt/md-raid5-do-not-disable-interrupts.patch new file mode 100644 index 000000000..42ce5a790 --- /dev/null +++ b/debian/patches/features/all/rt/md-raid5-do-not-disable-interrupts.patch @@ -0,0 +1,55 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 17 Nov 2017 16:21:00 +0100 +Subject: [PATCH] md/raid5: do not disable interrupts +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:974 +|in_atomic(): 0, irqs_disabled(): 1, pid: 2992, name: lvm +|CPU: 2 PID: 2992 Comm: lvm Not tainted 4.13.10-rt3+ #54 +|Call Trace: +| dump_stack+0x4f/0x65 +| ___might_sleep+0xfc/0x150 +| atomic_dec_and_spin_lock+0x3c/0x80 +| raid5_release_stripe+0x73/0x110 +| grow_one_stripe+0xce/0xf0 +| setup_conf+0x841/0xaa0 +| raid5_run+0x7e7/0xa40 +| md_run+0x515/0xaf0 +| raid_ctr+0x147d/0x25e0 +| dm_table_add_target+0x155/0x320 +| table_load+0x103/0x320 +| ctl_ioctl+0x1d9/0x510 +| dm_ctl_ioctl+0x9/0x10 +| do_vfs_ioctl+0x8e/0x670 +| SyS_ioctl+0x3c/0x70 +| entry_SYSCALL_64_fastpath+0x17/0x98 + +The interrupts were disabled because ->device_lock is taken with +interrupts disabled. 
+ +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/md/raid5.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -410,7 +410,7 @@ void raid5_release_stripe(struct stripe_ + md_wakeup_thread(conf->mddev->thread); + return; + slow_path: +- local_irq_save(flags); ++ local_irq_save_nort(flags); + /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ + if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { + INIT_LIST_HEAD(&list); +@@ -419,7 +419,7 @@ void raid5_release_stripe(struct stripe_ + spin_unlock(&conf->device_lock); + release_inactive_stripe_list(conf, &list, hash); + } +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + + static inline void remove_hash(struct stripe_head *sh) diff --git a/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch b/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch index 2030c2dcb..84cf97ee0 100644 --- a/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch +++ b/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 6 Apr 2010 16:51:31 +0200 Subject: md: raid5: Make raid5_percpu handling RT aware -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz __raid_run_ops() disables preemption with get_cpu() around the access to the raid5_percpu variables. That causes scheduling while atomic @@ -21,7 +21,7 @@ Tested-by: Udo van den Heuvel --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -1986,8 +1986,9 @@ static void raid_run_ops(struct stripe_h +@@ -2064,8 +2064,9 @@ static void raid_run_ops(struct stripe_h struct raid5_percpu *percpu; unsigned long cpu; @@ -32,7 +32,7 @@ Tested-by: Udo van den Heuvel if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; -@@ -2043,7 +2044,8 @@ static void raid_run_ops(struct stripe_h +@@ -2124,7 +2125,8 @@ static void raid_run_ops(struct stripe_h if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } @@ -41,8 +41,8 @@ Tested-by: Udo van den Heuvel + put_cpu_light(); } - static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, -@@ -6664,6 +6666,7 @@ static int raid456_cpu_up_prepare(unsign + static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) +@@ -6793,6 +6795,7 @@ static int raid456_cpu_up_prepare(unsign __func__, cpu); return -ENOMEM; } @@ -50,7 +50,7 @@ Tested-by: Udo van den Heuvel return 0; } -@@ -6674,7 +6677,6 @@ static int raid5_alloc_percpu(struct r5c +@@ -6803,7 +6806,6 @@ static int raid5_alloc_percpu(struct r5c conf->percpu = alloc_percpu(struct raid5_percpu); if (!conf->percpu) return -ENOMEM; @@ -60,7 +60,7 @@ Tested-by: Udo van den Heuvel conf->scribble_disks = max(conf->raid_disks, --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -643,6 +643,7 @@ struct r5conf { +@@ -624,6 +624,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { diff --git a/debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch b/debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch new file mode 100644 index 000000000..3257be927 --- /dev/null +++ b/debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch @@ -0,0 +1,23 @@ +From: Sebastian Andrzej 
Siewior +Date: Wed, 4 Oct 2017 09:55:58 +0200 +Subject: [PATCH] mfd: syscon: atmel-smc: include string.h +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The string.h header file is needed for the memset() definition. The RT +build fails because it is not pulled in via other header files. + +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/mfd/atmel-smc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/mfd/atmel-smc.c ++++ b/drivers/mfd/atmel-smc.c +@@ -12,6 +12,7 @@ + */ + + #include ++#include + + /** + * atmel_smc_cs_conf_init - initialize a SMC CS conf diff --git a/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch b/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch index 8aeab69f2..3fbdf4518 100644 --- a/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch +++ b/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch @@ -1,7 +1,7 @@ Subject: mips: Disable highmem on RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 17:10:12 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The current highmem handling on -RT is not compatible and needs fixups. @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig -@@ -2520,7 +2520,7 @@ config MIPS_ASID_BITS_VARIABLE +@@ -2519,7 +2519,7 @@ config MIPS_ASID_BITS_VARIABLE # config HIGHMEM bool "High Memory Support" diff --git a/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch b/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch index 43681b398..0b462ca29 100644 --- a/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch +++ b/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch @@ -1,7 +1,7 @@ Subject: mm: rt: Fix generic kmap_atomic for RT From: Thomas Gleixner Date: Sat, 19 Sep 2015 10:15:00 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The update to 4.1 brought in the mainline variant of the pagefault disable distangling from preempt count. 
That introduced a @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/highmem.h +++ b/include/linux/highmem.h -@@ -65,7 +65,7 @@ static inline void kunmap(struct page *p +@@ -66,7 +66,7 @@ static inline void kunmap(struct page *p static inline void *kmap_atomic(struct page *page) { @@ -30,7 +30,7 @@ Signed-off-by: Thomas Gleixner pagefault_disable(); return page_address(page); } -@@ -74,7 +74,7 @@ static inline void *kmap_atomic(struct p +@@ -75,7 +75,7 @@ static inline void *kmap_atomic(struct p static inline void __kunmap_atomic(void *addr) { pagefault_enable(); diff --git a/debian/patches/features/all/rt/mm-backing-dev-don-t-disable-IRQs-in-wb_congested_pu.patch b/debian/patches/features/all/rt/mm-backing-dev-don-t-disable-IRQs-in-wb_congested_pu.patch index 0fea4d4ff..d0e1c27cd 100644 --- a/debian/patches/features/all/rt/mm-backing-dev-don-t-disable-IRQs-in-wb_congested_pu.patch +++ b/debian/patches/features/all/rt/mm-backing-dev-don-t-disable-IRQs-in-wb_congested_pu.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 5 Feb 2016 12:17:14 +0100 Subject: mm: backing-dev: don't disable IRQs in wb_congested_put() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz it triggers: |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:930 @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/backing-dev.c +++ b/mm/backing-dev.c -@@ -459,9 +459,9 @@ void wb_congested_put(struct bdi_writeba +@@ -482,9 +482,9 @@ void wb_congested_put(struct bdi_writeba { unsigned long flags; diff --git a/debian/patches/features/all/rt/mm-bounce-local-irq-save-nort.patch b/debian/patches/features/all/rt/mm-bounce-local-irq-save-nort.patch index 63ddb1d0b..5f744faae 100644 --- a/debian/patches/features/all/rt/mm-bounce-local-irq-save-nort.patch +++ b/debian/patches/features/all/rt/mm-bounce-local-irq-save-nort.patch @@ -1,7 +1,7 @@ Subject: mm: bounce: Use local_irq_save_nort From: Thomas Gleixner Date: Wed, 09 Jan 2013 10:33:09 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz kmap_atomic() is preemptible on RT. @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/block/bounce.c +++ b/block/bounce.c -@@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_v +@@ -66,11 +66,11 @@ static void bounce_copy_vec(struct bio_v unsigned long flags; unsigned char *vto; diff --git a/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch b/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch index 95143e7bb..343440b89 100644 --- a/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch +++ b/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:51 -0500 Subject: mm/swap: Convert to percpu locked -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Replace global locks (get_cpu + local_irq_save) with "local_locks()". Currently there is one of for "rotate" and one for "swap". 
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/swap.h +++ b/include/linux/swap.h -@@ -269,6 +269,7 @@ extern unsigned long nr_free_pagecache_p +@@ -312,6 +312,7 @@ extern unsigned long nr_free_pagecache_p /* linux/mm/swap.c */ @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner extern void lru_cache_add_file(struct page *page); --- a/mm/compaction.c +++ b/mm/compaction.c -@@ -1601,10 +1601,12 @@ static enum compact_result compact_zone( +@@ -1634,10 +1634,12 @@ static enum compact_result compact_zone( block_start_pfn(cc->migrate_pfn, cc->order); if (cc->last_migrated_pfn < current_block_start) { @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner } --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -6787,8 +6787,9 @@ void __init free_area_init(unsigned long +@@ -6857,8 +6857,9 @@ void __init free_area_init(unsigned long static int page_alloc_cpu_dead(unsigned int cpu) { @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner /* * This path almost never happens for VM activity - pages are normally -@@ -242,11 +245,11 @@ void rotate_reclaimable_page(struct page +@@ -252,11 +255,11 @@ void rotate_reclaimable_page(struct page unsigned long flags; get_page(page); @@ -89,7 +89,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -296,12 +299,13 @@ void activate_page(struct page *page) +@@ -306,12 +309,13 @@ void activate_page(struct page *page) { page = compound_head(page); if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { @@ -105,7 +105,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -328,7 +332,7 @@ void activate_page(struct page *page) +@@ -338,7 +342,7 @@ void activate_page(struct page *page) static void __lru_cache_activate_page(struct page *page) { @@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner int i; /* -@@ -350,7 +354,7 @@ static void __lru_cache_activate_page(st +@@ -360,7 +364,7 @@ static void __lru_cache_activate_page(st } } @@ -123,7 +123,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -392,12 +396,12 @@ EXPORT_SYMBOL(mark_page_accessed); +@@ -402,12 +406,12 @@ EXPORT_SYMBOL(mark_page_accessed); static void __lru_cache_add(struct page *page) { @@ -138,7 +138,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -595,9 +599,9 @@ void lru_add_drain_cpu(int cpu) +@@ -613,9 +617,9 @@ void lru_add_drain_cpu(int cpu) unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -150,7 +150,7 @@ Signed-off-by: Thomas Gleixner } pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); -@@ -629,11 +633,12 @@ void deactivate_file_page(struct page *p +@@ -647,11 +651,12 @@ void deactivate_file_page(struct page *p return; if (likely(get_page_unless_zero(page))) { @@ -165,19 +165,19 @@ Signed-off-by: Thomas Gleixner } } -@@ -648,19 +653,20 @@ void deactivate_file_page(struct page *p - void deactivate_page(struct page *page) +@@ -666,19 +671,20 @@ void mark_page_lazyfree(struct page *pag { - if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { -- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); + if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && + !PageSwapCache(page) && !PageUnevictable(page)) { +- struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs); + struct pagevec *pvec = &get_locked_var(swapvec_lock, -+ lru_deactivate_pvecs); ++ lru_lazyfree_pvecs); get_page(page); if (!pagevec_add(pvec, page) || PageCompound(page)) - pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); -- put_cpu_var(lru_deactivate_pvecs); -+ put_locked_var(swapvec_lock, lru_deactivate_pvecs); + pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); +- 
put_cpu_var(lru_lazyfree_pvecs); ++ put_locked_var(swapvec_lock, lru_lazyfree_pvecs); } } diff --git a/debian/patches/features/all/rt/mm-disable-sloub-rt.patch b/debian/patches/features/all/rt/mm-disable-sloub-rt.patch index 36f9f55ae..410d1f430 100644 --- a/debian/patches/features/all/rt/mm-disable-sloub-rt.patch +++ b/debian/patches/features/all/rt/mm-disable-sloub-rt.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:44:03 -0500 Subject: mm: Allow only slub on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Disable SLAB and SLOB on -RT. Only SLUB is adopted to -RT needs. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/init/Kconfig +++ b/init/Kconfig -@@ -1825,6 +1825,7 @@ choice +@@ -1526,6 +1526,7 @@ choice config SLAB bool "SLAB" @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1845,6 +1846,7 @@ config SLUB +@@ -1546,6 +1547,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" diff --git a/debian/patches/features/all/rt/mm-enable-slub.patch b/debian/patches/features/all/rt/mm-enable-slub.patch index 158b71ea3..d51111553 100644 --- a/debian/patches/features/all/rt/mm-enable-slub.patch +++ b/debian/patches/features/all/rt/mm-enable-slub.patch @@ -1,7 +1,7 @@ Subject: mm: Enable SLUB for RT From: Thomas Gleixner Date: Thu, 25 Oct 2012 10:32:35 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Make SLUB RT aware by converting locks to raw and using free lists to move the freeing out of the lock held region. @@ -9,12 +9,12 @@ move the freeing out of the lock held region. Signed-off-by: Thomas Gleixner --- mm/slab.h | 4 + - mm/slub.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- - 2 files changed, 109 insertions(+), 29 deletions(-) + mm/slub.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- + 2 files changed, 110 insertions(+), 30 deletions(-) --- a/mm/slab.h +++ b/mm/slab.h -@@ -465,7 +465,11 @@ static inline void slab_post_alloc_hook( +@@ -454,7 +454,11 @@ static inline void slab_post_alloc_hook( * The slab lists for all objects. */ struct kmem_cache_node { @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner struct list_head slabs_partial; /* partial list first, better asm code */ --- a/mm/slub.c +++ b/mm/slub.c -@@ -1146,7 +1146,7 @@ static noinline int free_debug_processin +@@ -1180,7 +1180,7 @@ static noinline int free_debug_processin unsigned long uninitialized_var(flags); int ret = 0; @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner slab_lock(page); if (s->flags & SLAB_CONSISTENCY_CHECKS) { -@@ -1181,7 +1181,7 @@ static noinline int free_debug_processin +@@ -1215,7 +1215,7 @@ static noinline int free_debug_processin bulk_cnt, cnt); slab_unlock(page); @@ -46,7 +46,7 @@ Signed-off-by: Thomas Gleixner if (!ret) slab_fix(s, "Object at 0x%p not freed", object); return ret; -@@ -1309,6 +1309,12 @@ static inline void dec_slabs_node(struct +@@ -1343,6 +1343,12 @@ static inline void dec_slabs_node(struct #endif /* CONFIG_SLUB_DEBUG */ @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner /* * Hooks for other subsystems that check memory allocations. 
In a typical * production configuration these hooks all should produce no code at all. -@@ -1535,7 +1541,11 @@ static struct page *allocate_slab(struct +@@ -1569,7 +1575,11 @@ static struct page *allocate_slab(struct flags &= gfp_allowed_mask; @@ -71,7 +71,7 @@ Signed-off-by: Thomas Gleixner local_irq_enable(); flags |= s->allocflags; -@@ -1610,7 +1620,11 @@ static struct page *allocate_slab(struct +@@ -1644,7 +1654,11 @@ static struct page *allocate_slab(struct page->frozen = 1; out: @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner local_irq_disable(); if (!page) return NULL; -@@ -1670,6 +1684,16 @@ static void __free_slab(struct kmem_cach +@@ -1704,6 +1718,16 @@ static void __free_slab(struct kmem_cach __free_pages(page, order); } @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner #define need_reserve_slab_rcu \ (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) -@@ -1701,6 +1725,12 @@ static void free_slab(struct kmem_cache +@@ -1735,6 +1759,12 @@ static void free_slab(struct kmem_cache } call_rcu(head, rcu_free_slab); @@ -113,7 +113,7 @@ Signed-off-by: Thomas Gleixner } else __free_slab(s, page); } -@@ -1808,7 +1838,7 @@ static void *get_partial_node(struct kme +@@ -1842,7 +1872,7 @@ static void *get_partial_node(struct kme if (!n || !n->nr_partial) return NULL; @@ -122,7 +122,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_safe(page, page2, &n->partial, lru) { void *t; -@@ -1833,7 +1863,7 @@ static void *get_partial_node(struct kme +@@ -1867,7 +1897,7 @@ static void *get_partial_node(struct kme break; } @@ -131,7 +131,7 @@ Signed-off-by: Thomas Gleixner return object; } -@@ -2079,7 +2109,7 @@ static void deactivate_slab(struct kmem_ +@@ -2113,7 +2143,7 @@ static void deactivate_slab(struct kmem_ * that acquire_slab() will see a slab page that * is frozen */ @@ -140,7 +140,7 @@ Signed-off-by: Thomas Gleixner } } else { m = M_FULL; -@@ -2090,7 +2120,7 @@ static void deactivate_slab(struct kmem_ +@@ -2124,7 +2154,7 @@ static void deactivate_slab(struct kmem_ * slabs from diagnostic functions will not see * any frozen slabs. 
*/ @@ -149,7 +149,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -2125,7 +2155,7 @@ static void deactivate_slab(struct kmem_ +@@ -2159,7 +2189,7 @@ static void deactivate_slab(struct kmem_ goto redo; if (lock) @@ -158,7 +158,7 @@ Signed-off-by: Thomas Gleixner if (m == M_FREE) { stat(s, DEACTIVATE_EMPTY); -@@ -2157,10 +2187,10 @@ static void unfreeze_partials(struct kme +@@ -2194,10 +2224,10 @@ static void unfreeze_partials(struct kme n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) @@ -171,7 +171,7 @@ Signed-off-by: Thomas Gleixner } do { -@@ -2189,7 +2219,7 @@ static void unfreeze_partials(struct kme +@@ -2226,7 +2256,7 @@ static void unfreeze_partials(struct kme } if (n) @@ -180,7 +180,7 @@ Signed-off-by: Thomas Gleixner while (discard_page) { page = discard_page; -@@ -2228,14 +2258,21 @@ static void put_cpu_partial(struct kmem_ +@@ -2265,14 +2295,21 @@ static void put_cpu_partial(struct kmem_ pobjects = oldpage->pobjects; pages = oldpage->pages; if (drain && pobjects > s->cpu_partial) { @@ -202,7 +202,7 @@ Signed-off-by: Thomas Gleixner oldpage = NULL; pobjects = 0; pages = 0; -@@ -2307,7 +2344,22 @@ static bool has_cpu_slab(int cpu, void * +@@ -2342,7 +2379,22 @@ static bool has_cpu_slab(int cpu, void * static void flush_all(struct kmem_cache *s) { @@ -225,7 +225,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -2362,10 +2414,10 @@ static unsigned long count_partial(struc +@@ -2397,10 +2449,10 @@ static unsigned long count_partial(struc unsigned long x = 0; struct page *page; @@ -238,7 +238,7 @@ Signed-off-by: Thomas Gleixner return x; } #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2503,8 +2555,10 @@ static inline void *get_freelist(struct +@@ -2538,8 +2590,10 @@ static inline void *get_freelist(struct * already disabled (which is the case for bulk allocation). */ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, @@ -250,7 +250,7 @@ Signed-off-by: Thomas Gleixner void *freelist; struct page *page; -@@ -2564,6 +2618,13 @@ static void *___slab_alloc(struct kmem_c +@@ -2595,6 +2649,13 @@ static void *___slab_alloc(struct kmem_c VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); @@ -264,16 +264,25 @@ Signed-off-by: Thomas Gleixner return freelist; new_slab: -@@ -2595,7 +2656,7 @@ static void *___slab_alloc(struct kmem_c - deactivate_slab(s, page, get_freepointer(s, freelist)); - c->page = NULL; - c->freelist = NULL; +@@ -2610,7 +2671,7 @@ static void *___slab_alloc(struct kmem_c + + if (unlikely(!freelist)) { + slab_out_of_memory(s, gfpflags, node); +- return NULL; ++ goto out; + } + + page = c->page; +@@ -2623,7 +2684,7 @@ static void *___slab_alloc(struct kmem_c + goto new_slab; /* Slab failed checks. 
Next slab needed */ + + deactivate_slab(s, page, get_freepointer(s, freelist), c); - return freelist; + goto out; } /* -@@ -2607,6 +2668,7 @@ static void *__slab_alloc(struct kmem_ca +@@ -2635,6 +2696,7 @@ static void *__slab_alloc(struct kmem_ca { void *p; unsigned long flags; @@ -281,7 +290,7 @@ Signed-off-by: Thomas Gleixner local_irq_save(flags); #ifdef CONFIG_PREEMPT -@@ -2618,8 +2680,9 @@ static void *__slab_alloc(struct kmem_ca +@@ -2646,8 +2708,9 @@ static void *__slab_alloc(struct kmem_ca c = this_cpu_ptr(s->cpu_slab); #endif @@ -292,7 +301,7 @@ Signed-off-by: Thomas Gleixner return p; } -@@ -2805,7 +2868,7 @@ static void __slab_free(struct kmem_cach +@@ -2833,7 +2896,7 @@ static void __slab_free(struct kmem_cach do { if (unlikely(n)) { @@ -301,7 +310,7 @@ Signed-off-by: Thomas Gleixner n = NULL; } prior = page->freelist; -@@ -2837,7 +2900,7 @@ static void __slab_free(struct kmem_cach +@@ -2865,7 +2928,7 @@ static void __slab_free(struct kmem_cach * Otherwise the list_lock will synchronize with * other processors updating the list of slabs. */ @@ -310,7 +319,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -2879,7 +2942,7 @@ static void __slab_free(struct kmem_cach +@@ -2907,7 +2970,7 @@ static void __slab_free(struct kmem_cach add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } @@ -319,7 +328,7 @@ Signed-off-by: Thomas Gleixner return; slab_empty: -@@ -2894,7 +2957,7 @@ static void __slab_free(struct kmem_cach +@@ -2922,7 +2985,7 @@ static void __slab_free(struct kmem_cach remove_full(s, n, page); } @@ -328,7 +337,7 @@ Signed-off-by: Thomas Gleixner stat(s, FREE_SLAB); discard_slab(s, page); } -@@ -3099,6 +3162,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3127,6 +3190,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca void **p) { struct kmem_cache_cpu *c; @@ -336,7 +345,7 @@ Signed-off-by: Thomas Gleixner int i; /* memcg and kmem_cache debug support */ -@@ -3122,7 +3186,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3150,7 +3214,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca * of re-populating per CPU c->freelist */ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, @@ -345,7 +354,7 @@ Signed-off-by: Thomas Gleixner if (unlikely(!p[i])) goto error; -@@ -3134,6 +3198,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3162,6 +3226,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca } c->tid = next_tid(c->tid); local_irq_enable(); @@ -353,7 +362,7 @@ Signed-off-by: Thomas Gleixner /* Clear memory outside IRQ disabled fastpath loop */ if (unlikely(flags & __GFP_ZERO)) { -@@ -3281,7 +3346,7 @@ static void +@@ -3309,7 +3374,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -362,7 +371,7 @@ Signed-off-by: Thomas Gleixner INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG atomic_long_set(&n->nr_slabs, 0); -@@ -3625,6 +3690,10 @@ static void list_slab_objects(struct kme +@@ -3663,6 +3728,10 @@ static void list_slab_objects(struct kme const char *text) { #ifdef CONFIG_SLUB_DEBUG @@ -373,7 +382,7 @@ Signed-off-by: Thomas Gleixner void *addr = page_address(page); void *p; unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * -@@ -3645,6 +3714,7 @@ static void list_slab_objects(struct kme +@@ -3683,6 +3752,7 @@ static void list_slab_objects(struct kme slab_unlock(page); kfree(map); #endif @@ -381,7 +390,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -3658,7 +3728,7 @@ static void free_partial(struct kmem_cac +@@ -3696,7 +3766,7 @@ static void free_partial(struct kmem_cac struct page *page, *h; BUG_ON(irqs_disabled()); @@ -390,7 +399,7 @@ 
Signed-off-by: Thomas Gleixner list_for_each_entry_safe(page, h, &n->partial, lru) { if (!page->inuse) { remove_partial(n, page); -@@ -3668,7 +3738,7 @@ static void free_partial(struct kmem_cac +@@ -3706,7 +3776,7 @@ static void free_partial(struct kmem_cac "Objects remaining in %s on __kmem_cache_shutdown()"); } } @@ -399,7 +408,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_safe(page, h, &discard, lru) discard_slab(s, page); -@@ -3912,7 +3982,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -3950,7 +4020,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = 0; i < SHRINK_PROMOTE_MAX; i++) INIT_LIST_HEAD(promote + i); @@ -408,7 +417,7 @@ Signed-off-by: Thomas Gleixner /* * Build lists of slabs to discard or promote. -@@ -3943,7 +4013,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -3981,7 +4051,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) list_splice(promote + i, &n->partial); @@ -417,7 +426,7 @@ Signed-off-by: Thomas Gleixner /* Release empty slabs */ list_for_each_entry_safe(page, t, &discard, lru) -@@ -4156,6 +4226,12 @@ void __init kmem_cache_init(void) +@@ -4194,6 +4264,12 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; @@ -430,7 +439,7 @@ Signed-off-by: Thomas Gleixner if (debug_guardpage_minorder()) slub_max_order = 0; -@@ -4364,7 +4440,7 @@ static int validate_slab_node(struct kme +@@ -4402,7 +4478,7 @@ static int validate_slab_node(struct kme struct page *page; unsigned long flags; @@ -439,7 +448,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry(page, &n->partial, lru) { validate_slab_slab(s, page, map); -@@ -4386,7 +4462,7 @@ static int validate_slab_node(struct kme +@@ -4424,7 +4500,7 @@ static int validate_slab_node(struct kme s->name, count, atomic_long_read(&n->nr_slabs)); out: @@ -448,7 +457,7 @@ Signed-off-by: Thomas Gleixner return count; } -@@ -4574,12 +4650,12 @@ static int list_locations(struct kmem_ca +@@ -4612,12 +4688,12 @@ static int list_locations(struct kmem_ca if (!atomic_long_read(&n->nr_slabs)) continue; diff --git a/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch b/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch index 758cf8a1e..75bbb6659 100644 --- a/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch +++ b/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:30:13 -0500 Subject: mm/vmstat: Protect per cpu variables with preempt disable on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Disable preemption on -RT for the vmstat code. On vanila the code runs in IRQ-off regions while on -RT it is not. 
"preempt_disable" ensures that the @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner static inline void count_vm_events(enum vm_event_item item, long delta) --- a/mm/vmstat.c +++ b/mm/vmstat.c -@@ -245,6 +245,7 @@ void __mod_zone_page_state(struct zone * +@@ -249,6 +249,7 @@ void __mod_zone_page_state(struct zone * long x; long t; @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); -@@ -254,6 +255,7 @@ void __mod_zone_page_state(struct zone * +@@ -258,6 +259,7 @@ void __mod_zone_page_state(struct zone * x = 0; } __this_cpu_write(*p, x); @@ -55,7 +55,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__mod_zone_page_state); -@@ -265,6 +267,7 @@ void __mod_node_page_state(struct pglist +@@ -269,6 +271,7 @@ void __mod_node_page_state(struct pglist long x; long t; @@ -63,7 +63,7 @@ Signed-off-by: Thomas Gleixner x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); -@@ -274,6 +277,7 @@ void __mod_node_page_state(struct pglist +@@ -278,6 +281,7 @@ void __mod_node_page_state(struct pglist x = 0; } __this_cpu_write(*p, x); @@ -71,7 +71,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__mod_node_page_state); -@@ -306,6 +310,7 @@ void __inc_zone_state(struct zone *zone, +@@ -310,6 +314,7 @@ void __inc_zone_state(struct zone *zone, s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; @@ -79,7 +79,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -314,6 +319,7 @@ void __inc_zone_state(struct zone *zone, +@@ -318,6 +323,7 @@ void __inc_zone_state(struct zone *zone, zone_page_state_add(v + overstep, zone, item); __this_cpu_write(*p, -overstep); } @@ -87,7 +87,7 @@ Signed-off-by: Thomas Gleixner } void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -322,6 +328,7 @@ void __inc_node_state(struct pglist_data +@@ -326,6 +332,7 @@ void __inc_node_state(struct pglist_data s8 __percpu *p = pcp->vm_node_stat_diff + item; s8 v, t; @@ -95,7 +95,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -330,6 +337,7 @@ void __inc_node_state(struct pglist_data +@@ -334,6 +341,7 @@ void __inc_node_state(struct pglist_data node_page_state_add(v + overstep, pgdat, item); __this_cpu_write(*p, -overstep); } @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) -@@ -350,6 +358,7 @@ void __dec_zone_state(struct zone *zone, +@@ -354,6 +362,7 @@ void __dec_zone_state(struct zone *zone, s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; @@ -111,7 +111,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -358,6 +367,7 @@ void __dec_zone_state(struct zone *zone, +@@ -362,6 +371,7 @@ void __dec_zone_state(struct zone *zone, zone_page_state_add(v - overstep, zone, item); __this_cpu_write(*p, overstep); } @@ -119,7 +119,7 @@ Signed-off-by: Thomas Gleixner } void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -366,6 +376,7 @@ void __dec_node_state(struct pglist_data +@@ -370,6 +380,7 @@ void __dec_node_state(struct pglist_data s8 __percpu *p = pcp->vm_node_stat_diff + item; s8 v, t; @@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -374,6 +385,7 @@ void 
__dec_node_state(struct pglist_data +@@ -378,6 +389,7 @@ void __dec_node_state(struct pglist_data node_page_state_add(v - overstep, pgdat, item); __this_cpu_write(*p, overstep); } diff --git a/debian/patches/features/all/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch b/debian/patches/features/all/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch index 4a426a4d1..c301d3119 100644 --- a/debian/patches/features/all/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch +++ b/debian/patches/features/all/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch @@ -1,7 +1,7 @@ From: Yang Shi Subject: mm/memcontrol: Don't call schedule_work_on in preemption disabled context Date: Wed, 30 Oct 2013 11:48:33 -0700 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The following trace is triggered when running ltp oom test cases: @@ -49,21 +49,21 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -1782,7 +1782,7 @@ static void drain_all_stock(struct mem_c - return; - /* Notify other cpus that system-wide "drain" is running */ - get_online_cpus(); +@@ -1831,7 +1831,7 @@ static void drain_all_stock(struct mem_c + * as well as workers from this path always operate on the local + * per-cpu data. CPU up doesn't touch memcg_stock at all. + */ - curcpu = get_cpu(); + curcpu = get_cpu_light(); for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; -@@ -1799,7 +1799,7 @@ static void drain_all_stock(struct mem_c - schedule_work_on(cpu, &stock->work); +@@ -1851,7 +1851,7 @@ static void drain_all_stock(struct mem_c } + css_put(&memcg->css); } - put_cpu(); + put_cpu_light(); - put_online_cpus(); mutex_unlock(&percpu_charge_mutex); } + diff --git a/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch b/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch index f45de9698..d5e0c2984 100644 --- a/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch +++ b/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch @@ -1,15 +1,15 @@ From: Sebastian Andrzej Siewior Subject: mm/memcontrol: Replace local_irq_disable with local locks Date: Wed, 28 Jan 2015 17:14:16 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz There are a few local_irq_disable() which then take sleeping locks. This patch converts them local locks. 
Signed-off-by: Sebastian Andrzej Siewior --- - mm/memcontrol.c | 20 ++++++++++++++------ - 1 file changed, 14 insertions(+), 6 deletions(-) + mm/memcontrol.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { -@@ -4535,12 +4538,12 @@ static int mem_cgroup_move_account(struc +@@ -4621,12 +4624,12 @@ static int mem_cgroup_move_account(struc ret = 0; @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: unlock_page(page); out: -@@ -5422,10 +5425,10 @@ void mem_cgroup_commit_charge(struct pag +@@ -5569,10 +5572,10 @@ void mem_cgroup_commit_charge(struct pag commit_charge(page, memcg, lrucare); @@ -58,32 +58,46 @@ Signed-off-by: Sebastian Andrzej Siewior if (do_memsw_account() && PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; -@@ -5481,14 +5484,14 @@ static void uncharge_batch(struct mem_cg - memcg_oom_recover(memcg); +@@ -5641,7 +5644,7 @@ static void uncharge_batch(const struct + memcg_oom_recover(ug->memcg); } - local_irq_save(flags); + local_lock_irqsave(event_lock, flags); - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); - __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); - __this_cpu_add(memcg->stat->nr_page_events, nr_pages); - memcg_check_events(memcg, dummy_page); + __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon); + __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file); + __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge); +@@ -5649,7 +5652,7 @@ static void uncharge_batch(const struct + __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout); + __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages); + memcg_check_events(ug->memcg, ug->dummy_page); - local_irq_restore(flags); + local_unlock_irqrestore(event_lock, flags); - if (!mem_cgroup_is_root(memcg)) - css_put_many(&memcg->css, nr_pages); -@@ -5838,6 +5841,7 @@ void mem_cgroup_swapout(struct page *pag - { + if (!mem_cgroup_is_root(ug->memcg)) + css_put_many(&ug->memcg->css, nr_pages); +@@ -5812,10 +5815,10 @@ void mem_cgroup_migrate(struct page *old + + commit_charge(newpage, memcg, false); + +- local_irq_save(flags); ++ local_lock_irqsave(event_lock, flags); + mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); + memcg_check_events(memcg, newpage); +- local_irq_restore(flags); ++ local_unlock_irqrestore(event_lock, flags); + } + + DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); +@@ -5993,6 +5996,7 @@ void mem_cgroup_swapout(struct page *pag struct mem_cgroup *memcg, *swap_memcg; + unsigned int nr_entries; unsigned short oldid; + unsigned long flags; VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); -@@ -5878,12 +5882,16 @@ void mem_cgroup_swapout(struct page *pag +@@ -6038,13 +6042,17 @@ void mem_cgroup_swapout(struct page *pag * important here to have the interrupts disabled because it is the * only synchronisation we have for udpating the per-CPU variables. 
*/ @@ -91,7 +105,8 @@ Signed-off-by: Sebastian Andrzej Siewior +#ifndef CONFIG_PREEMPT_RT_BASE VM_BUG_ON(!irqs_disabled()); +#endif - mem_cgroup_charge_statistics(memcg, page, false, -1); + mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page), + -nr_entries); memcg_check_events(memcg, page); if (!mem_cgroup_is_root(memcg)) @@ -99,4 +114,4 @@ Signed-off-by: Sebastian Andrzej Siewior + local_unlock_irqrestore(event_lock, flags); } - /* + /** diff --git a/debian/patches/features/all/rt/mm-memcontrol-mem_cgroup_migrate-replace-another-loc.patch b/debian/patches/features/all/rt/mm-memcontrol-mem_cgroup_migrate-replace-another-loc.patch deleted file mode 100644 index 816d1a8a3..000000000 --- a/debian/patches/features/all/rt/mm-memcontrol-mem_cgroup_migrate-replace-another-loc.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: Mike Galbraith -Date: Sun, 5 Jun 2016 08:11:13 +0200 -Subject: [PATCH] mm/memcontrol: mem_cgroup_migrate() - replace another - local_irq_disable() w. local_lock_irq() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -v4.6 grew a local_irq_disable() in mm/memcontrol.c::mem_cgroup_migrate(). -Convert it to use the existing local lock (event_lock) like the others. - -Signed-off-by: Mike Galbraith -Signed-off-by: Sebastian Andrzej Siewior ---- - mm/memcontrol.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -5646,10 +5646,10 @@ void mem_cgroup_migrate(struct page *old - - commit_charge(newpage, memcg, false); - -- local_irq_save(flags); -+ local_lock_irqsave(event_lock, flags); - mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); - memcg_check_events(memcg, newpage); -- local_irq_restore(flags); -+ local_unlock_irqrestore(event_lock, flags); - } - - DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); diff --git a/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch b/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch index 06b8b2997..7fb2077ee 100644 --- a/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch +++ b/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch @@ -1,7 +1,7 @@ Subject: mm: page_alloc: Use local_lock_on() instead of plain spinlock From: Thomas Gleixner Date: Thu, 27 Sep 2012 11:11:46 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The plain spinlock while sufficient does not update the local_lock internals. Use a proper local_lock function instead to ease debugging. 
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -291,9 +291,9 @@ static DEFINE_LOCAL_IRQ_LOCK(pa_lock); +@@ -292,9 +292,9 @@ static DEFINE_LOCAL_IRQ_LOCK(pa_lock); #ifdef CONFIG_PREEMPT_RT_BASE # define cpu_lock_irqsave(cpu, flags) \ diff --git a/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch b/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch index 51e031adb..8ae00eb8f 100644 --- a/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch +++ b/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch @@ -1,7 +1,7 @@ From: Peter Zijlstra Date: Fri Jul 3 08:44:37 2009 -0500 Subject: mm: page_alloc: Reduce lock sections further -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Split out the pages which are to be freed into a separate list and call free_pages_bulk() outside of the percpu page allocator locks. @@ -9,12 +9,12 @@ call free_pages_bulk() outside of the percpu page allocator locks. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- - mm/page_alloc.c | 94 +++++++++++++++++++++++++++++++++++++++----------------- - 1 file changed, 66 insertions(+), 28 deletions(-) + mm/page_alloc.c | 93 +++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 65 insertions(+), 28 deletions(-) --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -1099,7 +1099,7 @@ static bool bulkfree_pcp_prepare(struct +@@ -1100,7 +1100,7 @@ static bool bulkfree_pcp_prepare(struct #endif /* CONFIG_DEBUG_VM */ /* @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. * -@@ -1110,19 +1110,58 @@ static bool bulkfree_pcp_prepare(struct +@@ -1111,15 +1111,53 @@ static bool bulkfree_pcp_prepare(struct * pinned" detection logic. */ static void free_pcppages_bulk(struct zone *zone, int count, @@ -32,21 +32,16 @@ Signed-off-by: Thomas Gleixner { - int migratetype = 0; - int batch_free = 0; - unsigned long nr_scanned; bool isolated_pageblocks; + unsigned long flags; -+ -+ spin_lock_irqsave(&zone->lock, flags); - spin_lock(&zone->lock); ++ spin_lock_irqsave(&zone->lock, flags); isolated_pageblocks = has_isolate_pageblock(zone); - nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); - if (nr_scanned) - __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); + while (!list_empty(list)) { + struct page *page; -+ int mt; /* migratetype of the to-be-freed page */ ++ int mt; /* migratetype of the to-be-freed page */ + + page = list_first_entry(list, struct page, lru); + /* must delete as __free_one_page list manipulates */ @@ -86,7 +81,7 @@ Signed-off-by: Thomas Gleixner while (count) { struct page *page; struct list_head *list; -@@ -1138,7 +1177,7 @@ static void free_pcppages_bulk(struct zo +@@ -1135,7 +1173,7 @@ static void free_pcppages_bulk(struct zo batch_free++; if (++migratetype == MIGRATE_PCPTYPES) migratetype = 0; @@ -95,7 +90,7 @@ Signed-off-by: Thomas Gleixner } while (list_empty(list)); /* This is the only non-empty list. Free them all. 
*/ -@@ -1146,27 +1185,12 @@ static void free_pcppages_bulk(struct zo +@@ -1143,27 +1181,12 @@ static void free_pcppages_bulk(struct zo batch_free = count; do { @@ -124,18 +119,16 @@ Signed-off-by: Thomas Gleixner } static void free_one_page(struct zone *zone, -@@ -1175,7 +1199,9 @@ static void free_one_page(struct zone *z +@@ -1171,13 +1194,15 @@ static void free_one_page(struct zone *z + unsigned int order, int migratetype) { - unsigned long nr_scanned; - spin_lock(&zone->lock); + unsigned long flags; + + spin_lock_irqsave(&zone->lock, flags); - nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); - if (nr_scanned) - __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); -@@ -1185,7 +1211,7 @@ static void free_one_page(struct zone *z + if (unlikely(has_isolate_pageblock(zone) || + is_migrate_isolate(migratetype))) { migratetype = get_pfnblock_migratetype(page, pfn); } __free_one_page(page, pfn, zone, order, migratetype); @@ -144,7 +137,7 @@ Signed-off-by: Thomas Gleixner } static void __meminit __init_single_page(struct page *page, unsigned long pfn, -@@ -2299,16 +2325,18 @@ static int rmqueue_bulk(struct zone *zon +@@ -2384,16 +2409,18 @@ static int rmqueue_bulk(struct zone *zon void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { unsigned long flags; @@ -164,7 +157,7 @@ Signed-off-by: Thomas Gleixner } #endif -@@ -2324,16 +2352,21 @@ static void drain_pages_zone(unsigned in +@@ -2409,16 +2436,21 @@ static void drain_pages_zone(unsigned in unsigned long flags; struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; @@ -188,7 +181,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -2556,8 +2589,13 @@ void free_hot_cold_page(struct page *pag +@@ -2656,8 +2688,13 @@ void free_hot_cold_page(struct page *pag pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); diff --git a/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch index 46c0d47ec..f8d666484 100644 --- a/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch +++ b/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:37 -0500 Subject: mm: page_alloc: rt-friendly per-cpu pages -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz rt-friendly per-cpu pages: convert the irqs-off per-cpu locking method into a preemptible, explicit-per-cpu-locks method. 
@@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -286,6 +287,18 @@ EXPORT_SYMBOL(nr_node_ids); +@@ -287,6 +288,18 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -@@ -1258,10 +1271,10 @@ static void __free_pages_ok(struct page +@@ -1250,10 +1263,10 @@ static void __free_pages_ok(struct page return; migratetype = get_pfnblock_migratetype(page, pfn); @@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner } static void __init __free_pages_boot_core(struct page *page, unsigned int order) -@@ -2288,14 +2301,14 @@ void drain_zone_pages(struct zone *zone, +@@ -2373,14 +2386,14 @@ void drain_zone_pages(struct zone *zone, unsigned long flags; int to_drain, batch; @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner } #endif -@@ -2312,7 +2325,7 @@ static void drain_pages_zone(unsigned in +@@ -2397,7 +2410,7 @@ static void drain_pages_zone(unsigned in struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; @@ -84,7 +84,7 @@ Signed-off-by: Thomas Gleixner pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; -@@ -2320,7 +2333,7 @@ static void drain_pages_zone(unsigned in +@@ -2405,7 +2418,7 @@ static void drain_pages_zone(unsigned in free_pcppages_bulk(zone, pcp->count, pcp); pcp->count = 0; } @@ -93,7 +93,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -2355,6 +2368,7 @@ void drain_local_pages(struct zone *zone +@@ -2440,6 +2453,7 @@ void drain_local_pages(struct zone *zone drain_pages(cpu); } @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner static void drain_local_pages_wq(struct work_struct *work) { /* -@@ -2368,6 +2382,7 @@ static void drain_local_pages_wq(struct +@@ -2453,6 +2467,7 @@ static void drain_local_pages_wq(struct drain_local_pages(NULL); preempt_enable(); } @@ -109,7 +109,7 @@ Signed-off-by: Thomas Gleixner /* * Spill all the per-cpu pages from all CPUs back into the buddy allocator. 
-@@ -2438,7 +2453,14 @@ void drain_all_pages(struct zone *zone) +@@ -2523,7 +2538,14 @@ void drain_all_pages(struct zone *zone) else cpumask_clear_cpu(cpu, &cpus_with_pcps); } @@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner for_each_cpu(cpu, &cpus_with_pcps) { struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu); INIT_WORK(work, drain_local_pages_wq); -@@ -2446,6 +2468,7 @@ void drain_all_pages(struct zone *zone) +@@ -2531,6 +2553,7 @@ void drain_all_pages(struct zone *zone) } for_each_cpu(cpu, &cpus_with_pcps) flush_work(per_cpu_ptr(&pcpu_drain, cpu)); @@ -133,7 +133,7 @@ Signed-off-by: Thomas Gleixner mutex_unlock(&pcpu_drain_mutex); } -@@ -2507,7 +2530,7 @@ void free_hot_cold_page(struct page *pag +@@ -2607,7 +2630,7 @@ void free_hot_cold_page(struct page *pag migratetype = get_pfnblock_migratetype(page, pfn); set_pcppage_migratetype(page, migratetype); @@ -142,7 +142,7 @@ Signed-off-by: Thomas Gleixner __count_vm_event(PGFREE); /* -@@ -2538,7 +2561,7 @@ void free_hot_cold_page(struct page *pag +@@ -2638,7 +2661,7 @@ void free_hot_cold_page(struct page *pag } out: @@ -151,7 +151,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -2695,7 +2718,7 @@ static struct page *rmqueue_pcplist(stru +@@ -2795,7 +2818,7 @@ static struct page *rmqueue_pcplist(stru struct page *page; unsigned long flags; @@ -160,7 +160,7 @@ Signed-off-by: Thomas Gleixner pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); -@@ -2703,7 +2726,7 @@ static struct page *rmqueue_pcplist(stru +@@ -2803,7 +2826,7 @@ static struct page *rmqueue_pcplist(stru __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); } @@ -169,7 +169,7 @@ Signed-off-by: Thomas Gleixner return page; } -@@ -2730,7 +2753,7 @@ struct page *rmqueue(struct zone *prefer +@@ -2830,7 +2853,7 @@ struct page *rmqueue(struct zone *prefer * allocate greater than order-1 page units with __GFP_NOFAIL. 
*/ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); @@ -178,7 +178,7 @@ Signed-off-by: Thomas Gleixner do { page = NULL; -@@ -2750,14 +2773,14 @@ struct page *rmqueue(struct zone *prefer +@@ -2850,14 +2873,14 @@ struct page *rmqueue(struct zone *prefer __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); @@ -195,7 +195,7 @@ Signed-off-by: Thomas Gleixner return NULL; } -@@ -7591,7 +7614,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7695,7 +7718,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -204,7 +204,7 @@ Signed-off-by: Thomas Gleixner if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -7600,7 +7623,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7704,7 +7727,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } diff --git a/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch b/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch index cf23f5586..782a8856f 100644 --- a/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch +++ b/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch @@ -1,7 +1,7 @@ From: Luiz Capitulino Date: Fri, 27 May 2016 15:03:28 +0200 Subject: [PATCH] mm: perform lru_add_drain_all() remotely -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz lru_add_drain_all() works by scheduling lru_add_drain_cpu() to run on all CPUs that have non-empty LRU pagevecs and then waiting for @@ -20,12 +20,12 @@ Signed-off-by: Rik van Riel Signed-off-by: Luiz Capitulino Signed-off-by: Sebastian Andrzej Siewior --- - mm/swap.c | 37 ++++++++++++++++++++++++++++++------- - 1 file changed, 30 insertions(+), 7 deletions(-) + mm/swap.c | 36 ++++++++++++++++++++++++++++++------ + 1 file changed, 30 insertions(+), 6 deletions(-) --- a/mm/swap.c +++ b/mm/swap.c -@@ -599,9 +599,15 @@ void lru_add_drain_cpu(int cpu) +@@ -617,9 +617,15 @@ void lru_add_drain_cpu(int cpu) unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior } pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); -@@ -669,6 +675,16 @@ void lru_add_drain(void) +@@ -687,6 +693,16 @@ void lru_add_drain(void) local_unlock_cpu(swapvec_lock); } @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_drain(); -@@ -676,6 +692,16 @@ static void lru_add_drain_per_cpu(struct +@@ -694,6 +710,16 @@ static void lru_add_drain_per_cpu(struct static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); @@ -72,19 +72,19 @@ Signed-off-by: Sebastian Andrzej Siewior +} +#endif + - void lru_add_drain_all(void) + void lru_add_drain_all_cpuslocked(void) { static DEFINE_MUTEX(lock); -@@ -694,21 +720,18 @@ void lru_add_drain_all(void) +@@ -711,21 +737,19 @@ void lru_add_drain_all_cpuslocked(void) cpumask_clear(&has_work); for_each_online_cpu(cpu) { - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); -- + if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || - pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || + 
pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || - need_activate_page_drain(cpu)) { - INIT_WORK(work, lru_add_drain_per_cpu); - queue_work_on(cpu, mm_percpu_wq, work); @@ -99,5 +99,5 @@ Signed-off-by: Sebastian Andrzej Siewior flush_work(&per_cpu(lru_add_drain_work, cpu)); +#endif - put_online_cpus(); mutex_unlock(&lock); + } diff --git a/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch b/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch index e9ce47a93..f18e2e471 100644 --- a/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch +++ b/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch @@ -1,7 +1,7 @@ From: Yong Zhang Date: Tue, 15 May 2012 13:53:56 +0800 Subject: mm: Protect activate_mm() by preempt_[disable&enable]_rt() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz User preempt_*_rt instead of local_irq_*_rt or otherwise there will be warning on ARM like below: @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner --- a/fs/exec.c +++ b/fs/exec.c -@@ -1042,12 +1042,14 @@ static int exec_mmap(struct mm_struct *m +@@ -1024,12 +1024,14 @@ static int exec_mmap(struct mm_struct *m } } task_lock(tsk); diff --git a/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch b/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch index a7ae27d42..dca954dd6 100644 --- a/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch +++ b/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch @@ -1,7 +1,7 @@ Subject: mm, rt: kmap_atomic scheduling From: Peter Zijlstra Date: Thu, 28 Jul 2011 10:43:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz In fact, with migrate_disable() existing one could play games with kmap_atomic. You could save/restore the kmap_atomic slots on context @@ -31,15 +31,15 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c -@@ -37,6 +37,7 @@ - #include +@@ -38,6 +38,7 @@ #include #include + #include +#include #include #include -@@ -196,6 +197,35 @@ start_thread(struct pt_regs *regs, unsig +@@ -198,6 +199,35 @@ start_thread(struct pt_regs *regs, unsig } EXPORT_SYMBOL_GPL(start_thread); @@ -75,7 +75,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins /* * switch_to(x,y) should switch tasks from x to y. 
-@@ -271,6 +301,8 @@ EXPORT_SYMBOL_GPL(start_thread); +@@ -273,6 +303,8 @@ EXPORT_SYMBOL_GPL(start_thread); task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); @@ -164,7 +164,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins } --- a/include/linux/highmem.h +++ b/include/linux/highmem.h -@@ -86,32 +86,51 @@ static inline void __kunmap_atomic(void +@@ -87,32 +87,51 @@ static inline void __kunmap_atomic(void #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) @@ -222,7 +222,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -26,6 +26,7 @@ +@@ -27,6 +27,7 @@ #include #include #include @@ -230,7 +230,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -@@ -1058,6 +1059,12 @@ struct task_struct { +@@ -1104,6 +1105,12 @@ struct task_struct { int softirq_nestcnt; unsigned int softirqs_raised; #endif @@ -245,7 +245,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins #endif --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h -@@ -24,6 +24,7 @@ static __always_inline void pagefault_di +@@ -185,6 +185,7 @@ static __always_inline void pagefault_di */ static inline void pagefault_disable(void) { @@ -253,7 +253,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault -@@ -40,6 +41,7 @@ static inline void pagefault_enable(void +@@ -201,6 +202,7 @@ static inline void pagefault_enable(void */ barrier(); pagefault_disabled_dec(); @@ -263,7 +263,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins /* --- a/mm/highmem.c +++ b/mm/highmem.c -@@ -29,10 +29,11 @@ +@@ -30,10 +30,11 @@ #include #include @@ -276,7 +276,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins /* * Virtual_count is not a pure "count". -@@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkm +@@ -108,8 +109,9 @@ static inline wait_queue_head_t *get_pkm unsigned long totalhigh_pages __read_mostly; EXPORT_SYMBOL(totalhigh_pages); diff --git a/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch b/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch index 7a2b44725..9b3297fda 100644 --- a/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch +++ b/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:44:34 -0500 Subject: mm/scatterlist: Do not disable irqs on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz For -RT it is enough to keep pagefault disabled (which is currently handled by kmap_atomic()). 
diff --git a/debian/patches/features/all/rt/mm-swap-don-t-disable-preemption-while-taking-the-pe.patch b/debian/patches/features/all/rt/mm-swap-don-t-disable-preemption-while-taking-the-pe.patch deleted file mode 100644 index 4876d56b6..000000000 --- a/debian/patches/features/all/rt/mm-swap-don-t-disable-preemption-while-taking-the-pe.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 23 Jun 2017 11:43:30 +0200 -Subject: [PATCH] mm, swap: don't disable preemption while taking the per-CPU - cache -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -get_cpu_var() disables preemption and returns the per-CPU version of the -variable. Disabling preemption is useful to ensure atomic access to the -variable within the critical section. -In this case however, after the per-CPU version of the variable is -obtained the ->free_lock is acquired. For that reason it seems the raw -accessor could be used. It only seems that ->slots_ret should be -retested (because with disabled preemption this variable can not be set -to NULL otherwise). -This popped up during PREEMPT-RT testing because it tries to take -spinlocks in a preempt disabled section. - -Signed-off-by: Sebastian Andrzej Siewior ---- - mm/swap_slots.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - ---- a/mm/swap_slots.c -+++ b/mm/swap_slots.c -@@ -267,11 +267,11 @@ int free_swap_slot(swp_entry_t entry) - { - struct swap_slots_cache *cache; - -- cache = &get_cpu_var(swp_slots); -+ cache = raw_cpu_ptr(&swp_slots); - if (use_swap_slot_cache && cache->slots_ret) { - spin_lock_irq(&cache->free_lock); - /* Swap slots cache may be deactivated before acquiring lock */ -- if (!use_swap_slot_cache) { -+ if (!use_swap_slot_cache || !cache->slots_ret) { - spin_unlock_irq(&cache->free_lock); - goto direct_free; - } -@@ -291,7 +291,6 @@ int free_swap_slot(swp_entry_t entry) - direct_free: - swapcache_free_entries(&entry, 1); - } -- put_cpu_var(swp_slots); - - return 0; - } diff --git a/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch b/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch index 943c182fb..4ac9276d0 100644 --- a/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch +++ b/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch @@ -1,7 +1,7 @@ Subject: mm/vmalloc: Another preempt disable region which sucks From: Thomas Gleixner Date: Tue, 12 Jul 2011 11:39:36 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Avoid the preempt disable version of get_cpu_var(). The inner-lock should provide enough serialisation. 
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/mm/vmalloc.c +++ b/mm/vmalloc.c -@@ -866,7 +866,7 @@ static void *new_vmap_block(unsigned int +@@ -865,7 +865,7 @@ static void *new_vmap_block(unsigned int struct vmap_block *vb; struct vmap_area *va; unsigned long vb_idx; @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner void *vaddr; node = numa_node_id(); -@@ -909,11 +909,12 @@ static void *new_vmap_block(unsigned int +@@ -908,11 +908,12 @@ static void *new_vmap_block(unsigned int BUG_ON(err); radix_tree_preload_end(); @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner return vaddr; } -@@ -982,6 +983,7 @@ static void *vb_alloc(unsigned long size +@@ -981,6 +982,7 @@ static void *vb_alloc(unsigned long size struct vmap_block *vb; void *vaddr = NULL; unsigned int order; @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); -@@ -996,7 +998,8 @@ static void *vb_alloc(unsigned long size +@@ -995,7 +997,8 @@ static void *vb_alloc(unsigned long size order = get_order(size); rcu_read_lock(); @@ -55,7 +55,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; -@@ -1019,7 +1022,7 @@ static void *vb_alloc(unsigned long size +@@ -1018,7 +1021,7 @@ static void *vb_alloc(unsigned long size break; } diff --git a/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch b/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch index 063cb38ce..af114bddb 100644 --- a/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch +++ b/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Jan 2015 17:19:44 +0100 Subject: mm/workingset: Do not protect workingset_shadow_nodes with irq off -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz workingset_shadow_nodes is protected by local_irq_disable(). Some users use spin_lock_irq(). 
@@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/swap.h +++ b/include/linux/swap.h -@@ -11,6 +11,7 @@ +@@ -12,6 +12,7 @@ #include #include #include @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include struct notifier_block; -@@ -254,7 +255,8 @@ struct swap_info_struct { +@@ -297,7 +298,8 @@ struct vma_swap_readahead { void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); void workingset_activation(struct page *page); @@ -46,9 +46,9 @@ Signed-off-by: Sebastian Andrzej Siewior static int page_cache_tree_insert(struct address_space *mapping, struct page *page, void **shadowp) -@@ -142,8 +143,10 @@ static int page_cache_tree_insert(struct - true); - } +@@ -133,8 +134,10 @@ static int page_cache_tree_insert(struct + if (shadowp) + *shadowp = p; } + local_lock(shadow_nodes_lock); __radix_tree_replace(&mapping->page_tree, node, slot, page, @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior mapping->nrpages++; return 0; } -@@ -160,6 +163,7 @@ static void page_cache_tree_delete(struc +@@ -151,6 +154,7 @@ static void page_cache_tree_delete(struc VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(nr != 1 && shadow, page); @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior for (i = 0; i < nr; i++) { struct radix_tree_node *node; void **slot; -@@ -171,8 +175,9 @@ static void page_cache_tree_delete(struc +@@ -162,8 +166,9 @@ static void page_cache_tree_delete(struc radix_tree_clear_tags(&mapping->page_tree, node, slot); __radix_tree_replace(&mapping->page_tree, node, slot, shadow, @@ -93,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_unlock_irq(&mapping->tree_lock); --- a/mm/workingset.c +++ b/mm/workingset.c -@@ -339,9 +339,10 @@ void workingset_activation(struct page * +@@ -338,9 +338,10 @@ void workingset_activation(struct page * * point where they would still be useful. 
*/ @@ -106,7 +106,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct address_space *mapping = private; -@@ -359,10 +360,10 @@ void workingset_update_node(struct radix +@@ -358,10 +359,10 @@ void workingset_update_node(struct radix */ if (node->count && node->count == node->exceptional) { if (list_empty(&node->private_list)) @@ -119,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -374,9 +375,9 @@ static unsigned long count_shadow_nodes( +@@ -373,9 +374,9 @@ static unsigned long count_shadow_nodes( unsigned long cache; /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ @@ -132,9 +132,9 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Approximate a reasonable limit for the radix tree nodes -@@ -478,15 +479,15 @@ static enum lru_status shadow_lru_isolat - mem_cgroup_inc_page_stat(virt_to_page(node), - MEMCG_WORKINGSET_NODERECLAIM); +@@ -475,15 +476,15 @@ static enum lru_status shadow_lru_isolat + goto out_invalid; + inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->page_tree, node, - workingset_update_node, mapping); + __workingset_update_node, mapping); @@ -151,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_lock(lru_lock); return ret; } -@@ -497,9 +498,9 @@ static unsigned long scan_shadow_nodes(s +@@ -494,9 +495,9 @@ static unsigned long scan_shadow_nodes(s unsigned long ret; /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ @@ -164,7 +164,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -537,7 +538,7 @@ static int __init workingset_init(void) +@@ -534,7 +535,7 @@ static int __init workingset_init(void) pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); @@ -173,7 +173,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (ret) goto err; ret = register_shrinker(&workingset_shadow_shrinker); -@@ -545,7 +546,7 @@ static int __init workingset_init(void) +@@ -542,7 +543,7 @@ static int __init workingset_init(void) goto err_list_lru; return 0; err_list_lru: diff --git a/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch b/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch index 9526743ef..c550fed16 100644 --- a/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch +++ b/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Tue, 22 Mar 2016 11:16:09 +0100 Subject: [PATCH] mm/zsmalloc: copy with get_cpu_var() and locking -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz get_cpu_var() disables preemption and triggers a might_sleep() splat later. This is replaced with get_locked_var(). @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Object location (, ) is encoded as * as single (unsigned long) handle value. 
-@@ -323,7 +337,7 @@ static void SetZsPageMovable(struct zs_p +@@ -320,7 +334,7 @@ static void SetZsPageMovable(struct zs_p static int create_cache(struct zs_pool *pool) { @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior 0, 0, NULL); if (!pool->handle_cachep) return 1; -@@ -347,10 +361,27 @@ static void destroy_cache(struct zs_pool +@@ -344,10 +358,27 @@ static void destroy_cache(struct zs_pool static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) { @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void cache_free_handle(struct zs_pool *pool, unsigned long handle) { kmem_cache_free(pool->handle_cachep, (void *)handle); -@@ -369,12 +400,18 @@ static void cache_free_zspage(struct zs_ +@@ -366,12 +397,18 @@ static void cache_free_zspage(struct zs_ static void record_obj(unsigned long handle, unsigned long obj) { @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* zpool driver */ -@@ -463,6 +500,7 @@ MODULE_ALIAS("zpool-zsmalloc"); +@@ -460,6 +497,7 @@ MODULE_ALIAS("zpool-zsmalloc"); /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); @@ -177,7 +177,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void reset_page(struct page *page) -@@ -1376,7 +1444,7 @@ void *zs_map_object(struct zs_pool *pool +@@ -1365,7 +1433,7 @@ void *zs_map_object(struct zs_pool *pool class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; @@ -186,7 +186,7 @@ Signed-off-by: Sebastian Andrzej Siewior area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ -@@ -1430,7 +1498,7 @@ void zs_unmap_object(struct zs_pool *poo +@@ -1419,7 +1487,7 @@ void zs_unmap_object(struct zs_pool *poo __zs_unmap_object(area, pages, off, class->size); } diff --git a/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch b/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch index c003691b2..c8bf48b89 100644 --- a/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch +++ b/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch @@ -1,7 +1,7 @@ Subject: mmci: Remove bogus local_irq_save() From: Thomas Gleixner Date: Wed, 09 Jan 2013 12:11:12 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On !RT interrupt runs with interrupts disabled. On RT it's in a thread, so no need to disable interrupts at all. 
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c -@@ -1204,15 +1204,12 @@ static irqreturn_t mmci_pio_irq(int irq, +@@ -1200,15 +1200,12 @@ static irqreturn_t mmci_pio_irq(int irq, struct sg_mapping_iter *sg_miter = &host->sg_miter; struct variant_data *variant = host->variant; void __iomem *base = host->base; @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner do { unsigned int remain, len; char *buffer; -@@ -1252,8 +1249,6 @@ static irqreturn_t mmci_pio_irq(int irq, +@@ -1248,8 +1245,6 @@ static irqreturn_t mmci_pio_irq(int irq, sg_miter_stop(sg_miter); diff --git a/debian/patches/features/all/rt/move_sched_delayed_work_to_helper.patch b/debian/patches/features/all/rt/move_sched_delayed_work_to_helper.patch index b0eca56f9..68c478f42 100644 --- a/debian/patches/features/all/rt/move_sched_delayed_work_to_helper.patch +++ b/debian/patches/features/all/rt/move_sched_delayed_work_to_helper.patch @@ -1,7 +1,7 @@ Date: Wed, 26 Jun 2013 15:28:11 -0400 From: Steven Rostedt Subject: rt,ntp: Move call to schedule_delayed_work() to helper thread -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The ntp code for notify_cmos_timer() is called from a hard interrupt context. schedule_delayed_work() under PREEMPT_RT_FULL calls spinlocks @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c -@@ -17,6 +17,7 @@ +@@ -18,6 +18,7 @@ #include #include #include @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "ntp_internal.h" #include "timekeeping_internal.h" -@@ -568,10 +569,35 @@ static void sync_cmos_clock(struct work_ +@@ -569,10 +570,35 @@ static void sync_cmos_clock(struct work_ &sync_cmos_work, timespec64_to_jiffies(&next)); } diff --git a/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch b/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch index bbec191af..14087b74a 100644 --- a/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch +++ b/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:51:45 +0200 Subject: locking: Disable spin on owner for RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Drop spin on owner for mutex / rwsem. We are most likely not using it but… diff --git a/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch b/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch index 840ef206b..54f1d6c15 100644 --- a/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch +++ b/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch @@ -2,7 +2,7 @@ From: Steven Rostedt Date: Tue, 6 Dec 2016 17:50:30 -0500 Subject: [PATCH] net: Have __napi_schedule_irqoff() disable interrupts on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz A customer hit a crash where the napi sd->poll_list became corrupted. 
The customer had the bnx2x driver, which does a @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior { --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4962,6 +4962,7 @@ bool napi_schedule_prep(struct napi_stru +@@ -5237,6 +5237,7 @@ bool napi_schedule_prep(struct napi_stru } EXPORT_SYMBOL(napi_schedule_prep); @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule -@@ -4973,6 +4974,7 @@ void __napi_schedule_irqoff(struct napi_ +@@ -5248,6 +5249,7 @@ void __napi_schedule_irqoff(struct napi_ ____napi_schedule(this_cpu_ptr(&softnet_data), n); } EXPORT_SYMBOL(__napi_schedule_irqoff); diff --git a/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch b/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch index a4d3d60a5..12bab8be0 100644 --- a/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch +++ b/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 14 Sep 2016 17:36:35 +0200 Subject: [PATCH] net/Qdisc: use a seqlock instead seqcount -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The seqcount disables preemption on -RT while it is held which can't remove. Also we don't want the reader to spin for ages if the writer is @@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h -@@ -481,6 +481,15 @@ static inline void write_seqlock(seqlock +@@ -482,6 +482,15 @@ static inline void write_seqlock(seqlock __raw_write_seqcount_begin(&sl->seqcount); } @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior __raw_write_seqcount_end(&sl->seqcount); --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h -@@ -5,6 +5,7 @@ +@@ -6,6 +6,7 @@ #include #include #include @@ -49,7 +49,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_basic_cpu { struct gnet_stats_basic_packed bstats; -@@ -35,11 +36,11 @@ int gnet_stats_start_copy_compat(struct +@@ -36,11 +37,11 @@ int gnet_stats_start_copy_compat(struct spinlock_t *lock, struct gnet_dump *d, int padattr); @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -@@ -56,13 +57,13 @@ int gen_new_estimator(struct gnet_stats_ +@@ -57,13 +58,13 @@ int gen_new_estimator(struct gnet_stats_ struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, @@ -100,14 +100,14 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ + #include #include - #include - #include + #include +#include - - struct Qdisc_ops; - struct qdisc_walker; -@@ -86,7 +87,7 @@ struct Qdisc { + #include + #include + #include +@@ -90,7 +91,7 @@ struct Qdisc { struct sk_buff *gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; @@ -116,9 +116,9 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; -@@ -98,13 +99,22 @@ struct Qdisc { - spinlock_t busylock ____cacheline_aligned_in_smp; - }; +@@ -109,13 +110,22 @@ static inline void qdisc_refcount_inc(st + 
refcount_inc(&qdisc->refcnt); + } -static inline bool qdisc_is_running(const struct Qdisc *qdisc) +static inline bool qdisc_is_running(struct Qdisc *qdisc) @@ -140,7 +140,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (qdisc_is_running(qdisc)) return false; /* Variant of write_seqcount_begin() telling lockdep a trylock -@@ -113,11 +123,16 @@ static inline bool qdisc_run_begin(struc +@@ -124,11 +134,16 @@ static inline bool qdisc_run_begin(struc raw_write_seqcount_begin(&qdisc->running); seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; @@ -157,7 +157,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) -@@ -308,7 +323,7 @@ static inline spinlock_t *qdisc_root_sle +@@ -338,7 +353,7 @@ static inline spinlock_t *qdisc_root_sle return qdisc_lock(root); } @@ -230,7 +230,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_basic_packed *b) --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c -@@ -980,7 +980,7 @@ static struct Qdisc *qdisc_create(struct +@@ -1081,7 +1081,7 @@ static struct Qdisc *qdisc_create(struct rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { @@ -241,7 +241,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (sch->flags & TCQ_F_MQROOT) --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c -@@ -425,7 +425,11 @@ struct Qdisc noop_qdisc = { +@@ -429,7 +429,11 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, @@ -253,7 +253,7 @@ Signed-off-by: Sebastian Andrzej Siewior .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), }; EXPORT_SYMBOL(noop_qdisc); -@@ -624,9 +628,17 @@ struct Qdisc *qdisc_alloc(struct netdev_ +@@ -628,9 +632,17 @@ struct Qdisc *qdisc_alloc(struct netdev_ lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); diff --git a/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch b/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch index e8d957723..097105ee4 100644 --- a/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch +++ b/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 31 Aug 2016 17:54:09 +0200 Subject: [PATCH] net: add a lock around icmp_sk() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz It looks like the this_cpu_ptr() access in icmp_sk() is protected with local_bh_disable(). To avoid missing serialization in -RT I am adding @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior local_bh_enable(); } -@@ -673,6 +678,7 @@ void icmp_send(struct sk_buff *skb_in, i +@@ -656,6 +661,7 @@ void icmp_send(struct sk_buff *skb_in, i /* Needed by both icmp_global_allow and icmp_xmit_lock */ local_bh_disable(); @@ -56,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless * incoming dev is loopback. 
If outgoing dev change to not be -@@ -761,6 +767,7 @@ void icmp_send(struct sk_buff *skb_in, i +@@ -744,6 +750,7 @@ void icmp_send(struct sk_buff *skb_in, i out_unlock: icmp_xmit_unlock(sk); out_bh_enable: diff --git a/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch b/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch index 8d4305a36..6d63e0c42 100644 --- a/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch +++ b/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch @@ -5,7 +5,7 @@ Subject: [PATCH] net: add back the missing serialization in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Some time ago Sami Pietikäinen reported a crash on -RT in ip_send_unicast_reply() which was later fixed by Nicholas Mc Guire @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -583,6 +584,7 @@ void tcp_v4_send_check(struct sock *sk, +@@ -580,6 +581,7 @@ void tcp_v4_send_check(struct sock *sk, } EXPORT_SYMBOL(tcp_v4_send_check); @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * This routine will send an RST to the other tcp. * -@@ -711,6 +713,7 @@ static void tcp_v4_send_reset(const stru +@@ -709,6 +711,7 @@ static void tcp_v4_send_reset(const stru arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, -@@ -720,6 +723,7 @@ static void tcp_v4_send_reset(const stru +@@ -718,6 +721,7 @@ static void tcp_v4_send_reset(const stru __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); @@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_TCP_MD5SIG out: -@@ -797,6 +801,7 @@ static void tcp_v4_send_ack(const struct +@@ -795,6 +799,7 @@ static void tcp_v4_send_ack(const struct arg.bound_dev_if = oif; arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); @@ -83,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, -@@ -805,6 +810,7 @@ static void tcp_v4_send_ack(const struct +@@ -803,6 +808,7 @@ static void tcp_v4_send_ack(const struct __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); diff --git a/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch b/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch index f6a43e903..494d19001 100644 --- a/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch +++ b/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Wed, 26 Sep 2012 16:21:08 +0200 Subject: net: Another local_irq_disable/kmalloc headache -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Replace it by a local lock. 
Though that's pretty inefficient :( @@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner #include #include -@@ -359,6 +360,7 @@ struct napi_alloc_cache { +@@ -334,6 +335,7 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { -@@ -366,10 +368,10 @@ static void *__netdev_alloc_frag(unsigne +@@ -341,10 +343,10 @@ static void *__netdev_alloc_frag(unsigne unsigned long flags; void *data; @@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner return data; } -@@ -437,13 +439,13 @@ struct sk_buff *__netdev_alloc_skb(struc +@@ -412,13 +414,13 @@ struct sk_buff *__netdev_alloc_skb(struc if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; diff --git a/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch b/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch index ad4bac15d..2aed96f62 100644 --- a/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch +++ b/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch @@ -1,7 +1,7 @@ Subject: net/core/cpuhotplug: Drain input_pkt_queue lockless From: Grygorii Strashko Date: Fri, 9 Oct 2015 09:25:49 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz I can constantly see below error report with 4.1 RT-kernel on TI ARM dra7-evm if I'm trying to unplug cpu1: @@ -36,7 +36,7 @@ Cc: stable-rt@vger.kernel.org --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -8098,7 +8098,7 @@ static int dev_cpu_dead(unsigned int old +@@ -8422,7 +8422,7 @@ static int dev_cpu_dead(unsigned int old netif_rx_ni(skb); input_queue_head_incr(oldsd); } diff --git a/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch b/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch index c9ed0d7d0..4a845dd29 100644 --- a/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch +++ b/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 15 Jan 2016 16:33:34 +0100 Subject: net/core: protect users of napi_alloc_cache against reentrance -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On -RT the code running in BH can not be moved to another CPU so CPU local variable remain local. 
However the code can be preempted @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/core/skbuff.c +++ b/net/core/skbuff.c -@@ -361,6 +361,7 @@ struct napi_alloc_cache { +@@ -336,6 +336,7 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { -@@ -390,9 +391,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); +@@ -365,9 +366,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { @@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior } void *napi_alloc_frag(unsigned int fragsz) -@@ -486,9 +491,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); +@@ -461,9 +466,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { @@ -54,7 +54,7 @@ Signed-off-by: Sebastian Andrzej Siewior len += NET_SKB_PAD + NET_IP_ALIGN; -@@ -506,7 +512,10 @@ struct sk_buff *__napi_alloc_skb(struct +@@ -481,7 +487,10 @@ struct sk_buff *__napi_alloc_skb(struct if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (unlikely(!data)) return NULL; -@@ -517,7 +526,7 @@ struct sk_buff *__napi_alloc_skb(struct +@@ -492,7 +501,7 @@ struct sk_buff *__napi_alloc_skb(struct } /* use OR instead of assignment to avoid clearing of bits in mask */ @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior skb->pfmemalloc = 1; skb->head_frag = 1; -@@ -761,23 +770,26 @@ EXPORT_SYMBOL(consume_skb); +@@ -724,23 +733,26 @@ void __consume_stateless_skb(struct sk_b void __kfree_skb_flush(void) { @@ -103,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* record skb to CPU local list */ nc->skb_cache[nc->skb_count++] = skb; -@@ -792,6 +804,7 @@ static inline void _kfree_skb_defer(stru +@@ -755,6 +767,7 @@ static inline void _kfree_skb_defer(stru nc->skb_cache); nc->skb_count = 0; } diff --git a/debian/patches/features/all/rt/net-core-remove-explicit-do_softirq-from-busy_poll_s.patch b/debian/patches/features/all/rt/net-core-remove-explicit-do_softirq-from-busy_poll_s.patch deleted file mode 100644 index e442dfbf0..000000000 --- a/debian/patches/features/all/rt/net-core-remove-explicit-do_softirq-from-busy_poll_s.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Mon, 22 May 2017 21:08:08 +0200 -Subject: net/core: remove explicit do_softirq() from busy_poll_stop() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Since commit 217f69743681 ("net: busy-poll: allow preemption in -sk_busy_loop()") there is an explicit do_softirq() invocation after -local_bh_enable() has been invoked. -I don't understand why we need this because local_bh_enable() will -invoke do_softirq() once the softirq counter reached zero and we have -softirq-related work pending. 
- -Signed-off-by: Sebastian Andrzej Siewior ---- - net/core/dev.c | 2 -- - 1 file changed, 2 deletions(-) - ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -5060,8 +5060,6 @@ static void busy_poll_stop(struct napi_s - if (rc == BUSY_POLL_BUDGET) - __napi_schedule(napi); - local_bh_enable(); -- if (local_softirq_pending()) -- do_softirq(); - } - - bool sk_busy_loop(struct sock *sk, int nonblock) diff --git a/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch b/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch index 8f8785ce6..00474dd77 100644 --- a/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch +++ b/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 30 Mar 2016 13:36:29 +0200 Subject: [PATCH] net: dev: always take qdisc's busylock in __dev_xmit_skb() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The root-lock is dropped before dev_hard_start_xmit() is invoked and after setting the __QDISC___STATE_RUNNING bit. If this task is now pushed away @@ -21,7 +21,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3078,7 +3078,11 @@ static inline int __dev_xmit_skb(struct +@@ -3158,7 +3158,11 @@ static inline int __dev_xmit_skb(struct * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ diff --git a/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch b/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch index 2409e8e00..76b524092 100644 --- a/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch +++ b/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch @@ -1,7 +1,7 @@ Subject: net: netfilter: Serialize xt_write_recseq sections on RT From: Thomas Gleixner Date: Sun, 28 Oct 2012 11:18:08 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The netfilter code relies only on the implicit semantics of local_bh_disable() for serializing wt_write_recseq sections. RT breaks @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h -@@ -5,6 +5,7 @@ +@@ -6,6 +6,7 @@ #include #include #include @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner #include /* Test a struct->invflags and a boolean for inequality */ -@@ -337,6 +338,8 @@ void xt_free_table_info(struct xt_table_ +@@ -338,6 +339,8 @@ void xt_free_table_info(struct xt_table_ */ DECLARE_PER_CPU(seqcount_t, xt_recseq); @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner /* xt_tee_enabled - true if x_tables needs to handle reentrancy * * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. -@@ -357,6 +360,9 @@ static inline unsigned int xt_write_recs +@@ -358,6 +361,9 @@ static inline unsigned int xt_write_recs { unsigned int addend; @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner /* * Low order bit of sequence is set if we already * called xt_write_recseq_begin(). 
-@@ -387,6 +393,7 @@ static inline void xt_write_recseq_end(u +@@ -388,6 +394,7 @@ static inline void xt_write_recseq_end(u /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ smp_wmb(); __this_cpu_add(xt_recseq.sequence, addend); @@ -54,14 +54,15 @@ Signed-off-by: Thomas Gleixner /* --- a/net/netfilter/core.c +++ b/net/netfilter/core.c -@@ -22,12 +22,18 @@ +@@ -21,6 +21,7 @@ + #include #include #include - #include +#include + #include #include #include - #include +@@ -28,6 +29,11 @@ #include "nf_internals.h" diff --git a/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch b/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch index e7a40fe1b..42ea3e717 100644 --- a/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch +++ b/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 20 Mar 2013 18:06:20 +0100 Subject: net: Add a mutex around devnet_rename_seq -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On RT write_seqcount_begin() disables preemption and device_rename() allocates memory with GFP_KERNEL and grabs later the sysfs_mutex @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -189,6 +189,7 @@ static unsigned int napi_gen_id = NR_CPU +@@ -195,6 +195,7 @@ static unsigned int napi_gen_id = NR_CPU static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; @@ -30,7 +30,7 @@ Signed-off-by: Thomas Gleixner static inline void dev_base_seq_inc(struct net *net) { -@@ -889,7 +890,8 @@ int netdev_get_name(struct net *net, cha +@@ -920,7 +921,8 @@ int netdev_get_name(struct net *net, cha strcpy(name, dev->name); rcu_read_unlock(); if (read_seqcount_retry(&devnet_rename_seq, seq)) { @@ -40,7 +40,7 @@ Signed-off-by: Thomas Gleixner goto retry; } -@@ -1158,20 +1160,17 @@ int dev_change_name(struct net_device *d +@@ -1189,20 +1191,17 @@ int dev_change_name(struct net_device *d if (dev->flags & IFF_UP) return -EBUSY; @@ -67,7 +67,7 @@ Signed-off-by: Thomas Gleixner if (oldname[0] && !strchr(oldname, '%')) netdev_info(dev, "renamed from %s\n", oldname); -@@ -1184,11 +1183,12 @@ int dev_change_name(struct net_device *d +@@ -1215,11 +1214,12 @@ int dev_change_name(struct net_device *d if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); dev->name_assign_type = old_assign_type; @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner netdev_adjacent_rename_links(dev, oldname); -@@ -1209,7 +1209,8 @@ int dev_change_name(struct net_device *d +@@ -1240,7 +1240,8 @@ int dev_change_name(struct net_device *d /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; @@ -93,7 +93,7 @@ Signed-off-by: Thomas Gleixner memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); dev->name_assign_type = old_assign_type; -@@ -1222,6 +1223,11 @@ int dev_change_name(struct net_device *d +@@ -1253,6 +1254,11 @@ int dev_change_name(struct net_device *d } return err; diff --git a/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch b/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch index c36e11d45..39fe0ae93 100644 --- a/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch +++ 
b/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 13 Jan 2016 15:55:02 +0100 Subject: net: move xmit_recursion to per-task variable on -RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz A softirq on -RT can be preempted. That means one task is in __dev_queue_xmit(), gets preempted and another task may enter @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -2428,14 +2428,53 @@ void netdev_freemem(struct net_device *d +@@ -2433,14 +2433,53 @@ void netdev_freemem(struct net_device *d void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1061,6 +1061,9 @@ struct task_struct { +@@ -1107,6 +1107,9 @@ struct task_struct { #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif @@ -93,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct task_struct *oom_reaper_list; --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3141,8 +3141,10 @@ static void skb_update_prio(struct sk_bu +@@ -3221,8 +3221,10 @@ static void skb_update_prio(struct sk_bu #define skb_update_prio(skb) #endif @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * dev_loopback_xmit - loop back @skb -@@ -3382,8 +3384,7 @@ static int __dev_queue_xmit(struct sk_bu +@@ -3463,8 +3465,7 @@ static int __dev_queue_xmit(struct sk_bu int cpu = smp_processor_id(); /* ok because BHs are off */ if (txq->xmit_lock_owner != cpu) { @@ -114,7 +114,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto recursion_alert; skb = validate_xmit_skb(skb, dev); -@@ -3393,9 +3394,9 @@ static int __dev_queue_xmit(struct sk_bu +@@ -3474,9 +3475,9 @@ static int __dev_queue_xmit(struct sk_bu HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -128,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; --- a/net/core/filter.c +++ b/net/core/filter.c -@@ -1652,7 +1652,7 @@ static inline int __bpf_tx_skb(struct ne +@@ -1694,7 +1694,7 @@ static inline int __bpf_tx_skb(struct ne { int ret; @@ -137,7 +137,7 @@ Signed-off-by: Sebastian Andrzej Siewior net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); kfree_skb(skb); return -ENETDOWN; -@@ -1660,9 +1660,9 @@ static inline int __bpf_tx_skb(struct ne +@@ -1702,9 +1702,9 @@ static inline int __bpf_tx_skb(struct ne skb->dev = dev; diff --git a/debian/patches/features/all/rt/net-prevent-abba-deadlock.patch b/debian/patches/features/all/rt/net-prevent-abba-deadlock.patch index b1cd19f6d..e260831a2 100644 --- a/debian/patches/features/all/rt/net-prevent-abba-deadlock.patch +++ b/debian/patches/features/all/rt/net-prevent-abba-deadlock.patch @@ -1,7 +1,7 @@ Subject: net-flip-lock-dep-thingy.patch From: Thomas Gleixner Date: Tue, 28 Jun 2011 10:59:58 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz ======================================================= [ INFO: possible circular locking dependency detected ] @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner --- 
a/net/core/sock.c +++ b/net/core/sock.c -@@ -2541,12 +2541,11 @@ void lock_sock_nested(struct sock *sk, i +@@ -2762,12 +2762,11 @@ void lock_sock_nested(struct sock *sk, i if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; diff --git a/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch b/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch index 557c417f1..d7681adaf 100644 --- a/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch +++ b/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 20 Jan 2016 15:39:05 +0100 Subject: net: provide a way to delegate processing a softirq to ksoftirqd -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz If the NET_RX uses up all of his budget it moves the following NAPI invocations into the `ksoftirqd`. On -RT it does not do so. Instead it @@ -21,7 +21,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -508,6 +508,14 @@ extern void thread_do_softirq(void); +@@ -519,6 +519,14 @@ extern void thread_do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); @@ -68,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior void raise_softirq_irqoff(unsigned int nr) --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -5368,7 +5368,7 @@ static __latent_entropy void net_rx_acti +@@ -5642,7 +5642,7 @@ static __latent_entropy void net_rx_acti list_splice_tail(&repoll, &list); list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) diff --git a/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch b/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch index 07f0b9522..249da649c 100644 --- a/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch +++ b/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch @@ -1,7 +1,7 @@ From: Marc Kleine-Budde Date: Wed, 5 Mar 2014 00:49:47 +0100 Subject: net: sched: Use msleep() instead of yield() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On PREEMPT_RT enabled systems the interrupt handler run as threads at prio 50 (by default). If a high priority userspace process tries to shut down a busy @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c -@@ -925,7 +925,7 @@ void dev_deactivate_many(struct list_hea +@@ -930,7 +930,7 @@ void dev_deactivate_many(struct list_hea /* Wait for outstanding qdisc_run calls. 
*/ list_for_each_entry(dev, head, close_list) while (some_qdisc_is_busy(dev)) diff --git a/debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch b/debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch new file mode 100644 index 000000000..78792c8cf --- /dev/null +++ b/debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch @@ -0,0 +1,68 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 21 Aug 2017 15:09:13 +0200 +Subject: [PATCH] net: take the tcp_sk_lock lock with BH disabled +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Lockdep may complain about an unsafe locking scenario: +| CPU0 CPU1 +| ---- ---- +| lock((tcp_sk_lock).lock); +| lock(&per_cpu(local_softirq_locks[i], __cpu).lock); +| lock((tcp_sk_lock).lock); +| lock(&per_cpu(local_softirq_locks[i], __cpu).lock); + +in the call paths: + do_current_softirqs -> tcp_v4_send_ack() +vs + tcp_v4_send_reset -> do_current_softirqs(). + +This should not happen since local_softirq_locks is per CPU. Reversing +the order makes lockdep happy. + +Reported-by: Jacek Konieczny +Signed-off-by: Sebastian Andrzej Siewior +--- + net/ipv4/tcp_ipv4.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -711,8 +711,8 @@ static void tcp_v4_send_reset(const stru + + arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); +- local_lock(tcp_sk_lock); + local_bh_disable(); ++ local_lock(tcp_sk_lock); + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, +@@ -720,8 +720,8 @@ static void tcp_v4_send_reset(const stru + + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); +- local_bh_enable(); + local_unlock(tcp_sk_lock); ++ local_bh_enable(); + + #ifdef CONFIG_TCP_MD5SIG + out: +@@ -799,16 +799,16 @@ static void tcp_v4_send_ack(const struct + arg.bound_dev_if = oif; + arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); +- local_lock(tcp_sk_lock); + local_bh_disable(); ++ local_lock(tcp_sk_lock); + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, + &arg, arg.iov[0].iov_len); + + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); +- local_bh_enable(); + local_unlock(tcp_sk_lock); ++ local_bh_enable(); + } + + static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) diff --git a/debian/patches/features/all/rt/net-use-cpu-chill.patch b/debian/patches/features/all/rt/net-use-cpu-chill.patch index 6f29e3cf6..72223e8d0 100644 --- a/debian/patches/features/all/rt/net-use-cpu-chill.patch +++ b/debian/patches/features/all/rt/net-use-cpu-chill.patch @@ -1,7 +1,7 @@ Subject: net: Use cpu_chill() instead of cpu_relax() From: Thomas Gleixner Date: Wed, 07 Mar 2012 21:10:04 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. 
Use cpu_chill() instead of cpu_relax() to let the system @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -702,7 +703,7 @@ static void prb_retire_rx_blk_timer_expi +@@ -700,7 +701,7 @@ static void prb_retire_rx_blk_timer_expi if (BLOCK_NUM_PKTS(pbd)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... */ @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -964,7 +965,7 @@ static void prb_retire_current_block(str +@@ -962,7 +963,7 @@ static void prb_retire_current_block(str if (!(status & TP_STATUS_BLK_TMO)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... */ diff --git a/debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch b/debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch new file mode 100644 index 000000000..9e75a944e --- /dev/null +++ b/debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch @@ -0,0 +1,74 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 21 Sep 2017 14:42:04 +0200 +Subject: net: use trylock in icmp_sk +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The locking path can be recursive (same as for sk->sk_lock.slock) and +therefore we need a trylock version for the locallock, too. + +Cc: rt-stable@vger.kernel.org +Reported-by: Jacek Konieczny +Signed-off-by: Sebastian Andrzej Siewior +--- + net/ipv4/icmp.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -217,12 +217,16 @@ static inline struct sock *icmp_xmit_loc + { + struct sock *sk; + ++ if (!local_trylock(icmp_sk_lock)) ++ return NULL; ++ + sk = icmp_sk(net); + + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { + /* This can happen if the output path signals a + * dst_link_failure() for an outgoing ICMP packet. + */ ++ local_unlock(icmp_sk_lock); + return NULL; + } + return sk; +@@ -231,6 +235,7 @@ static inline struct sock *icmp_xmit_loc + static inline void icmp_xmit_unlock(struct sock *sk) + { + spin_unlock(&sk->sk_lock.slock); ++ local_unlock(icmp_sk_lock); + } + + int sysctl_icmp_msgs_per_sec __read_mostly = 1000; +@@ -420,7 +425,6 @@ static void icmp_reply(struct icmp_bxm * + + /* Needed by both icmp_global_allow and icmp_xmit_lock */ + local_bh_disable(); +- local_lock(icmp_sk_lock); + + /* global icmp_msgs_per_sec */ + if (!icmpv4_global_allow(net, type, code)) +@@ -465,7 +469,6 @@ static void icmp_reply(struct icmp_bxm * + out_unlock: + icmp_xmit_unlock(sk); + out_bh_enable: +- local_unlock(icmp_sk_lock); + local_bh_enable(); + } + +@@ -661,7 +664,6 @@ void icmp_send(struct sk_buff *skb_in, i + + /* Needed by both icmp_global_allow and icmp_xmit_lock */ + local_bh_disable(); +- local_lock(icmp_sk_lock); + + /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless + * incoming dev is loopback. 
If outgoing dev change to not be +@@ -750,7 +752,6 @@ void icmp_send(struct sk_buff *skb_in, i + out_unlock: + icmp_xmit_unlock(sk); + out_bh_enable: +- local_unlock(icmp_sk_lock); + local_bh_enable(); + out:; + } diff --git a/debian/patches/features/all/rt/net-wireless-warn-nort.patch b/debian/patches/features/all/rt/net-wireless-warn-nort.patch index 5eb65ee3a..87dd30c97 100644 --- a/debian/patches/features/all/rt/net-wireless-warn-nort.patch +++ b/debian/patches/features/all/rt/net-wireless-warn-nort.patch @@ -1,7 +1,7 @@ Subject: net/wireless: Use WARN_ON_NORT() From: Thomas Gleixner Date: Thu, 21 Jul 2011 21:05:33 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The softirq counter is meaningless on RT, so the check triggers a false positive. @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c -@@ -4229,7 +4229,7 @@ void ieee80211_rx_napi(struct ieee80211_ +@@ -4250,7 +4250,7 @@ void ieee80211_rx_napi(struct ieee80211_ struct ieee80211_supported_band *sband; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); diff --git a/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch b/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch index 8bb6c9a49..454a11742 100644 --- a/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch +++ b/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Sat, 27 May 2017 19:02:06 +0200 Subject: net/core: disable NET_RX_BUSY_POLL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz sk_busy_loop() does preempt_disable() followed by a few operations which can take sleeping locks and may get long. 
@@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/Kconfig +++ b/net/Kconfig -@@ -277,7 +277,7 @@ config CGROUP_NET_CLASSID +@@ -272,7 +272,7 @@ config CGROUP_NET_CLASSID config NET_RX_BUSY_POLL bool diff --git a/debian/patches/features/all/rt/oleg-signal-rt-fix.patch b/debian/patches/features/all/rt/oleg-signal-rt-fix.patch index 1f16b272f..732315fe8 100644 --- a/debian/patches/features/all/rt/oleg-signal-rt-fix.patch +++ b/debian/patches/features/all/rt/oleg-signal-rt-fix.patch @@ -1,7 +1,7 @@ From: Oleg Nesterov Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: signal/x86: Delay calling signals in atomic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On x86_64 we must disable preemption before we enable interrupts for stack faults, int3 and debugging, because the current task is using @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c -@@ -149,6 +149,13 @@ static void exit_to_usermode_loop(struct +@@ -150,6 +150,13 @@ static void exit_to_usermode_loop(struct if (cached_flags & _TIF_NEED_RESCHED) schedule(); @@ -55,7 +55,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h -@@ -27,6 +27,19 @@ typedef struct { +@@ -28,6 +28,19 @@ typedef struct { #define SA_IA32_ABI 0x02000000u #define SA_X32_ABI 0x01000000u @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner #endif --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -760,6 +760,10 @@ struct task_struct { +@@ -795,6 +795,10 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; @@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner unsigned int sas_ss_flags; --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1235,8 +1235,8 @@ int do_send_sig_info(int sig, struct sig +@@ -1236,8 +1236,8 @@ int do_send_sig_info(int sig, struct sig * We don't want to have recursive SIGSEGV's etc, for example, * that is why we also clear SIGNAL_UNKILLABLE. */ @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner { unsigned long int flags; int ret, blocked, ignored; -@@ -1261,6 +1261,39 @@ force_sig_info(int sig, struct siginfo * +@@ -1266,6 +1266,39 @@ force_sig_info(int sig, struct siginfo * return ret; } diff --git a/debian/patches/features/all/rt/panic-disable-random-on-rt.patch b/debian/patches/features/all/rt/panic-disable-random-on-rt.patch index e03c00cf5..e819ca707 100644 --- a/debian/patches/features/all/rt/panic-disable-random-on-rt.patch +++ b/debian/patches/features/all/rt/panic-disable-random-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: panic: skip get_random_bytes for RT_FULL in init_oops_id -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Disable on -RT. If this is invoked from irq-context we will have problems to acquire the sleeping lock. 
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/panic.c +++ b/kernel/panic.c -@@ -481,9 +481,11 @@ static u64 oops_id; +@@ -482,9 +482,11 @@ static u64 oops_id; static int init_oops_id(void) { diff --git a/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch b/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch index a3d54e6f5..5a3164c25 100644 --- a/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch +++ b/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch @@ -1,7 +1,7 @@ Subject: rcu: Make ksoftirqd do RCU quiescent states From: "Paul E. McKenney" Date: Wed, 5 Oct 2011 11:45:18 -0700 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Implementing RCU-bh in terms of RCU-preempt makes the system vulnerable to network-based denial-of-service attacks. This patch therefore @@ -31,9 +31,9 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h -@@ -303,11 +303,7 @@ static inline int rcu_preempt_depth(void - /* Internal to kernel */ +@@ -117,11 +117,7 @@ static inline int rcu_preempt_depth(void void rcu_init(void); + extern int rcu_scheduler_active __read_mostly; void rcu_sched_qs(void); -#ifdef CONFIG_PREEMPT_RT_FULL -static inline void rcu_bh_qs(void) { } @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner void rcu_cpu_starting(unsigned int cpu); --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -262,7 +262,14 @@ void rcu_sched_qs(void) +@@ -243,7 +243,14 @@ void rcu_sched_qs(void) this_cpu_ptr(&rcu_sched_data), true); } @@ -60,7 +60,7 @@ Signed-off-by: Thomas Gleixner +#else void rcu_bh_qs(void) { - if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { + RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -29,6 +29,7 @@ @@ -70,8 +70,8 @@ Signed-off-by: Thomas Gleixner +#include #include #include "../time/tick-internal.h" - -@@ -1246,7 +1247,7 @@ static void rcu_prepare_kthreads(int cpu + #include "../locking/rtmutex_common.h" +@@ -1299,7 +1300,7 @@ static void rcu_prepare_kthreads(int cpu #endif /* #else #ifdef CONFIG_RCU_BOOST */ @@ -80,9 +80,9 @@ Signed-off-by: Thomas Gleixner /* * Check to see if any future RCU-related work will need to be done -@@ -1263,7 +1264,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nex - return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) - ? 0 : rcu_cpu_has_callbacks(NULL); +@@ -1315,7 +1316,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nex + *nextevt = KTIME_MAX; + return rcu_cpu_has_callbacks(NULL); } +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ @@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner /* * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up * after it. -@@ -1359,6 +1362,8 @@ static bool __maybe_unused rcu_try_advan +@@ -1411,6 +1414,8 @@ static bool __maybe_unused rcu_try_advan return cbs_ready; } @@ -99,7 +99,7 @@ Signed-off-by: Thomas Gleixner /* * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready * to invoke. If the CPU has callbacks, try to advance them. 
Tell the -@@ -1404,6 +1409,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nex +@@ -1453,6 +1458,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nex *nextevt = basemono + dj * TICK_NSEC; return 0; } diff --git a/debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch b/debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch new file mode 100644 index 000000000..4bd9d5fd8 --- /dev/null +++ b/debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch @@ -0,0 +1,109 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 4 Oct 2017 10:24:23 +0200 +Subject: [PATCH] pci/switchtec: Don't use completion's wait queue +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The poll callback is using completion's wait_queue_head_t member and +puts it in poll_wait() so the poll() caller gets a wakeup after command +completed. This does not work on RT because we don't have a +wait_queue_head_t in our completion implementation. Nobody in tree does +like that in tree so this is the only driver that breaks. + +Instead of using the completion here is waitqueue with a status flag as +suggested by Logan. + +I don't have the HW so I have no idea if it works as expected, so please +test it. + +Cc: Kurt Schwemmer +Cc: Logan Gunthorpe +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/pci/switch/switchtec.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +--- a/drivers/pci/switch/switchtec.c ++++ b/drivers/pci/switch/switchtec.c +@@ -306,10 +306,11 @@ struct switchtec_user { + + enum mrpc_state state; + +- struct completion comp; ++ wait_queue_head_t cmd_comp; + struct kref kref; + struct list_head list; + ++ bool cmd_done; + u32 cmd; + u32 status; + u32 return_code; +@@ -331,7 +332,7 @@ static struct switchtec_user *stuser_cre + stuser->stdev = stdev; + kref_init(&stuser->kref); + INIT_LIST_HEAD(&stuser->list); +- init_completion(&stuser->comp); ++ init_waitqueue_head(&stuser->cmd_comp); + stuser->event_cnt = atomic_read(&stdev->event_cnt); + + dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); +@@ -414,7 +415,7 @@ static int mrpc_queue_cmd(struct switcht + kref_get(&stuser->kref); + stuser->read_len = sizeof(stuser->data); + stuser_set_state(stuser, MRPC_QUEUED); +- init_completion(&stuser->comp); ++ stuser->cmd_done = false; + list_add_tail(&stuser->list, &stdev->mrpc_queue); + + mrpc_cmd_submit(stdev); +@@ -451,7 +452,8 @@ static void mrpc_complete_cmd(struct swi + stuser->read_len); + + out: +- complete_all(&stuser->comp); ++ stuser->cmd_done = true; ++ wake_up_interruptible(&stuser->cmd_comp); + list_del_init(&stuser->list); + stuser_put(stuser); + stdev->mrpc_busy = 0; +@@ -721,10 +723,11 @@ static ssize_t switchtec_dev_read(struct + mutex_unlock(&stdev->mrpc_mutex); + + if (filp->f_flags & O_NONBLOCK) { +- if (!try_wait_for_completion(&stuser->comp)) ++ if (!READ_ONCE(stuser->cmd_done)) + return -EAGAIN; + } else { +- rc = wait_for_completion_interruptible(&stuser->comp); ++ rc = wait_event_interruptible(stuser->cmd_comp, ++ stuser->cmd_done); + if (rc < 0) + return rc; + } +@@ -772,7 +775,7 @@ static unsigned int switchtec_dev_poll(s + struct switchtec_dev *stdev = stuser->stdev; + int ret = 0; + +- poll_wait(filp, &stuser->comp.wait, wait); ++ poll_wait(filp, &stuser->cmd_comp, wait); + poll_wait(filp, &stdev->event_wq, wait); + + if (lock_mutex_and_test_alive(stdev)) +@@ -780,7 +783,7 @@ static unsigned int switchtec_dev_poll(s + + 
mutex_unlock(&stdev->mrpc_mutex); + +- if (try_wait_for_completion(&stuser->comp)) ++ if (READ_ONCE(stuser->cmd_done)) + ret |= POLLIN | POLLRDNORM; + + if (stuser->event_cnt != atomic_read(&stdev->event_cnt)) +@@ -1255,7 +1258,8 @@ static void stdev_kill(struct switchtec_ + + /* Wake up and kill any users waiting on an MRPC request */ + list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) { +- complete_all(&stuser->comp); ++ stuser->cmd_done = true; ++ wake_up_interruptible(&stuser->cmd_comp); + list_del_init(&stuser->list); + stuser_put(stuser); + } diff --git a/debian/patches/features/all/rt/percpu_ida-use-locklocks.patch b/debian/patches/features/all/rt/percpu_ida-use-locklocks.patch index d237385d8..10aafa296 100644 --- a/debian/patches/features/all/rt/percpu_ida-use-locklocks.patch +++ b/debian/patches/features/all/rt/percpu_ida-use-locklocks.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 9 Apr 2014 11:58:17 +0200 Subject: percpu_ida: Use local locks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz the local_irq_save() + spin_lock() does not work that well on -RT diff --git a/debian/patches/features/all/rt/perf-make-swevent-hrtimer-irqsafe.patch b/debian/patches/features/all/rt/perf-make-swevent-hrtimer-irqsafe.patch deleted file mode 100644 index ac2097e7c..000000000 --- a/debian/patches/features/all/rt/perf-make-swevent-hrtimer-irqsafe.patch +++ /dev/null @@ -1,69 +0,0 @@ -From: Yong Zhang -Date: Wed, 11 Jul 2012 22:05:21 +0000 -Subject: perf: Make swevent hrtimer run in irq instead of softirq -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Otherwise we get a deadlock like below: - -[ 1044.042749] BUG: scheduling while atomic: ksoftirqd/21/141/0x00010003 -[ 1044.042752] INFO: lockdep is turned off. -[ 1044.042754] Modules linked in: -[ 1044.042757] Pid: 141, comm: ksoftirqd/21 Tainted: G W 3.4.0-rc2-rt3-23676-ga723175-dirty #29 -[ 1044.042759] Call Trace: -[ 1044.042761] [] __schedule_bug+0x65/0x80 -[ 1044.042770] [] __schedule+0x83c/0xa70 -[ 1044.042775] [] ? prepare_to_wait+0x32/0xb0 -[ 1044.042779] [] schedule+0x2e/0xa0 -[ 1044.042782] [] hrtimer_wait_for_timer+0x6d/0xb0 -[ 1044.042786] [] ? wake_up_bit+0x40/0x40 -[ 1044.042790] [] hrtimer_cancel+0x20/0x40 -[ 1044.042794] [] perf_swevent_cancel_hrtimer+0x3c/0x50 -[ 1044.042798] [] task_clock_event_stop+0x11/0x40 -[ 1044.042802] [] task_clock_event_del+0xe/0x10 -[ 1044.042805] [] event_sched_out+0x118/0x1d0 -[ 1044.042809] [] group_sched_out+0x29/0x90 -[ 1044.042813] [] __perf_event_disable+0x18e/0x200 -[ 1044.042817] [] remote_function+0x63/0x70 -[ 1044.042821] [] generic_smp_call_function_single_interrupt+0xce/0x120 -[ 1044.042826] [] smp_call_function_single_interrupt+0x27/0x40 -[ 1044.042831] [] call_function_single_interrupt+0x6c/0x80 -[ 1044.042833] [] ? perf_event_overflow+0x20/0x20 -[ 1044.042840] [] ? _raw_spin_unlock_irq+0x30/0x70 -[ 1044.042844] [] ? _raw_spin_unlock_irq+0x36/0x70 -[ 1044.042848] [] run_hrtimer_softirq+0xc2/0x200 -[ 1044.042853] [] ? perf_event_overflow+0x20/0x20 -[ 1044.042857] [] __do_softirq_common+0xf5/0x3a0 -[ 1044.042862] [] __thread_do_softirq+0x15d/0x200 -[ 1044.042865] [] run_ksoftirqd+0xfa/0x210 -[ 1044.042869] [] ? __thread_do_softirq+0x200/0x200 -[ 1044.042873] [] ? __thread_do_softirq+0x200/0x200 -[ 1044.042877] [] kthread+0xb6/0xc0 -[ 1044.042881] [] ? 
_raw_spin_unlock_irq+0x3b/0x70 -[ 1044.042886] [] kernel_thread_helper+0x4/0x10 -[ 1044.042889] [] ? finish_task_switch+0x8c/0x110 -[ 1044.042894] [] ? _raw_spin_unlock_irq+0x3b/0x70 -[ 1044.042897] [] ? retint_restore_args+0xe/0xe -[ 1044.042900] [] ? kthreadd+0x1e0/0x1e0 -[ 1044.042902] [] ? gs_change+0xb/0xb - -Signed-off-by: Yong Zhang -Cc: Peter Zijlstra -Cc: Steven Rostedt -Link: http://lkml.kernel.org/r/1341476476-5666-1-git-send-email-yong.zhang0@gmail.com -Signed-off-by: Thomas Gleixner -Signed-off-by: Steven Rostedt - ---- - kernel/events/core.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/kernel/events/core.c -+++ b/kernel/events/core.c -@@ -8495,6 +8495,7 @@ static void perf_swevent_init_hrtimer(st - - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; -+ hwc->hrtimer.irqsafe = 1; - - /* - * Since hrtimers have a fixed rate, we can do a static freq->period diff --git a/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch b/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch index 4d9c831db..2b488b50d 100644 --- a/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch +++ b/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch @@ -1,7 +1,7 @@ Subject: rcu: Frob softirq test From: Peter Zijlstra Date: Sat Aug 13 00:23:17 CEST 2011 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz With RT_FULL we get the below wreckage: @@ -156,7 +156,7 @@ Signed-off-by: Peter Zijlstra --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h -@@ -428,7 +428,7 @@ void rcu_read_unlock_special(struct task +@@ -466,7 +466,7 @@ void rcu_read_unlock_special(struct task } /* Hardware IRQ handlers cannot block, complain if they get here. */ diff --git a/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch b/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch index 98de5fae1..de206abb3 100644 --- a/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch +++ b/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch @@ -1,7 +1,7 @@ Subject: locking/percpu-rwsem: Remove preempt_disable variants From: Peter Zijlstra Date: Wed Nov 23 16:29:32 CET 2016 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Effective revert commit: @@ -19,7 +19,7 @@ Signed-off-by: Peter Zijlstra (Intel) --- a/fs/locks.c +++ b/fs/locks.c -@@ -935,7 +935,7 @@ static int flock_lock_inode(struct inode +@@ -945,7 +945,7 @@ static int flock_lock_inode(struct inode return -ENOMEM; } @@ -28,7 +28,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); if (request->fl_flags & FL_ACCESS) goto find_conflict; -@@ -976,7 +976,7 @@ static int flock_lock_inode(struct inode +@@ -986,7 +986,7 @@ static int flock_lock_inode(struct inode out: spin_unlock(&ctx->flc_lock); @@ -37,7 +37,7 @@ Signed-off-by: Peter Zijlstra (Intel) if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); -@@ -1013,7 +1013,7 @@ static int posix_lock_inode(struct inode +@@ -1023,7 +1023,7 @@ static int posix_lock_inode(struct inode new_fl2 = locks_alloc_lock(); } @@ -46,7 +46,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. 
If -@@ -1185,7 +1185,7 @@ static int posix_lock_inode(struct inode +@@ -1195,7 +1195,7 @@ static int posix_lock_inode(struct inode } out: spin_unlock(&ctx->flc_lock); @@ -55,7 +55,7 @@ Signed-off-by: Peter Zijlstra (Intel) /* * Free any unused locks. */ -@@ -1460,7 +1460,7 @@ int __break_lease(struct inode *inode, u +@@ -1470,7 +1470,7 @@ int __break_lease(struct inode *inode, u return error; } @@ -64,7 +64,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); -@@ -1512,13 +1512,13 @@ int __break_lease(struct inode *inode, u +@@ -1522,13 +1522,13 @@ int __break_lease(struct inode *inode, u locks_insert_block(fl, new_fl); trace_break_lease_block(inode, new_fl); spin_unlock(&ctx->flc_lock); @@ -80,7 +80,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); trace_break_lease_unblock(inode, new_fl); locks_delete_block(new_fl); -@@ -1535,7 +1535,7 @@ int __break_lease(struct inode *inode, u +@@ -1545,7 +1545,7 @@ int __break_lease(struct inode *inode, u } out: spin_unlock(&ctx->flc_lock); @@ -89,7 +89,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); locks_free_lock(new_fl); return error; -@@ -1609,7 +1609,7 @@ int fcntl_getlease(struct file *filp) +@@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp) ctx = smp_load_acquire(&inode->i_flctx); if (ctx && !list_empty_careful(&ctx->flc_lease)) { @@ -98,7 +98,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { -@@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp) +@@ -1629,7 +1629,7 @@ int fcntl_getlease(struct file *filp) break; } spin_unlock(&ctx->flc_lock); @@ -107,7 +107,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); } -@@ -1694,7 +1694,7 @@ generic_add_lease(struct file *filp, lon +@@ -1704,7 +1704,7 @@ generic_add_lease(struct file *filp, lon return -EINVAL; } @@ -116,7 +116,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(dentry, arg, lease->fl_flags); -@@ -1765,7 +1765,7 @@ generic_add_lease(struct file *filp, lon +@@ -1775,7 +1775,7 @@ generic_add_lease(struct file *filp, lon lease->fl_lmops->lm_setup(lease, priv); out: spin_unlock(&ctx->flc_lock); @@ -125,7 +125,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); if (is_deleg) inode_unlock(inode); -@@ -1788,7 +1788,7 @@ static int generic_delete_lease(struct f +@@ -1798,7 +1798,7 @@ static int generic_delete_lease(struct f return error; } @@ -134,7 +134,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file == filp && -@@ -1801,7 +1801,7 @@ static int generic_delete_lease(struct f +@@ -1811,7 +1811,7 @@ static int generic_delete_lease(struct f if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); @@ -143,7 +143,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); return error; } -@@ -2532,13 +2532,13 @@ locks_remove_lease(struct file *filp, st +@@ -2535,13 +2535,13 @@ locks_remove_lease(struct file *filp, st if (list_empty(&ctx->flc_lease)) return; @@ -161,7 +161,7 @@ Signed-off-by: Peter Zijlstra (Intel) } --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h -@@ -28,7 +28,7 @@ static struct percpu_rw_semaphore name = +@@ -29,7 +29,7 @@ static struct percpu_rw_semaphore name = extern int 
__percpu_down_read(struct percpu_rw_semaphore *, int); extern void __percpu_up_read(struct percpu_rw_semaphore *); @@ -170,7 +170,7 @@ Signed-off-by: Peter Zijlstra (Intel) { might_sleep(); -@@ -46,16 +46,10 @@ static inline void percpu_down_read_pree +@@ -47,16 +47,10 @@ static inline void percpu_down_read_pree __this_cpu_inc(*sem->read_count); if (unlikely(!rcu_sync_is_idle(&sem->rss))) __percpu_down_read(sem, false); /* Unconditional memory barrier */ @@ -188,7 +188,7 @@ Signed-off-by: Peter Zijlstra (Intel) preempt_enable(); } -@@ -82,13 +76,9 @@ static inline int percpu_down_read_trylo +@@ -83,13 +77,9 @@ static inline int percpu_down_read_trylo return ret; } @@ -204,7 +204,7 @@ Signed-off-by: Peter Zijlstra (Intel) /* * Same as in percpu_down_read(). */ -@@ -101,12 +91,6 @@ static inline void percpu_up_read_preemp +@@ -102,12 +92,6 @@ static inline void percpu_up_read_preemp rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); } diff --git a/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch b/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch index 7a3fe1e15..fdfec7eb6 100644 --- a/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch +++ b/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch @@ -1,7 +1,7 @@ Subject: crypto: Convert crypto notifier chain to SRCU From: Peter Zijlstra Date: Fri, 05 Oct 2012 09:03:24 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The crypto notifier deadlocks on RT. Though this can be a real deadlock on mainline as well due to fifo fair rwsems. diff --git a/debian/patches/features/all/rt/pid.h-include-atomic.h.patch b/debian/patches/features/all/rt/pid.h-include-atomic.h.patch index 86dde8b75..bfc6c3b45 100644 --- a/debian/patches/features/all/rt/pid.h-include-atomic.h.patch +++ b/debian/patches/features/all/rt/pid.h-include-atomic.h.patch @@ -1,7 +1,7 @@ From: Grygorii Strashko Date: Tue, 21 Jul 2015 19:43:56 +0300 Subject: pid.h: include atomic.h -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz This patch fixes build error: CC kernel/pid_namespace.o @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/pid.h +++ b/include/linux/pid.h -@@ -2,6 +2,7 @@ +@@ -3,6 +3,7 @@ #define _LINUX_PID_H #include diff --git a/debian/patches/features/all/rt/ping-sysrq.patch b/debian/patches/features/all/rt/ping-sysrq.patch index 5f0123138..1ac30e045 100644 --- a/debian/patches/features/all/rt/ping-sysrq.patch +++ b/debian/patches/features/all/rt/ping-sysrq.patch @@ -1,7 +1,7 @@ Subject: net: sysrq via icmp From: Carsten Emde Date: Tue, 19 Jul 2011 13:51:17 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz There are (probably rare) situations when a system crashed and the system console becomes unresponsive but the network icmp layer still is alive. @@ -60,7 +60,7 @@ Signed-off-by: Carsten Emde #include #include #include -@@ -931,6 +932,30 @@ static bool icmp_redirect(struct sk_buff +@@ -915,6 +916,30 @@ static bool icmp_redirect(struct sk_buff } /* @@ -91,7 +91,7 @@ Signed-off-by: Carsten Emde * Handle ICMP_ECHO ("ping") requests. 
* * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo -@@ -957,6 +982,11 @@ static bool icmp_echo(struct sk_buff *sk +@@ -941,6 +966,11 @@ static bool icmp_echo(struct sk_buff *sk icmp_param.data_len = skb->len; icmp_param.head_len = sizeof(struct icmphdr); icmp_reply(&icmp_param, skb); @@ -105,7 +105,7 @@ Signed-off-by: Carsten Emde return true; --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c -@@ -687,6 +687,13 @@ static struct ctl_table ipv4_net_table[] +@@ -772,6 +772,13 @@ static struct ctl_table ipv4_net_table[] .proc_handler = proc_dointvec }, { diff --git a/debian/patches/features/all/rt/posix-timers-no-broadcast.patch b/debian/patches/features/all/rt/posix-timers-no-broadcast.patch index a90316e19..fe3957e0b 100644 --- a/debian/patches/features/all/rt/posix-timers-no-broadcast.patch +++ b/debian/patches/features/all/rt/posix-timers-no-broadcast.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:29:20 -0500 Subject: posix-timers: Prevent broadcast signals -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Posix timers should not send broadcast signals and kernel only signals. Prevent it. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c -@@ -507,6 +507,7 @@ static enum hrtimer_restart posix_timer_ +@@ -433,6 +433,7 @@ static enum hrtimer_restart posix_timer_ static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner if ((event->sigev_notify & SIGEV_THREAD_ID ) && (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || -@@ -515,7 +516,8 @@ static struct pid *good_sigevent(sigeven +@@ -441,7 +442,8 @@ static struct pid *good_sigevent(sigeven return NULL; if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && diff --git a/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch b/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch index 49d8b4a64..d0860bbc1 100644 --- a/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch +++ b/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch @@ -1,7 +1,7 @@ From: John Stultz Date: Fri, 3 Jul 2009 08:29:58 -0500 Subject: posix-timers: Thread posix-cpu-timers on -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz posix-cpu-timer code takes non -rt safe locks in hard irq context. Move it to a thread. 
@@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/init_task.h +++ b/include/linux/init_task.h -@@ -167,6 +167,12 @@ extern struct cred init_cred; +@@ -163,6 +163,12 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif @@ -32,8 +32,8 @@ Signed-off-by: Thomas Gleixner + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ - .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ -@@ -269,6 +275,7 @@ extern struct cred init_cred; + .vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount), \ +@@ -277,6 +283,7 @@ extern struct cred init_cred; INIT_CPU_TIMERS(tsk) \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ @@ -43,7 +43,7 @@ Signed-off-by: Thomas Gleixner [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -710,6 +710,9 @@ struct task_struct { +@@ -745,6 +745,9 @@ struct task_struct { #ifdef CONFIG_POSIX_TIMERS struct task_cputime cputime_expires; struct list_head cpu_timers[3]; @@ -55,7 +55,7 @@ Signed-off-by: Thomas Gleixner /* Process credentials: */ --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1451,6 +1451,9 @@ static void rt_mutex_init_task(struct ta +@@ -1497,6 +1497,9 @@ static void rt_mutex_init_task(struct ta */ static void posix_cpu_timers_init(struct task_struct *tsk) { @@ -67,7 +67,7 @@ Signed-off-by: Thomas Gleixner tsk->cputime_expires.sched_exp = 0; --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c -@@ -2,8 +2,10 @@ +@@ -3,8 +3,10 @@ * Implement CPU time clocks for the POSIX clock interface. */ @@ -78,15 +78,15 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -12,6 +14,7 @@ - #include +@@ -14,6 +16,7 @@ #include #include + #include +#include - /* - * Called after updating RLIMIT_CPU to run cpu timer and update -@@ -590,7 +593,7 @@ static int posix_cpu_timer_set(struct k_ + #include "posix-timers.h" + +@@ -603,7 +606,7 @@ static int posix_cpu_timer_set(struct k_ /* * Disarm any old timer after extracting its expiry time. */ @@ -95,16 +95,16 @@ Signed-off-by: Thomas Gleixner ret = 0; old_incr = timer->it.cpu.incr; -@@ -1014,7 +1017,7 @@ void posix_cpu_timer_schedule(struct k_i +@@ -1034,7 +1037,7 @@ static void posix_cpu_timer_rearm(struct /* * Now re-arm for the new expiry time. */ - WARN_ON_ONCE(!irqs_disabled()); + WARN_ON_ONCE_NONRT(!irqs_disabled()); arm_timer(timer); + unlock: unlock_task_sighand(p, &flags); - -@@ -1103,13 +1106,13 @@ static inline int fastpath_timer_check(s +@@ -1119,13 +1122,13 @@ static inline int fastpath_timer_check(s * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. 
*/ @@ -120,7 +120,7 @@ Signed-off-by: Thomas Gleixner /* * The fast path checks that there are no expired thread or thread -@@ -1163,6 +1166,152 @@ void run_posix_cpu_timers(struct task_st +@@ -1179,6 +1182,152 @@ void run_posix_cpu_timers(struct task_st } } diff --git a/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch b/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch index 6043196b0..c584b07da 100644 --- a/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch +++ b/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch @@ -1,7 +1,7 @@ Subject: powerpc: Disable highmem on RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 17:08:34 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The current highmem handling on -RT is not compatible and needs fixups. @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -333,7 +333,7 @@ menu "Kernel options" +@@ -390,7 +390,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" diff --git a/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch b/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch index 08e670427..4925a09e3 100644 --- a/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch +++ b/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: powerpc: Use generic rwsem on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use generic code which uses rtmutex @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT +@@ -111,10 +111,11 @@ config LOCKDEP_SUPPORT config RWSEM_GENERIC_SPINLOCK bool diff --git a/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch b/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch index 94b12a64c..a25ac6317 100644 --- a/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch +++ b/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch @@ -1,7 +1,7 @@ From: Bogdan Purcareata Date: Fri, 24 Apr 2015 15:53:13 +0000 Subject: powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT_FULL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz While converting the openpic emulation code to use a raw_spinlock_t enables guests to run on RT, there's still a performance issue. 
For interrupts sent in @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig -@@ -175,6 +175,7 @@ config KVM_E500MC +@@ -177,6 +177,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 diff --git a/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch b/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch index 16e90bde9..87010b0ed 100644 --- a/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch +++ b/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 1 Nov 2012 10:14:11 +0100 Subject: powerpc: Add support for lazy preemption -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Implement the powerpc pieces for lazy preempt. @@ -16,8 +16,8 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -155,6 +155,7 @@ config PPC - select HAVE_PERF_EVENTS_NMI if PPC64 +@@ -215,6 +215,7 @@ config PPC + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner select HAVE_SYSCALL_TRACEPOINTS --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h -@@ -43,6 +43,8 @@ struct thread_info { +@@ -36,6 +36,8 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; -@@ -88,8 +90,7 @@ static inline struct thread_info *curren +@@ -81,8 +83,7 @@ static inline struct thread_info *curren #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ @@ -44,8 +44,8 @@ Signed-off-by: Thomas Gleixner +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ - #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ -@@ -107,6 +108,8 @@ static inline struct thread_info *curren + #define TIF_PATCH_PENDING 6 /* pending live patching update */ +@@ -101,6 +102,8 @@ static inline struct thread_info *curren #if defined(CONFIG_PPC64) #define TIF_ELF2ABI 18 /* function descriptors must die! 
*/ #endif @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ -- _TIF_RESTORE_TM) -+ _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) +- _TIF_RESTORE_TM | _TIF_PATCH_PENDING) ++ _TIF_RESTORE_TM | _TIF_PATCH_PENDING | _TIF_NEED_RESCHED_LAZY) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) @@ -84,7 +84,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S -@@ -845,7 +845,14 @@ user_exc_return: /* r10 contains MSR_KE +@@ -866,7 +866,14 @@ user_exc_return: /* r10 contains MSR_KE cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED @@ -99,7 +99,7 @@ Signed-off-by: Thomas Gleixner lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ -@@ -856,11 +863,11 @@ user_exc_return: /* r10 contains MSR_KE +@@ -877,11 +884,11 @@ user_exc_return: /* r10 contains MSR_KE */ bl trace_hardirqs_off #endif @@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return -@@ -1183,7 +1190,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE +@@ -1204,7 +1211,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ @@ -123,7 +123,7 @@ Signed-off-by: Thomas Gleixner beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ -@@ -1204,7 +1211,7 @@ do_resched: /* r10 contains MSR_KERNEL +@@ -1225,7 +1232,7 @@ do_resched: /* r10 contains MSR_KERNEL MTMSRD(r10) /* disable interrupts */ CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) @@ -134,7 +134,7 @@ Signed-off-by: Thomas Gleixner beq restore_user --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S -@@ -656,7 +656,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG +@@ -675,7 +675,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG bl restore_math b restore #endif @@ -143,7 +143,7 @@ Signed-off-by: Thomas Gleixner beq 2f bl restore_interrupts SCHEDULE_USER -@@ -718,10 +718,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG +@@ -737,10 +737,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ @@ -163,7 +163,7 @@ Signed-off-by: Thomas Gleixner cmpwi cr1,r8,0 ld r0,SOFTE(r1) cmpdi r0,0 -@@ -738,7 +746,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG +@@ -757,7 +765,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) diff --git a/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch b/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch index 5147ad7f0..94c9c97da 100644 --- a/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch +++ b/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch @@ -1,7 +1,7 @@ From: Paul Gortmaker Date: Sun, 31 May 2015 14:44:42 -0400 Subject: powerpc: ps3/device-init.c - adapt to completions using swait vs wait -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz To fix: diff --git a/debian/patches/features/all/rt/preempt-lazy-support.patch b/debian/patches/features/all/rt/preempt-lazy-support.patch index 335a08caa..9ef2f614f 100644 --- a/debian/patches/features/all/rt/preempt-lazy-support.patch +++ b/debian/patches/features/all/rt/preempt-lazy-support.patch @@ -1,7 +1,7 @@ Subject: sched: Add support for lazy preemption From: Thomas Gleixner Date: Fri, 26 Oct 2012 18:50:54 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz It has become an obsession to mitigate the determinism vs. throughput loss of RT. Looking at the mainline semantics of preemption points @@ -62,14 +62,14 @@ Signed-off-by: Thomas Gleixner kernel/sched/fair.c | 16 ++++---- kernel/sched/features.h | 3 + kernel/sched/sched.h | 9 ++++ - kernel/trace/trace.c | 37 +++++++++++-------- + kernel/trace/trace.c | 36 ++++++++++-------- kernel/trace/trace.h | 2 + kernel/trace/trace_output.c | 14 ++++++- - 12 files changed, 227 insertions(+), 29 deletions(-) + 12 files changed, 226 insertions(+), 29 deletions(-) --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -179,6 +179,20 @@ extern void preempt_count_sub(int val); +@@ -180,6 +180,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) @@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ -@@ -187,6 +201,12 @@ do { \ +@@ -188,6 +202,12 @@ do { \ barrier(); \ } while (0) @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ -@@ -240,6 +260,13 @@ do { \ +@@ -241,6 +261,13 @@ do { \ __preempt_schedule(); \ } while (0) @@ -117,7 +117,7 @@ Signed-off-by: Thomas Gleixner #else /* !CONFIG_PREEMPT */ #define preempt_enable() \ do { \ -@@ -247,6 +274,12 @@ do { \ +@@ -248,6 +275,12 @@ do { \ preempt_count_dec(); \ } while (0) @@ -130,7 +130,7 @@ Signed-off-by: Thomas Gleixner #define preempt_enable_notrace() \ do { \ barrier(); \ -@@ -313,7 +346,7 @@ do { \ +@@ -314,7 +347,7 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ @@ -141,7 +141,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1509,6 +1509,44 @@ static inline int test_tsk_need_resched( +@@ -1600,6 +1600,44 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -188,7 +188,7 @@ Signed-off-by: Thomas Gleixner if (task->state & (__TASK_STOPPED | __TASK_TRACED)) --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h -@@ -74,7 +74,17 @@ static inline int test_ti_thread_flag(st +@@ -91,7 +91,17 @@ static inline int test_ti_thread_flag(st #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) @@ -209,7 +209,7 @@ Signed-off-by: Thomas Gleixner static inline int arch_within_stack_frames(const void * const stack, --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h -@@ -63,6 +63,7 @@ struct trace_entry { +@@ -64,6 +64,7 @@ struct trace_entry { int pid; unsigned short migrate_disable; unsigned short padding; @@ -234,7 +234,7 @@ Signed-off-by: Thomas Gleixner default PREEMPT_NONE --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -528,6 +528,48 @@ void resched_curr(struct rq *rq) 
+@@ -518,6 +518,48 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } @@ -283,7 +283,7 @@ Signed-off-by: Thomas Gleixner void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -2536,6 +2578,9 @@ int sched_fork(unsigned long clone_flags +@@ -2445,6 +2487,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -293,7 +293,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -3527,6 +3572,7 @@ static void __sched notrace __schedule(b +@@ -3362,6 +3407,7 @@ static void __sched notrace __schedule(b next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -301,7 +301,7 @@ Signed-off-by: Thomas Gleixner clear_preempt_need_resched(); if (likely(prev != next)) { -@@ -3678,6 +3724,30 @@ static void __sched notrace preempt_sche +@@ -3552,6 +3598,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -332,7 +332,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PREEMPT /* * this is the entry point to schedule() from in-kernel preemption -@@ -3692,7 +3762,8 @@ asmlinkage __visible void __sched notrac +@@ -3566,7 +3636,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -342,7 +342,7 @@ Signed-off-by: Thomas Gleixner preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -3719,6 +3790,9 @@ asmlinkage __visible void __sched notrac +@@ -3593,6 +3664,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -352,7 +352,7 @@ Signed-off-by: Thomas Gleixner do { /* * Because the function tracer can trace preempt_count_sub() -@@ -5548,7 +5622,9 @@ void init_idle(struct task_struct *idle, +@@ -5332,7 +5406,9 @@ void init_idle(struct task_struct *idle, /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); @@ -363,15 +363,15 @@ Signed-off-by: Thomas Gleixner /* * The idle tasks have their own, simple scheduling class: */ -@@ -7523,6 +7599,7 @@ void migrate_disable(void) - /* get_online_cpus(); */ +@@ -6888,6 +6964,7 @@ void migrate_disable(void) + } preempt_disable(); + preempt_lazy_disable(); pin_current_cpu(); - p->migrate_disable = 1; -@@ -7592,6 +7669,7 @@ void migrate_enable(void) + migrate_disable_update_cpus_allowed(p); +@@ -6955,6 +7032,7 @@ void migrate_enable(void) arg.dest_cpu = dest_cpu; unpin_current_cpu(); @@ -379,17 +379,17 @@ Signed-off-by: Thomas Gleixner preempt_enable(); stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); tlb_migrate_finish(p->mm); -@@ -7602,6 +7680,7 @@ void migrate_enable(void) +@@ -6963,6 +7041,7 @@ void migrate_enable(void) + } } unpin_current_cpu(); - /* put_online_cpus(); */ + preempt_lazy_enable(); preempt_enable(); } EXPORT_SYMBOL(migrate_enable); --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -3742,7 +3742,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -3840,7 +3840,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -398,7 +398,7 @@ Signed-off-by: Thomas Gleixner /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. 
-@@ -3766,7 +3766,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -3864,7 +3864,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq return; if (delta > ideal_runtime) @@ -407,7 +407,7 @@ Signed-off-by: Thomas Gleixner } static void -@@ -3908,7 +3908,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4006,7 +4006,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc * validating it and just reschedule. */ if (queued) { @@ -416,7 +416,7 @@ Signed-off-by: Thomas Gleixner return; } /* -@@ -4090,7 +4090,7 @@ static void __account_cfs_rq_runtime(str +@@ -4188,7 +4188,7 @@ static void __account_cfs_rq_runtime(str * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -425,7 +425,7 @@ Signed-off-by: Thomas Gleixner } static __always_inline -@@ -4718,7 +4718,7 @@ static void hrtick_start_fair(struct rq +@@ -4837,7 +4837,7 @@ static void hrtick_start_fair(struct rq if (delta < 0) { if (rq->curr == p) @@ -434,7 +434,7 @@ Signed-off-by: Thomas Gleixner return; } hrtick_start(rq, delta); -@@ -6231,7 +6231,7 @@ static void check_preempt_wakeup(struct +@@ -6230,7 +6230,7 @@ static void check_preempt_wakeup(struct return; preempt: @@ -443,7 +443,7 @@ Signed-off-by: Thomas Gleixner /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -9006,7 +9006,7 @@ static void task_fork_fair(struct task_s +@@ -9084,7 +9084,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -452,7 +452,7 @@ Signed-off-by: Thomas Gleixner } se->vruntime -= cfs_rq->min_vruntime; -@@ -9030,7 +9030,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -9108,7 +9108,7 @@ prio_changed_fair(struct rq *rq, struct */ if (rq->curr == p) { if (p->prio > oldprio) @@ -463,7 +463,7 @@ Signed-off-by: Thomas Gleixner } --- a/kernel/sched/features.h +++ b/kernel/sched/features.h -@@ -47,6 +47,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) +@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) #ifdef CONFIG_PREEMPT_RT_FULL SCHED_FEAT(TTWU_QUEUE, false) @@ -475,7 +475,7 @@ Signed-off-by: Thomas Gleixner /* --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1477,6 +1477,15 @@ extern void init_sched_fair_class(void); +@@ -1534,6 +1534,15 @@ extern void init_sched_fair_class(void); extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -493,7 +493,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -1942,6 +1942,7 @@ tracing_generic_entry_update(struct trac +@@ -2129,6 +2129,7 @@ tracing_generic_entry_update(struct trac struct task_struct *tsk = current; entry->preempt_count = pc & 0xff; @@ -501,7 +501,7 @@ Signed-off-by: Thomas Gleixner entry->pid = (tsk) ? tsk->pid : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT -@@ -1952,7 +1953,8 @@ tracing_generic_entry_update(struct trac +@@ -2139,7 +2140,8 @@ tracing_generic_entry_update(struct trac ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | @@ -511,7 +511,7 @@ Signed-off-by: Thomas Gleixner (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); entry->migrate_disable = (tsk) ? 
__migrate_disabled(tsk) & 0xFF : 0; -@@ -3119,15 +3121,17 @@ get_total_entries(struct trace_buffer *b +@@ -3341,15 +3343,17 @@ get_total_entries(struct trace_buffer *b static void print_lat_help_header(struct seq_file *m) { @@ -538,29 +538,32 @@ Signed-off-by: Thomas Gleixner } static void print_event_info(struct trace_buffer *buf, struct seq_file *m) -@@ -3153,11 +3157,14 @@ static void print_func_help_header_irq(s - print_event_info(buf, m); - seq_puts(m, "# _-----=> irqs-off\n" - "# / _----=> need-resched\n" -- "# | / _---=> hardirq/softirq\n" -- "# || / _--=> preempt-depth\n" -- "# ||| / delay\n" -- "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" -- "# | | | |||| | |\n"); -+ "# |/ _-----=> need-resched_lazy\n" -+ "# || / _---=> hardirq/softirq\n" -+ "# ||| / _--=> preempt-depth\n" -+ "# |||| /_--=> preempt-lazy-depth\n" -+ "# ||||| _-=> migrate-disable \n" -+ "# ||||| / delay\n" -+ "# TASK-PID CPU# |||||| TIMESTAMP FUNCTION\n" -+ "# | | | |||||| | |\n"); +@@ -3385,15 +3389,17 @@ static void print_func_help_header_irq(s + tgid ? tgid_space : space); + seq_printf(m, "# %s / _----=> need-resched\n", + tgid ? tgid_space : space); +- seq_printf(m, "# %s| / _---=> hardirq/softirq\n", ++ seq_printf(m, "# %s| / _----=> need-resched_lazy\n", + tgid ? tgid_space : space); +- seq_printf(m, "# %s|| / _--=> preempt-depth\n", ++ seq_printf(m, "# %s|| / _---=> hardirq/softirq\n", + tgid ? tgid_space : space); +- seq_printf(m, "# %s||| / delay\n", ++ seq_printf(m, "# %s||| / _--=> preempt-depth\n", + tgid ? tgid_space : space); +- seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", ++ seq_printf(m, "# %s|||| / delay\n", ++ tgid ? tgid_space : space); ++ seq_printf(m, "# TASK-PID CPU#%s||||| TIMESTAMP FUNCTION\n", + tgid ? " TGID " : space); +- seq_printf(m, "# | | | %s|||| | |\n", ++ seq_printf(m, "# | | | %s||||| | |\n", + tgid ? " | " : space); } - void --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -126,6 +126,7 @@ struct kretprobe_trace_entry_head { +@@ -127,6 +127,7 @@ struct kretprobe_trace_entry_head { * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler @@ -568,7 +571,7 @@ Signed-off-by: Thomas Gleixner */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, -@@ -135,6 +136,7 @@ enum trace_flag_type { +@@ -136,6 +137,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, @@ -578,7 +581,7 @@ Signed-off-by: Thomas Gleixner #define TRACE_BUF_SIZE 1024 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c -@@ -438,6 +438,7 @@ int trace_print_lat_fmt(struct trace_seq +@@ -447,6 +447,7 @@ int trace_print_lat_fmt(struct trace_seq { char hardsoft_irq; char need_resched; @@ -586,7 +589,7 @@ Signed-off-by: Thomas Gleixner char irqs_off; int hardirq; int softirq; -@@ -468,6 +469,9 @@ int trace_print_lat_fmt(struct trace_seq +@@ -477,6 +478,9 @@ int trace_print_lat_fmt(struct trace_seq break; } @@ -596,7 +599,7 @@ Signed-off-by: Thomas Gleixner hardsoft_irq = (nmi && hardirq) ? 'Z' : nmi ? 'z' : -@@ -476,14 +480,20 @@ int trace_print_lat_fmt(struct trace_seq +@@ -485,14 +489,20 @@ int trace_print_lat_fmt(struct trace_seq softirq ? 's' : '.' 
; diff --git a/debian/patches/features/all/rt/preempt-nort-rt-variants.patch b/debian/patches/features/all/rt/preempt-nort-rt-variants.patch index efcfa6d4b..b77211a73 100644 --- a/debian/patches/features/all/rt/preempt-nort-rt-variants.patch +++ b/debian/patches/features/all/rt/preempt-nort-rt-variants.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 24 Jul 2009 12:38:56 +0200 Subject: preempt: Provide preempt_*_(no)rt variants -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RT needs a few preempt_disable/enable points which are not necessary otherwise. Implement variants to avoid #ifdeffery. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -180,7 +180,11 @@ do { \ +@@ -181,7 +181,11 @@ do { \ preempt_count_dec(); \ } while (0) @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner #define preemptible() (preempt_count() == 0 && !irqs_disabled()) -@@ -297,6 +301,18 @@ do { \ +@@ -298,6 +302,18 @@ do { \ set_preempt_need_resched(); \ } while (0) diff --git a/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch b/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch index 83bdf66fb..97d0717f8 100644 --- a/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch +++ b/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch @@ -1,7 +1,7 @@ Subject: printk: Add "force_early_printk" boot param to help with debugging From: Peter Zijlstra Date: Fri, 02 Sep 2011 14:41:29 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Gives me an option to screw printk and actually see what the machine says. @@ -16,7 +16,7 @@ Link: http://lkml.kernel.org/n/tip-ykb97nsfmobq44xketrxs977@git.kernel.org --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -431,6 +431,13 @@ asmlinkage void early_printk(const char +@@ -430,6 +430,13 @@ asmlinkage void early_printk(const char */ static bool __read_mostly printk_killswitch; diff --git a/debian/patches/features/all/rt/printk-kill.patch b/debian/patches/features/all/rt/printk-kill.patch index 51c802f53..41f606469 100644 --- a/debian/patches/features/all/rt/printk-kill.patch +++ b/debian/patches/features/all/rt/printk-kill.patch @@ -1,7 +1,7 @@ Subject: printk: Add a printk kill switch From: Ingo Molnar Date: Fri, 22 Jul 2011 17:58:40 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add a prinkt-kill-switch. This is used from (NMI) watchdog to ensure that it does not dead-lock with the early printk code. 
@@ -10,12 +10,12 @@ Signed-off-by: Thomas Gleixner --- include/linux/printk.h | 2 + kernel/printk/printk.c | 79 ++++++++++++++++++++++++++++++++++++------------- - kernel/watchdog_hld.c | 9 +++++ - 3 files changed, 70 insertions(+), 20 deletions(-) + kernel/watchdog_hld.c | 10 ++++++ + 3 files changed, 71 insertions(+), 20 deletions(-) --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -141,9 +141,11 @@ struct va_format { +@@ -142,9 +142,11 @@ struct va_format { #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PRINTK_NMI --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -401,6 +401,58 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); +@@ -400,6 +400,58 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); printk_safe_exit_irqrestore(flags); \ } while (0) @@ -88,8 +88,8 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ -@@ -1705,6 +1757,13 @@ asmlinkage int vprintk_emit(int facility - int printed_len = 0; +@@ -1692,6 +1744,13 @@ asmlinkage int vprintk_emit(int facility + int printed_len; bool in_sched = false; + /* @@ -102,7 +102,7 @@ Signed-off-by: Thomas Gleixner if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; -@@ -1876,26 +1935,6 @@ static bool suppress_message_printing(in +@@ -1863,26 +1922,6 @@ static bool suppress_message_printing(in #endif /* CONFIG_PRINTK */ @@ -131,15 +131,16 @@ Signed-off-by: Thomas Gleixner { --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c -@@ -21,6 +21,7 @@ - static DEFINE_PER_CPU(bool, hard_watchdog_warn); +@@ -24,6 +24,8 @@ static DEFINE_PER_CPU(bool, hard_watchdo static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); + static DEFINE_PER_CPU(struct perf_event *, dead_event); +static DEFINE_RAW_SPINLOCK(watchdog_output_lock); ++ + static struct cpumask dead_events_mask; - /* boot commands */ - /* -@@ -106,6 +107,13 @@ static void watchdog_overflow_callback(s + static unsigned long hardlockup_allcpu_dumped; +@@ -134,6 +136,13 @@ static void watchdog_overflow_callback(s /* only print hardlockups once */ if (__this_cpu_read(hard_watchdog_warn) == true) return; @@ -153,7 +154,7 @@ Signed-off-by: Thomas Gleixner pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); print_modules(); -@@ -123,6 +131,7 @@ static void watchdog_overflow_callback(s +@@ -151,6 +160,7 @@ static void watchdog_overflow_callback(s !test_and_set_bit(0, &hardlockup_allcpu_dumped)) trigger_allbutself_cpu_backtrace(); diff --git a/debian/patches/features/all/rt/printk-rt-aware.patch b/debian/patches/features/all/rt/printk-rt-aware.patch index 074f26e23..20632bacc 100644 --- a/debian/patches/features/all/rt/printk-rt-aware.patch +++ b/debian/patches/features/all/rt/printk-rt-aware.patch @@ -1,7 +1,7 @@ Subject: printk: Make rt aware From: Thomas Gleixner Date: Wed, 19 Sep 2012 14:50:37 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Drop the lock before calling the console driver and do not disable interrupts while printing to a serial console. 
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1630,6 +1630,7 @@ static void call_console_drivers(const c +@@ -1617,6 +1617,7 @@ static void call_console_drivers(const c if (!console_drivers) return; @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner for_each_console(con) { if (exclusive_console && con != exclusive_console) continue; -@@ -1645,6 +1646,7 @@ static void call_console_drivers(const c +@@ -1632,6 +1633,7 @@ static void call_console_drivers(const c else con->write(con, text, len); } @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner } int printk_delay_msec __read_mostly; -@@ -1827,12 +1829,22 @@ asmlinkage int vprintk_emit(int facility +@@ -1814,12 +1816,22 @@ asmlinkage int vprintk_emit(int facility /* If called from the scheduler, we can not call up(). */ if (!in_sched) { @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner console_unlock(); } -@@ -2283,10 +2295,15 @@ void console_unlock(void) +@@ -2275,10 +2287,15 @@ void console_unlock(void) console_seq++; raw_spin_unlock(&logbuf_lock); diff --git a/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch index a8612aeb5..278cce406 100644 --- a/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch +++ b/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Aug 2013 18:21:04 +0200 Subject: ptrace: fix ptrace vs tasklist_lock race -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz As explained by Alexander Fyodorov : @@ -32,8 +32,8 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -100,12 +100,8 @@ struct task_group; - TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) + __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ + TASK_PARKED) -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) - @@ -44,7 +44,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ (task->flags & PF_FROZEN) == 0 && \ (task->state & TASK_NOLOAD) == 0) -@@ -1496,6 +1492,51 @@ static inline int test_tsk_need_resched( +@@ -1593,6 +1589,51 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_unlock_irq(&task->sighand->siglock); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1374,6 +1374,18 @@ int migrate_swap(struct task_struct *cur +@@ -1359,6 +1359,18 @@ int migrate_swap(struct task_struct *cur return ret; } @@ -135,7 +135,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -1418,7 +1430,7 @@ unsigned long wait_task_inactive(struct +@@ -1403,7 +1415,7 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! 
*/ while (task_running(rq, p)) { @@ -144,7 +144,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; cpu_relax(); } -@@ -1433,7 +1445,8 @@ unsigned long wait_task_inactive(struct +@@ -1418,7 +1430,8 @@ unsigned long wait_task_inactive(struct running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; diff --git a/debian/patches/features/all/rt/radix-tree-use-local-locks.patch b/debian/patches/features/all/rt/radix-tree-use-local-locks.patch index 95bd5220a..16b416964 100644 --- a/debian/patches/features/all/rt/radix-tree-use-local-locks.patch +++ b/debian/patches/features/all/rt/radix-tree-use-local-locks.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 25 Jan 2017 16:34:27 +0100 Subject: [PATCH] radix-tree: use local locks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The preload functionality uses per-CPU variables and preempt-disable to ensure that it does not switch CPUs during its usage. This patch adds @@ -14,12 +14,12 @@ Signed-off-by: Sebastian Andrzej Siewior --- include/linux/idr.h | 5 +---- include/linux/radix-tree.h | 7 ++----- - lib/radix-tree.c | 30 ++++++++++++++++++++++-------- - 3 files changed, 25 insertions(+), 17 deletions(-) + lib/radix-tree.c | 32 +++++++++++++++++++++++--------- + 3 files changed, 26 insertions(+), 18 deletions(-) --- a/include/linux/idr.h +++ b/include/linux/idr.h -@@ -111,10 +111,7 @@ static inline bool idr_is_empty(const st +@@ -167,10 +167,7 @@ static inline bool idr_is_empty(const st * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ @@ -88,7 +88,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Update the allocation stack trace as this is more useful * for debugging. -@@ -475,14 +477,14 @@ static int __radix_tree_preload(gfp_t gf +@@ -475,14 +477,14 @@ static __must_check int __radix_tree_pre */ gfp_mask &= ~__GFP_ACCOUNT; @@ -137,7 +137,12 @@ Signed-off-by: Sebastian Andrzej Siewior static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { -@@ -2107,6 +2115,12 @@ void idr_preload(gfp_t gfp_mask) +@@ -2105,10 +2113,16 @@ EXPORT_SYMBOL(radix_tree_tagged); + void idr_preload(gfp_t gfp_mask) + { + if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) +- preempt_disable(); ++ local_lock(radix_tree_preloads_lock); } EXPORT_SYMBOL(idr_preload); @@ -150,12 +155,12 @@ Signed-off-by: Sebastian Andrzej Siewior /** * ida_pre_get - reserve resources for ida allocation * @ida: ida handle -@@ -2123,7 +2137,7 @@ int ida_pre_get(struct ida *ida, gfp_t g - * ida_get_new() can return -EAGAIN, prompting the caller +@@ -2125,7 +2139,7 @@ int ida_pre_get(struct ida *ida, gfp_t g * to return to the ida_pre_get() step. 
*/ -- preempt_enable(); -+ local_unlock(radix_tree_preloads_lock); + if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) +- preempt_enable(); ++ local_unlock(radix_tree_preloads_lock); if (!this_cpu_read(ida_bitmap)) { struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); diff --git a/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch b/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch index 501459ea8..494fe5bd7 100644 --- a/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch +++ b/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 12 May 2017 15:46:17 +0200 Subject: [PATCH] random: avoid preempt_disable()ed section -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz extract_crng() will use sleeping locks while in a preempt_disable() section due to get_cpu_var(). @@ -23,24 +23,24 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -2030,6 +2031,7 @@ static rwlock_t batched_entropy_reset_lo - * goal of being quite fast and not depleting entropy. +@@ -2087,6 +2088,7 @@ static rwlock_t batched_entropy_reset_lo + * at any point prior. */ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_u64_lock); u64 get_random_u64(void) { u64 ret; -@@ -2046,7 +2048,7 @@ u64 get_random_u64(void) - return ret; - #endif +@@ -2107,7 +2109,7 @@ u64 get_random_u64(void) + warn_unseeded_randomness(&previous); + use_lock = READ_ONCE(crng_init) < 2; - batch = &get_cpu_var(batched_entropy_u64); + batch = &get_locked_var(batched_entropy_u64_lock, batched_entropy_u64); if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { -@@ -2056,12 +2058,13 @@ u64 get_random_u64(void) +@@ -2117,12 +2119,13 @@ u64 get_random_u64(void) ret = batch->entropy_u64[batch->position++]; if (use_lock) read_unlock_irqrestore(&batched_entropy_reset_lock, flags); @@ -55,16 +55,16 @@ Signed-off-by: Sebastian Andrzej Siewior u32 get_random_u32(void) { u32 ret; -@@ -2072,7 +2075,7 @@ u32 get_random_u32(void) - if (arch_get_random_int(&ret)) - return ret; +@@ -2137,7 +2140,7 @@ u32 get_random_u32(void) + warn_unseeded_randomness(&previous); + use_lock = READ_ONCE(crng_init) < 2; - batch = &get_cpu_var(batched_entropy_u32); + batch = &get_locked_var(batched_entropy_u32_lock, batched_entropy_u32); if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { -@@ -2082,7 +2085,7 @@ u32 get_random_u32(void) +@@ -2147,7 +2150,7 @@ u32 get_random_u32(void) ret = batch->entropy_u32[batch->position++]; if (use_lock) read_unlock_irqrestore(&batched_entropy_reset_lock, flags); diff --git a/debian/patches/features/all/rt/random-make-it-work-on-rt.patch b/debian/patches/features/all/rt/random-make-it-work-on-rt.patch index aa0ec2035..04dd3a894 100644 --- a/debian/patches/features/all/rt/random-make-it-work-on-rt.patch +++ b/debian/patches/features/all/rt/random-make-it-work-on-rt.patch @@ -1,7 +1,7 @@ Subject: random: Make it work on rt From: Thomas Gleixner Date: Tue, 21 Aug 2012 20:38:50 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Delegate the random insertion to the forced threaded interrupt handler. Store the return IP of the hard interrupt handler in the irq @@ -21,8 +21,8 @@ Signed-off-by: Thomas Gleixner --- a/drivers/char/random.c +++ b/drivers/char/random.c -@@ -1109,28 +1109,27 @@ static __u32 get_reg(struct fast_pool *f - return *(ptr + f->reg_idx++); +@@ -1113,28 +1113,27 @@ static __u32 get_reg(struct fast_pool *f + return *ptr; } -void add_interrupt_randomness(int irq, int irq_flags) @@ -57,7 +57,7 @@ Signed-off-by: Thomas Gleixner add_interrupt_bench(cycles); --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c -@@ -970,6 +970,8 @@ static void vmbus_isr(void) +@@ -966,6 +966,8 @@ static void vmbus_isr(void) void *page_addr = hv_cpu->synic_event_page; struct hv_message *msg; union hv_synic_event_flags *event; @@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner bool handled = false; if (unlikely(page_addr == NULL)) -@@ -1013,7 +1015,7 @@ static void vmbus_isr(void) +@@ -1009,7 +1011,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h -@@ -66,6 +66,7 @@ struct irq_desc { +@@ -70,6 +70,7 @@ struct irq_desc { unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; @@ -87,7 +87,7 @@ Signed-off-by: Thomas Gleixner const struct cpumask *percpu_affinity; --- a/include/linux/random.h +++ b/include/linux/random.h -@@ -31,7 +31,7 @@ static inline void add_latent_entropy(vo +@@ -32,7 +32,7 @@ static inline void add_latent_entropy(vo extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; @@ -95,10 +95,10 @@ Signed-off-by: Thomas Gleixner +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); - extern int add_random_ready_callback(struct random_ready_callback *rdy); + extern int wait_for_random_bytes(void); --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c -@@ -181,10 +181,16 @@ irqreturn_t handle_irq_event_percpu(stru +@@ -183,10 +183,16 @@ irqreturn_t handle_irq_event_percpu(stru { irqreturn_t retval; unsigned int flags = 0; @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner note_interrupt(desc, retval); --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1025,6 +1025,12 @@ static int irq_thread(void *data) +@@ -1027,6 +1027,12 @@ static int irq_thread(void *data) if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); diff --git a/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch b/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch index cec4bcf9f..4f1eec1a5 100644 --- a/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch +++ b/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 14 Sep 2016 11:52:17 +0200 Subject: rbtree: include rcu.h because we use it -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Since commit c1adf20052d8 ("Introduce rb_replace_node_rcu()") rbtree_augmented.h uses RCU related data structures but does not include @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Please note - only struct 
rb_augment_callbacks and the prototypes for --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h -@@ -34,6 +34,7 @@ +@@ -35,6 +35,7 @@ #include #include diff --git a/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch b/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch index 9ddf4cdc8..ef21bea2d 100644 --- a/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch +++ b/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch @@ -1,7 +1,7 @@ From: "Paul E. McKenney" Date: Mon, 4 Nov 2013 13:21:10 -0800 Subject: rcu: Eliminate softirq processing from rcutree -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Running RCU out of softirq is a problem for some workloads that would like to manage RCU core processing independently of other softirq work, @@ -17,17 +17,17 @@ Tested-by: Mike Galbraith Signed-off-by: Paul E. McKenney Signed-off-by: Sebastian Andrzej Siewior --- - kernel/rcu/tree.c | 110 ++++++++++++++++++++++++++++++--- + kernel/rcu/tree.c | 110 ++++++++++++++++++++++++++++++---- kernel/rcu/tree.h | 5 - - kernel/rcu/tree_plugin.h | 155 ++++++----------------------------------------- - 3 files changed, 122 insertions(+), 148 deletions(-) + kernel/rcu/tree_plugin.h | 152 ++++------------------------------------------- + 3 files changed, 114 insertions(+), 153 deletions(-) --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -57,6 +57,11 @@ - #include +@@ -58,6 +58,11 @@ #include #include + #include +#include +#include +#include @@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "tree.h" #include "rcu.h" -@@ -3143,18 +3148,17 @@ static void +@@ -2946,18 +2951,17 @@ static void /* * Do RCU core processing for the current CPU. */ @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Schedule RCU callback invocation. If the specified type of RCU * does not support RCU priority boosting, just do a direct call, -@@ -3166,18 +3170,105 @@ static void invoke_rcu_callbacks(struct +@@ -2969,18 +2973,105 @@ static void invoke_rcu_callbacks(struct { if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) return; @@ -169,7 +169,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Handle any core-RCU processing required by a call_rcu() invocation. 
-@@ -4357,7 +4448,6 @@ void __init rcu_init(void) +@@ -4221,7 +4312,6 @@ void __init rcu_init(void) if (dump_tree) rcu_dump_rcu_node_tree(&rcu_sched_state); __rcu_init_preempt(); @@ -179,9 +179,9 @@ Signed-off-by: Sebastian Andrzej Siewior * We don't need protection against CPU-hotplug here because --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h -@@ -599,12 +599,10 @@ extern struct rcu_state rcu_preempt_stat - +@@ -438,12 +438,10 @@ extern struct rcu_state rcu_preempt_stat int rcu_dynticks_snap(struct rcu_dynticks *rdtp); + bool rcu_eqs_special_set(int cpu); -#ifdef CONFIG_RCU_BOOST DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); @@ -192,7 +192,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifndef RCU_TREE_NONCORE -@@ -624,10 +622,9 @@ void call_rcu(struct rcu_head *head, rcu +@@ -463,10 +461,9 @@ void call_rcu(struct rcu_head *head, rcu static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); @@ -206,7 +206,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* #ifdef CONFIG_RCU_BOOST */ --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h -@@ -24,28 +24,10 @@ +@@ -24,39 +24,16 @@ * Paul E. McKenney */ @@ -218,39 +218,35 @@ Signed-off-by: Sebastian Andrzej Siewior -#include -#include -#include "../time/tick-internal.h" -- - #ifdef CONFIG_RCU_BOOST - #include "../locking/rtmutex_common.h" --/* -- * Control variables for per-CPU and per-rcu_node kthreads. These -- * handle all flavors of RCU. -- */ --static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); --DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); --DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); --DEFINE_PER_CPU(char, rcu_cpu_has_work); +-#ifdef CONFIG_RCU_BOOST - - #else /* #ifdef CONFIG_RCU_BOOST */ - /* -@@ -58,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); + * Control variables for per-CPU and per-rcu_node kthreads. These + * handle all flavors of RCU. + */ +-static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); + DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); + DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); + DEFINE_PER_CPU(char, rcu_cpu_has_work); - #endif /* #else #ifdef CONFIG_RCU_BOOST */ - -+/* -+ * Control variables for per-CPU and per-rcu_node kthreads. These -+ * handle all flavors of RCU. -+ */ -+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); -+DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); -+DEFINE_PER_CPU(char, rcu_cpu_has_work); -+ +-#else /* #ifdef CONFIG_RCU_BOOST */ +- +-/* +- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST, +- * all uses are in dead code. Provide a definition to keep the compiler +- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place. +- * This probably needs to be excluded from -rt builds. +- */ +-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) +- +-#endif /* #else #ifdef CONFIG_RCU_BOOST */ +- #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ -@@ -635,15 +625,6 @@ static void rcu_preempt_check_callbacks( +@@ -682,15 +659,6 @@ static void rcu_preempt_check_callbacks( t->rcu_read_unlock_special.b.need_qs = true; } @@ -263,10 +259,10 @@ Signed-off-by: Sebastian Andrzej Siewior - -#endif /* #ifdef CONFIG_RCU_BOOST */ - - /* - * Queue a preemptible-RCU callback for invocation after a grace period. 
- */ -@@ -832,6 +813,19 @@ void exit_rcu(void) + /** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. +@@ -913,20 +881,23 @@ void exit_rcu(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ @@ -275,21 +271,11 @@ Signed-off-by: Sebastian Andrzej Siewior + */ +static void rcu_cpu_kthread_setup(unsigned int cpu) +{ -+#ifdef CONFIG_RCU_BOOST -+ struct sched_param sp; -+ -+ sp.sched_priority = kthread_prio; -+ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); -+#endif /* #ifdef CONFIG_RCU_BOOST */ -+} -+ #ifdef CONFIG_RCU_BOOST ++ struct sched_param sp; - #include "../locking/rtmutex_common.h" -@@ -863,16 +857,6 @@ static void rcu_initiate_boost_trace(str - - #endif /* #else #ifdef CONFIG_RCU_TRACE */ - +-#include "../locking/rtmutex_common.h" +- -static void rcu_wake_cond(struct task_struct *t, int status) -{ - /* @@ -298,12 +284,19 @@ Signed-off-by: Sebastian Andrzej Siewior - */ - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) - wake_up_process(t); --} -- ++ sp.sched_priority = kthread_prio; ++ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); ++#endif /* #ifdef CONFIG_RCU_BOOST */ + } + ++#ifdef CONFIG_RCU_BOOST ++ ++#include "../locking/rtmutex_common.h" ++ /* * Carry out RCU priority boosting on the task indicated by ->exp_tasks * or ->boost_tasks, advancing the pointer to the next task in the -@@ -1016,23 +1000,6 @@ static void rcu_initiate_boost(struct rc +@@ -1069,23 +1040,6 @@ static void rcu_initiate_boost(struct rc } /* @@ -327,7 +320,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Is the current CPU running the RCU-callbacks kthread? * Caller must have preemption disabled. */ -@@ -1086,67 +1053,6 @@ static int rcu_spawn_one_boost_kthread(s +@@ -1139,67 +1093,6 @@ static int rcu_spawn_one_boost_kthread(s return 0; } @@ -395,7 +388,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Set the per-rcu_node kthread's affinity to cover all CPUs that are * served by the rcu_node in question. The CPU hotplug lock is still -@@ -1177,26 +1083,12 @@ static void rcu_boost_kthread_setaffinit +@@ -1230,26 +1123,12 @@ static void rcu_boost_kthread_setaffinit free_cpumask_var(cm); } @@ -422,7 +415,7 @@ Signed-off-by: Sebastian Andrzej Siewior rcu_for_each_leaf_node(rcu_state_p, rnp) (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } -@@ -1219,11 +1111,6 @@ static void rcu_initiate_boost(struct rc +@@ -1272,11 +1151,6 @@ static void rcu_initiate_boost(struct rc raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/debian/patches/features/all/rt/rcu-Suppress-lockdep-false-positive-boost_mtx-compla.patch b/debian/patches/features/all/rt/rcu-Suppress-lockdep-false-positive-boost_mtx-compla.patch new file mode 100644 index 000000000..a73d84253 --- /dev/null +++ b/debian/patches/features/all/rt/rcu-Suppress-lockdep-false-positive-boost_mtx-compla.patch @@ -0,0 +1,50 @@ +From: "Paul E. McKenney" +Date: Tue, 19 Sep 2017 15:36:42 -0700 +Subject: [PATCH] rcu: Suppress lockdep false-positive ->boost_mtx complaints +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Upstream commit bcda31a2659497df39d6bedfbdf17498b4f4ac89 + +RCU priority boosting uses rt_mutex_init_proxy_locked() to initialize an +rt_mutex structure in locked state held by some other task. When that +other task releases it, lockdep complains (quite accurately, but a bit +uselessly) that the other task never acquired it. 
This complaint can +suppress other, more helpful, lockdep complaints, and in any case it is +a false positive. + +This commit therefore switches from rt_mutex_unlock() to +rt_mutex_futex_unlock(), thereby avoiding the lockdep annotations. +Of course, if lockdep ever learns about rt_mutex_init_proxy_locked(), +addtional adjustments will be required. + +Suggested-by: Peter Zijlstra +Signed-off-by: Paul E. McKenney +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/rcu/tree_plugin.h | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -31,11 +31,10 @@ + #include + #include + #include "../time/tick-internal.h" ++#include "../locking/rtmutex_common.h" + + #ifdef CONFIG_RCU_BOOST + +-#include "../locking/rtmutex_common.h" +- + /* + * Control variables for per-CPU and per-rcu_node kthreads. These + * handle all flavors of RCU. +@@ -530,7 +529,7 @@ void rcu_read_unlock_special(struct task + + /* Unboost if we were boosted. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) +- rt_mutex_unlock(&rnp->boost_mtx); ++ rt_mutex_futex_unlock(&rnp->boost_mtx); + + /* + * If this was the last task on the expedited lists, diff --git a/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch b/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch index 4f5c9ccc9..b56a8235f 100644 --- a/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch +++ b/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch @@ -1,7 +1,7 @@ Subject: rcu: Disable RCU_FAST_NO_HZ on RT From: Thomas Gleixner Date: Sun, 28 Oct 2012 13:26:09 +0000 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz This uses a timer_list timer from the irq disabled guts of the idle code. Disable it for now to prevent wreckage. @@ -9,12 +9,12 @@ code. Disable it for now to prevent wreckage. 
Signed-off-by: Thomas Gleixner --- - init/Kconfig | 2 +- + kernel/rcu/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -622,7 +622,7 @@ config RCU_FANOUT_LEAF +--- a/kernel/rcu/Kconfig ++++ b/kernel/rcu/Kconfig +@@ -172,7 +172,7 @@ config RCU_FANOUT_LEAF config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" diff --git a/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch b/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch index b8108db2a..20d0aceb8 100644 --- a/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch +++ b/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch @@ -1,7 +1,7 @@ From: Julia Cartwright Date: Wed, 12 Oct 2016 11:21:14 -0500 Subject: [PATCH] rcu: enable rcu_normal_after_boot by default for RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The forcing of an expedited grace period is an expensive and very RT-application unfriendly operation, as it forcibly preempts all running @@ -19,9 +19,9 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -64,7 +64,7 @@ - #ifndef CONFIG_TINY_RCU +@@ -66,7 +66,7 @@ extern int rcu_expedited; /* from sysctl module_param(rcu_expedited, int, 0); + extern int rcu_normal; /* from sysctl */ module_param(rcu_normal, int, 0); -static int rcu_normal_after_boot; +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); diff --git a/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch b/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch index c76b502bf..c2dbb58c7 100644 --- a/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch +++ b/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 21 Mar 2014 20:19:05 +0100 Subject: rcu: make RCU_BOOST default on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Since it is no longer invoked from the softirq people run into OOM more often if the priority of the RCU thread is too low. Making boosting @@ -10,12 +10,12 @@ someone knows better. Signed-off-by: Sebastian Andrzej Siewior --- - init/Kconfig | 4 ++-- + kernel/rcu/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -506,7 +506,7 @@ config TINY_RCU +--- a/kernel/rcu/Kconfig ++++ b/kernel/rcu/Kconfig +@@ -36,7 +36,7 @@ config TINY_RCU config RCU_EXPERT bool "Make expert-level adjustments to RCU configuration" @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior help This option needs to be enabled if you wish to make expert-level adjustments to RCU configuration. 
By default, -@@ -649,7 +649,7 @@ config TREE_RCU_TRACE +@@ -191,7 +191,7 @@ config RCU_FAST_NO_HZ config RCU_BOOST bool "Enable RCU priority boosting" depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT diff --git a/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch b/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch index 0818f9a5d..fafa0d277 100644 --- a/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch +++ b/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch @@ -1,7 +1,7 @@ Subject: rcu: Merge RCU-bh into RCU-preempt Date: Wed, 5 Oct 2011 11:59:38 -0700 From: Thomas Gleixner -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The Linux kernel has long RCU-bh read-side critical sections that intolerably increase scheduling latency under mainline's RCU-bh rules, @@ -26,36 +26,31 @@ Signed-off-by: Thomas Gleixner --- include/linux/rcupdate.h | 23 +++++++++++++++++++++++ - include/linux/rcutree.h | 21 ++++++++++++++++++--- + include/linux/rcutree.h | 8 ++++++++ + kernel/rcu/rcu.h | 14 +++++++++++--- kernel/rcu/rcutorture.c | 7 +++++++ kernel/rcu/tree.c | 24 ++++++++++++++++++++++++ kernel/rcu/tree.h | 2 ++ kernel/rcu/update.c | 2 ++ - 6 files changed, 76 insertions(+), 3 deletions(-) + 7 files changed, 77 insertions(+), 3 deletions(-) --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h -@@ -178,6 +178,9 @@ void call_rcu(struct rcu_head *head, - +@@ -56,7 +56,11 @@ void call_rcu(struct rcu_head *head, rcu + #define call_rcu call_rcu_sched #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_PREEMPT_RT_FULL +#define call_rcu_bh call_rcu +#else - /** - * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. -@@ -201,6 +204,7 @@ void call_rcu(struct rcu_head *head, - */ - void call_rcu_bh(struct rcu_head *head, - rcu_callback_t func); + void call_rcu_bh(struct rcu_head *head, rcu_callback_t func); +#endif - - /** - * call_rcu_sched() - Queue an RCU for invocation after sched grace period. -@@ -299,7 +303,11 @@ static inline int rcu_preempt_depth(void - /* Internal to kernel */ + void call_rcu_sched(struct rcu_head *head, rcu_callback_t func); + void synchronize_sched(void); + void rcu_barrier_tasks(void); +@@ -113,7 +117,11 @@ static inline int rcu_preempt_depth(void void rcu_init(void); + extern int rcu_scheduler_active __read_mostly; void rcu_sched_qs(void); +#ifdef CONFIG_PREEMPT_RT_FULL +static inline void rcu_bh_qs(void) { } @@ -65,9 +60,9 @@ Signed-off-by: Thomas Gleixner void rcu_check_callbacks(int user); void rcu_report_dead(unsigned int cpu); void rcu_cpu_starting(unsigned int cpu); -@@ -473,7 +481,14 @@ extern struct lockdep_map rcu_callback_m +@@ -263,7 +271,14 @@ extern struct lockdep_map rcu_sched_lock + extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); - int rcu_read_lock_held(void); +#ifdef CONFIG_PREEMPT_RT_FULL +static inline int rcu_read_lock_bh_held(void) @@ -77,10 +72,10 @@ Signed-off-by: Thomas Gleixner +#else int rcu_read_lock_bh_held(void); +#endif + int rcu_read_lock_sched_held(void); - /** - * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? 
-@@ -871,10 +886,14 @@ static inline void rcu_read_unlock(void) + #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +@@ -667,10 +682,14 @@ static inline void rcu_read_unlock(void) static inline void rcu_read_lock_bh(void) { local_bh_disable(); @@ -95,7 +90,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -884,10 +903,14 @@ static inline void rcu_read_lock_bh(void +@@ -680,10 +699,14 @@ static inline void rcu_read_lock_bh(void */ static inline void rcu_read_unlock_bh(void) { @@ -113,7 +108,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context - rcu_note_context_switch(); + rcu_note_context_switch(false); } +#ifdef CONFIG_PREEMPT_RT_FULL @@ -136,7 +131,9 @@ Signed-off-by: Thomas Gleixner void rcu_barrier_sched(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); -@@ -82,17 +90,14 @@ void cond_synchronize_sched(unsigned lon +--- a/kernel/rcu/rcu.h ++++ b/kernel/rcu/rcu.h +@@ -462,18 +462,26 @@ static inline void show_rcu_gp_kthreads( extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; unsigned long rcu_batches_started(void); @@ -147,17 +144,12 @@ Signed-off-by: Thomas Gleixner unsigned long rcu_batches_completed_sched(void); unsigned long rcu_exp_batches_completed(void); unsigned long rcu_exp_batches_completed_sched(void); + unsigned long srcu_batches_completed(struct srcu_struct *sp); void show_rcu_gp_kthreads(void); - void rcu_force_quiescent_state(void); -void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); - - void rcu_idle_enter(void); -@@ -109,6 +114,16 @@ extern int rcu_scheduler_active __read_m - - bool rcu_is_watching(void); - ++ +#ifndef CONFIG_PREEMPT_RT_FULL +void rcu_bh_force_quiescent_state(void); +unsigned long rcu_batches_started_bh(void); @@ -168,12 +160,12 @@ Signed-off-by: Thomas Gleixner +# define rcu_batches_started_bh rcu_batches_completed +#endif + - void rcu_all_qs(void); + #endif /* #else #ifdef CONFIG_TINY_RCU */ - /* RCUtree hotplug events */ + #ifdef CONFIG_RCU_NOCB_CPU --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c -@@ -414,6 +414,7 @@ static struct rcu_torture_ops rcu_ops = +@@ -417,6 +417,7 @@ static struct rcu_torture_ops rcu_ops = .name = "rcu" }; @@ -181,7 +173,7 @@ Signed-off-by: Thomas Gleixner /* * Definitions for rcu_bh torture testing. */ -@@ -453,6 +454,12 @@ static struct rcu_torture_ops rcu_bh_ops +@@ -456,6 +457,12 @@ static struct rcu_torture_ops rcu_bh_ops .name = "rcu_bh" }; @@ -196,23 +188,23 @@ Signed-off-by: Thomas Gleixner * The names includes "busted", and they really means it! --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -262,6 +262,7 @@ void rcu_sched_qs(void) +@@ -243,6 +243,7 @@ void rcu_sched_qs(void) this_cpu_ptr(&rcu_sched_data), true); } +#ifndef CONFIG_PREEMPT_RT_FULL void rcu_bh_qs(void) { - if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { -@@ -271,6 +272,7 @@ void rcu_bh_qs(void) + RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); +@@ -253,6 +254,7 @@ void rcu_bh_qs(void) __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); } } +#endif - static DEFINE_PER_CPU(int, rcu_sched_qs_mask); - -@@ -557,11 +559,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sc + /* + * Steal a bit from the bottom of ->dynticks for idle entry/exit +@@ -564,11 +566,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sc /* * Return the number of RCU BH batches started thus far for debug & stats. 
*/ @@ -226,7 +218,7 @@ Signed-off-by: Thomas Gleixner /* * Return the number of RCU batches completed thus far for debug & stats. -@@ -581,6 +585,7 @@ unsigned long rcu_batches_completed_sche +@@ -588,6 +592,7 @@ unsigned long rcu_batches_completed_sche } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); @@ -234,7 +226,7 @@ Signed-off-by: Thomas Gleixner /* * Return the number of RCU BH batches completed thus far for debug & stats. */ -@@ -589,6 +594,7 @@ unsigned long rcu_batches_completed_bh(v +@@ -596,6 +601,7 @@ unsigned long rcu_batches_completed_bh(v return rcu_bh_state.completed; } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); @@ -242,7 +234,7 @@ Signed-off-by: Thomas Gleixner /* * Return the number of RCU expedited batches completed thus far for -@@ -612,6 +618,7 @@ unsigned long rcu_exp_batches_completed_ +@@ -619,6 +625,7 @@ unsigned long rcu_exp_batches_completed_ } EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); @@ -250,7 +242,7 @@ Signed-off-by: Thomas Gleixner /* * Force a quiescent state. */ -@@ -630,6 +637,13 @@ void rcu_bh_force_quiescent_state(void) +@@ -637,6 +644,13 @@ void rcu_bh_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); @@ -264,7 +256,7 @@ Signed-off-by: Thomas Gleixner /* * Force a quiescent state for RCU-sched. */ -@@ -680,9 +694,11 @@ void rcutorture_get_gp_data(enum rcutort +@@ -687,9 +701,11 @@ void rcutorture_get_gp_data(enum rcutort case RCU_FLAVOR: rsp = rcu_state_p; break; @@ -276,15 +268,15 @@ Signed-off-by: Thomas Gleixner case RCU_SCHED_FLAVOR: rsp = &rcu_sched_state; break; -@@ -3289,6 +3305,7 @@ void call_rcu_sched(struct rcu_head *hea +@@ -3113,6 +3129,7 @@ void call_rcu_sched(struct rcu_head *hea } EXPORT_SYMBOL_GPL(call_rcu_sched); +#ifndef CONFIG_PREEMPT_RT_FULL - /* - * Queue an RCU callback for invocation after a quicker grace period. - */ -@@ -3297,6 +3314,7 @@ void call_rcu_bh(struct rcu_head *head, + /** + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. +@@ -3140,6 +3157,7 @@ void call_rcu_bh(struct rcu_head *head, __call_rcu(head, func, &rcu_bh_state, -1, 0); } EXPORT_SYMBOL_GPL(call_rcu_bh); @@ -292,7 +284,7 @@ Signed-off-by: Thomas Gleixner /* * Queue an RCU callback for lazy invocation after a grace period. -@@ -3388,6 +3406,7 @@ void synchronize_sched(void) +@@ -3225,6 +3243,7 @@ void synchronize_sched(void) } EXPORT_SYMBOL_GPL(synchronize_sched); @@ -300,7 +292,7 @@ Signed-off-by: Thomas Gleixner /** * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. * -@@ -3414,6 +3433,7 @@ void synchronize_rcu_bh(void) +@@ -3251,6 +3270,7 @@ void synchronize_rcu_bh(void) wait_rcu_gp(call_rcu_bh); } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); @@ -308,7 +300,7 @@ Signed-off-by: Thomas Gleixner /** * get_state_synchronize_rcu - Snapshot current RCU state -@@ -3790,6 +3810,7 @@ static void _rcu_barrier(struct rcu_stat +@@ -3601,6 +3621,7 @@ static void _rcu_barrier(struct rcu_stat mutex_unlock(&rsp->barrier_mutex); } @@ -316,7 +308,7 @@ Signed-off-by: Thomas Gleixner /** * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. */ -@@ -3798,6 +3819,7 @@ void rcu_barrier_bh(void) +@@ -3609,6 +3630,7 @@ void rcu_barrier_bh(void) _rcu_barrier(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -324,7 +316,7 @@ Signed-off-by: Thomas Gleixner /** * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 
-@@ -4316,7 +4338,9 @@ void __init rcu_init(void) +@@ -4184,7 +4206,9 @@ void __init rcu_init(void) rcu_bootup_announce(); rcu_init_geometry(); @@ -336,7 +328,7 @@ Signed-off-by: Thomas Gleixner rcu_dump_rcu_node_tree(&rcu_sched_state); --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h -@@ -589,7 +589,9 @@ extern struct list_head rcu_struct_flavo +@@ -427,7 +427,9 @@ extern struct list_head rcu_struct_flavo */ extern struct rcu_state rcu_sched_state; @@ -348,7 +340,7 @@ Signed-off-by: Thomas Gleixner extern struct rcu_state rcu_preempt_state; --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -298,6 +298,7 @@ int rcu_read_lock_held(void) +@@ -333,6 +333,7 @@ int rcu_read_lock_held(void) } EXPORT_SYMBOL_GPL(rcu_read_lock_held); @@ -356,7 +348,7 @@ Signed-off-by: Thomas Gleixner /** * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * -@@ -324,6 +325,7 @@ int rcu_read_lock_bh_held(void) +@@ -359,6 +360,7 @@ int rcu_read_lock_bh_held(void) return in_softirq() || irqs_disabled(); } EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); diff --git a/debian/patches/features/all/rt/rcu-segcblist-include-rcupdate.h.patch b/debian/patches/features/all/rt/rcu-segcblist-include-rcupdate.h.patch new file mode 100644 index 000000000..035c7b9f1 --- /dev/null +++ b/debian/patches/features/all/rt/rcu-segcblist-include-rcupdate.h.patch @@ -0,0 +1,22 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 22 Sep 2017 15:01:46 +0200 +Subject: [PATCH] rcu/segcblist: include rcupdate.h +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The RT build on ARM complains about non-existing ULONG_CMP_LT. + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/rcu/rcu_segcblist.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/rcu/rcu_segcblist.c ++++ b/kernel/rcu/rcu_segcblist.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #include "rcu_segcblist.h" + diff --git a/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch b/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch index 505c7d50f..543d3a1dc 100644 --- a/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch +++ b/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch @@ -1,7 +1,7 @@ From: Tiejun Chen Date: Wed, 18 Dec 2013 17:51:49 +0800 Subject: rcutree/rcu_bh_qs: Disable irq while calling rcu_preempt_qs() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Any callers to the function rcu_preempt_qs() must disable irqs in order to protect the assignment to ->rcu_read_unlock_special. 
In @@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -267,7 +267,12 @@ static void rcu_preempt_qs(void); +@@ -248,7 +248,12 @@ static void rcu_preempt_qs(void); void rcu_bh_qs(void) { diff --git a/debian/patches/features/all/rt/re-migrate_disable-race-with-cpu-hotplug-3f.patch b/debian/patches/features/all/rt/re-migrate_disable-race-with-cpu-hotplug-3f.patch deleted file mode 100644 index f8328e63e..000000000 --- a/debian/patches/features/all/rt/re-migrate_disable-race-with-cpu-hotplug-3f.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Yong Zhang -Date: Thu, 28 Jul 2011 11:16:00 +0800 -Subject: hotplug: Reread hotplug_pcp on pin_current_cpu() retry -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -When retry happens, it's likely that the task has been migrated to -another cpu (except unplug failed), but it still derefernces the -original hotplug_pcp per cpu data. - -Update the pointer to hotplug_pcp in the retry path, so it points to -the current cpu. - -Signed-off-by: Yong Zhang -Cc: Peter Zijlstra -Link: http://lkml.kernel.org/r/20110728031600.GA338@windriver.com -Signed-off-by: Thomas Gleixner ---- - kernel/cpu.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -252,9 +252,11 @@ static DEFINE_PER_CPU(struct hotplug_pcp - */ - void pin_current_cpu(void) - { -- struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); -+ struct hotplug_pcp *hp; - - retry: -+ hp = this_cpu_ptr(&hotplug_pcp); -+ - if (!hp->unplug || hp->refcount || preempt_count() > 1 || - hp->unplug == current) { - hp->refcount++; diff --git a/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch b/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch index ea8e54e6a..f76815170 100644 --- a/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch +++ b/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch @@ -1,7 +1,7 @@ Subject: ARM: Initialize split page table locks for vector page From: Frank Rowand Date: Sat, 1 Oct 2011 18:58:13 -0700 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Without this patch, ARM can not use SPLIT_PTLOCK_CPUS if PREEMPT_RT_FULL=y because vectors_user_mapping() creates a diff --git a/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch b/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch index 47f1d9149..50ea0e858 100644 --- a/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch +++ b/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch @@ -1,7 +1,7 @@ Subject: ARM: smp: Move clear_tasks_mm_cpumask() call to __cpu_die() From: Grygorii Strashko Date: Fri, 11 Sep 2015 21:21:23 +0300 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz When running with the RT-kernel (4.1.5-rt5) on TI OMAP dra7-evm and trying to do Suspend to RAM, the following backtrace occurs: diff --git 
a/debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch b/debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch new file mode 100644 index 000000000..8c8caf706 --- /dev/null +++ b/debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch @@ -0,0 +1,155 @@ +From: Daniel Bristot de Oliveira +Date: Mon, 26 Jun 2017 17:07:15 +0200 +Subject: rt: Increase/decrease the nr of migratory tasks when enabling/disabling migration +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +There is a problem in the migrate_disable()/enable() implementation +regarding the number of migratory tasks in the rt/dl RQs. The problem +is the following: + +When a task is attached to the rt runqueue, it is checked if it either +can run in more than one CPU, or if it is with migration disable. If +either check is true, the rt_rq->rt_nr_migratory counter is not +increased. The counter increases otherwise. + +When the task is detached, the same check is done. If either check is +true, the rt_rq->rt_nr_migratory counter is not decreased. The counter +decreases otherwise. The same check is done in the dl scheduler. + +One important thing is that, migrate disable/enable does not touch this +counter for tasks attached to the rt rq. So suppose the following chain +of events. + +Assumptions: +Task A is the only runnable task in A Task B runs on the CPU B +Task A runs on CFS (non-rt) Task B has RT priority +Thus, rt_nr_migratory is 0 B is running +Task A can run on all CPUS. + +Timeline: + CPU A/TASK A CPU B/TASK B +A takes the rt mutex X . +A disables migration . + . B tries to take the rt mutex X + . As it is held by A { + . A inherits the rt priority of B + . A is dequeued from CFS RQ of CPU A + . A is enqueued in the RT RQ of CPU A + . As migration is disabled + . rt_nr_migratory in A is not increased + . +A enables migration +A releases the rt mutex X { + A returns to its original priority + A ask to be dequeued from RT RQ { + As migration is now enabled and it can run on all CPUS { + rt_nr_migratory should be decreased + As rt_nr_migratory is 0, rt_nr_migratory under flows + } +} + +This variable is important because it notifies if there are more than one +runnable & migratory task in the runqueue. If there are more than one +tasks, the rt_rq is set as overloaded, and then tries to migrate some +tasks. This rule is important to keep the scheduler working conserving, +that is, in a system with M CPUs, the M highest priority tasks should be +running. + +As rt_nr_migratory is unsigned, it will become > 0, notifying that the +RQ is overloaded, activating pushing mechanism without need. + +This patch fixes this problem by decreasing/increasing the +rt/dl_nr_migratory in the migrate disable/enable operations. + +Reported-by: Pei Zhang +Reported-by: Luiz Capitulino +Signed-off-by: Daniel Bristot de Oliveira +Cc: Luis Claudio R. 
Goncalves +Cc: Clark Williams +Cc: Luiz Capitulino +Cc: Sebastian Andrzej Siewior +Cc: Thomas Gleixner +Cc: Steven Rostedt +Cc: Peter Zijlstra +Cc: Ingo Molnar +Cc: LKML +Cc: linux-rt-users +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/sched/core.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 44 insertions(+), 5 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6851,6 +6851,47 @@ const u32 sched_prio_to_wmult[40] = { + + #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) + ++static inline void ++update_nr_migratory(struct task_struct *p, long delta) ++{ ++ if (unlikely((p->sched_class == &rt_sched_class || ++ p->sched_class == &dl_sched_class) && ++ p->nr_cpus_allowed > 1)) { ++ if (p->sched_class == &rt_sched_class) ++ task_rq(p)->rt.rt_nr_migratory += delta; ++ else ++ task_rq(p)->dl.dl_nr_migratory += delta; ++ } ++} ++ ++static inline void ++migrate_disable_update_cpus_allowed(struct task_struct *p) ++{ ++ struct rq *rq; ++ struct rq_flags rf; ++ ++ p->cpus_ptr = cpumask_of(smp_processor_id()); ++ ++ rq = task_rq_lock(p, &rf); ++ update_nr_migratory(p, -1); ++ p->nr_cpus_allowed = 1; ++ task_rq_unlock(rq, p, &rf); ++} ++ ++static inline void ++migrate_enable_update_cpus_allowed(struct task_struct *p) ++{ ++ struct rq *rq; ++ struct rq_flags rf; ++ ++ p->cpus_ptr = &p->cpus_mask; ++ ++ rq = task_rq_lock(p, &rf); ++ p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask); ++ update_nr_migratory(p, 1); ++ task_rq_unlock(rq, p, &rf); ++} ++ + void migrate_disable(void) + { + struct task_struct *p = current; +@@ -6871,10 +6912,9 @@ void migrate_disable(void) + } + + preempt_disable(); +- p->migrate_disable = 1; + +- p->cpus_ptr = cpumask_of(smp_processor_id()); +- p->nr_cpus_allowed = 1; ++ migrate_disable_update_cpus_allowed(p); ++ p->migrate_disable = 1; + + preempt_enable(); + } +@@ -6903,9 +6943,8 @@ void migrate_enable(void) + + preempt_disable(); + +- p->cpus_ptr = &p->cpus_mask; +- p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask); + p->migrate_disable = 0; ++ migrate_enable_update_cpus_allowed(p); + + if (p->migrate_disable_update) { + struct rq *rq; diff --git a/debian/patches/features/all/rt/rt-add-rt-locks.patch b/debian/patches/features/all/rt/rt-add-rt-locks.patch deleted file mode 100644 index cd57bd769..000000000 --- a/debian/patches/features/all/rt/rt-add-rt-locks.patch +++ /dev/null @@ -1,2401 +0,0 @@ -From: Thomas Gleixner -Date: Sun, 26 Jul 2009 19:39:56 +0200 -Subject: rt: Add the preempt-rt lock replacement APIs -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Map spinlocks, rwlocks, rw_semaphores and semaphores to the rt_mutex -based locking functions for preempt-rt. -This also introduces RT's sleeping locks. 
- -Signed-off-by: Thomas Gleixner - ---- - include/linux/kernel.h | 4 - include/linux/locallock.h | 6 - include/linux/mutex.h | 20 - - include/linux/mutex_rt.h | 130 +++++++++ - include/linux/rtmutex.h | 29 +- - include/linux/rwlock_rt.h | 99 +++++++ - include/linux/rwlock_types_rt.h | 33 ++ - include/linux/rwsem.h | 6 - include/linux/rwsem_rt.h | 167 ++++++++++++ - include/linux/sched.h | 8 - include/linux/sched/wake_q.h | 11 - include/linux/spinlock.h | 12 - include/linux/spinlock_api_smp.h | 4 - include/linux/spinlock_rt.h | 162 +++++++++++ - include/linux/spinlock_types.h | 11 - include/linux/spinlock_types_rt.h | 48 +++ - kernel/futex.c | 11 - kernel/locking/Makefile | 9 - kernel/locking/rt.c | 521 ++++++++++++++++++++++++++++++++++++++ - kernel/locking/rtmutex.c | 480 ++++++++++++++++++++++++++++++++--- - kernel/locking/rtmutex_common.h | 10 - kernel/locking/spinlock.c | 7 - kernel/locking/spinlock_debug.c | 5 - kernel/sched/core.c | 7 - 24 files changed, 1734 insertions(+), 66 deletions(-) - ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -201,6 +201,9 @@ extern int _cond_resched(void); - */ - # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -+ -+# define might_sleep_no_state_check() \ -+ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) - # define sched_annotate_sleep() (current->task_state_change = 0) - #else - static inline void ___might_sleep(const char *file, int line, -@@ -208,6 +211,7 @@ extern int _cond_resched(void); - static inline void __might_sleep(const char *file, int line, - int preempt_offset) { } - # define might_sleep() do { might_resched(); } while (0) -+# define might_sleep_no_state_check() do { might_resched(); } while (0) - # define sched_annotate_sleep() do { } while (0) - #endif - ---- a/include/linux/locallock.h -+++ b/include/linux/locallock.h -@@ -42,9 +42,15 @@ struct local_irq_lock { - * already takes care of the migrate_disable/enable - * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. 
- */ -+#ifdef CONFIG_PREEMPT_RT_FULL -+# define spin_lock_local(lock) rt_spin_lock__no_mg(lock) -+# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock) -+# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock) -+#else - # define spin_lock_local(lock) spin_lock(lock) - # define spin_trylock_local(lock) spin_trylock(lock) - # define spin_unlock_local(lock) spin_unlock(lock) -+#endif - - static inline void __local_lock(struct local_irq_lock *lv) - { ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -22,6 +22,17 @@ - - struct ww_acquire_ctx; - -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -+ , .dep_map = { .name = #lockname } -+#else -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -+#endif -+ -+#ifdef CONFIG_PREEMPT_RT_FULL -+# include -+#else -+ - /* - * Simple, straightforward mutexes with strict semantics: - * -@@ -113,13 +124,6 @@ do { \ - __mutex_init((mutex), #mutex, &__key); \ - } while (0) - --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -- , .dep_map = { .name = #lockname } --#else --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) --#endif -- - #define __MUTEX_INITIALIZER(lockname) \ - { .owner = ATOMIC_LONG_INIT(0) \ - , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ -@@ -227,4 +231,6 @@ mutex_trylock_recursive(struct mutex *lo - return mutex_trylock(lock); - } - -+#endif /* !PREEMPT_RT_FULL */ -+ - #endif /* __LINUX_MUTEX_H */ ---- /dev/null -+++ b/include/linux/mutex_rt.h -@@ -0,0 +1,130 @@ -+#ifndef __LINUX_MUTEX_RT_H -+#define __LINUX_MUTEX_RT_H -+ -+#ifndef __LINUX_MUTEX_H -+#error "Please include mutex.h" -+#endif -+ -+#include -+ -+/* FIXME: Just for __lockfunc */ -+#include -+ -+struct mutex { -+ struct rt_mutex lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __MUTEX_INITIALIZER(mutexname) \ -+ { \ -+ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ -+ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ -+ } -+ -+#define DEFINE_MUTEX(mutexname) \ -+ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -+ -+extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); -+extern void __lockfunc _mutex_lock(struct mutex *lock); -+extern void __lockfunc _mutex_lock_io(struct mutex *lock); -+extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); -+extern int __lockfunc _mutex_lock_killable(struct mutex *lock); -+extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); -+extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); -+extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_trylock(struct mutex *lock); -+extern void __lockfunc _mutex_unlock(struct mutex *lock); -+ -+#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) -+#define mutex_lock(l) _mutex_lock(l) -+#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) -+#define mutex_lock_killable(l) _mutex_lock_killable(l) -+#define mutex_trylock(l) _mutex_trylock(l) -+#define mutex_unlock(l) _mutex_unlock(l) -+#define mutex_lock_io(l) _mutex_lock_io(l); -+ -+#define __mutex_owner(l) ((l)->lock.owner) -+ -+#ifdef CONFIG_DEBUG_MUTEXES -+#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) -+#else -+static inline void 
mutex_destroy(struct mutex *lock) {} -+#endif -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) -+# define mutex_lock_interruptible_nested(l, s) \ -+ _mutex_lock_interruptible_nested(l, s) -+# define mutex_lock_killable_nested(l, s) \ -+ _mutex_lock_killable_nested(l, s) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) -+ -+# define mutex_lock_nest_lock(lock, nest_lock) \ -+do { \ -+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ -+ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ -+} while (0) -+ -+#else -+# define mutex_lock_nested(l, s) _mutex_lock(l) -+# define mutex_lock_interruptible_nested(l, s) \ -+ _mutex_lock_interruptible(l) -+# define mutex_lock_killable_nested(l, s) \ -+ _mutex_lock_killable(l) -+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io(l) -+#endif -+ -+# define mutex_init(mutex) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_init(&(mutex)->lock); \ -+ __mutex_do_init((mutex), #mutex, &__key); \ -+} while (0) -+ -+# define __mutex_init(mutex, name, key) \ -+do { \ -+ rt_mutex_init(&(mutex)->lock); \ -+ __mutex_do_init((mutex), name, key); \ -+} while (0) -+ -+/** -+ * These values are chosen such that FAIL and SUCCESS match the -+ * values of the regular mutex_trylock(). -+ */ -+enum mutex_trylock_recursive_enum { -+ MUTEX_TRYLOCK_FAILED = 0, -+ MUTEX_TRYLOCK_SUCCESS = 1, -+ MUTEX_TRYLOCK_RECURSIVE, -+}; -+/** -+ * mutex_trylock_recursive - trylock variant that allows recursive locking -+ * @lock: mutex to be locked -+ * -+ * This function should not be used, _ever_. It is purely for hysterical GEM -+ * raisins, and once those are gone this will be removed. -+ * -+ * Returns: -+ * MUTEX_TRYLOCK_FAILED - trylock failed, -+ * MUTEX_TRYLOCK_SUCCESS - lock acquired, -+ * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. 
-+ */ -+int __rt_mutex_owner_current(struct rt_mutex *lock); -+ -+static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum -+mutex_trylock_recursive(struct mutex *lock) -+{ -+ if (unlikely(__rt_mutex_owner_current(&lock->lock))) -+ return MUTEX_TRYLOCK_RECURSIVE; -+ -+ return mutex_trylock(lock); -+} -+ -+extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -+ -+#endif ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -13,11 +13,15 @@ - #define __LINUX_RT_MUTEX_H - - #include --#include - #include -+#include - - extern int max_lock_depth; /* for sysctl */ - -+#ifdef CONFIG_DEBUG_MUTEXES -+#include -+#endif -+ - /** - * The rt_mutex structure - * -@@ -31,8 +35,8 @@ struct rt_mutex { - struct rb_root waiters; - struct rb_node *waiters_leftmost; - struct task_struct *owner; --#ifdef CONFIG_DEBUG_RT_MUTEXES - int save_state; -+#ifdef CONFIG_DEBUG_RT_MUTEXES - const char *name, *file; - int line; - void *magic; -@@ -55,22 +59,33 @@ struct hrtimer_sleeper; - # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) - #endif - -+# define rt_mutex_init(mutex) \ -+ do { \ -+ raw_spin_lock_init(&(mutex)->wait_lock); \ -+ __rt_mutex_init(mutex, #mutex); \ -+ } while (0) -+ - #ifdef CONFIG_DEBUG_RT_MUTEXES - # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ - , .name = #mutexname, .file = __FILE__, .line = __LINE__ --# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) - extern void rt_mutex_debug_task_free(struct task_struct *tsk); - #else - # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) --# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) - # define rt_mutex_debug_task_free(t) do { } while (0) - #endif - --#define __RT_MUTEX_INITIALIZER(mutexname) \ -- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ -+#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .waiters = RB_ROOT \ - , .owner = NULL \ -- __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} -+ __DEBUG_RT_MUTEX_INITIALIZER(mutexname) -+ -+#define __RT_MUTEX_INITIALIZER(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } -+ -+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ , .save_state = 1 } - - #define DEFINE_RT_MUTEX(mutexname) \ - struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) ---- /dev/null -+++ b/include/linux/rwlock_rt.h -@@ -0,0 +1,99 @@ -+#ifndef __LINUX_RWLOCK_RT_H -+#define __LINUX_RWLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_H -+#error Do not include directly. 
Use spinlock.h -+#endif -+ -+#define rwlock_init(rwl) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_init(&(rwl)->lock); \ -+ __rt_rwlock_init(rwl, #rwl, &__key); \ -+} while (0) -+ -+extern void __lockfunc rt_write_lock(rwlock_t *rwlock); -+extern void __lockfunc rt_read_lock(rwlock_t *rwlock); -+extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); -+extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags); -+extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); -+extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); -+extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); -+extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); -+extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); -+extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); -+ -+#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) -+#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) -+ -+#define write_trylock_irqsave(lock, flags) \ -+ __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) -+ -+#define read_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = rt_read_lock_irqsave(lock); \ -+ } while (0) -+ -+#define write_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = rt_write_lock_irqsave(lock); \ -+ } while (0) -+ -+#define read_lock(lock) rt_read_lock(lock) -+ -+#define read_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_read_lock(lock); \ -+ } while (0) -+ -+#define read_lock_irq(lock) read_lock(lock) -+ -+#define write_lock(lock) rt_write_lock(lock) -+ -+#define write_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_write_lock(lock); \ -+ } while (0) -+ -+#define write_lock_irq(lock) write_lock(lock) -+ -+#define read_unlock(lock) rt_read_unlock(lock) -+ -+#define read_unlock_bh(lock) \ -+ do { \ -+ rt_read_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define read_unlock_irq(lock) read_unlock(lock) -+ -+#define write_unlock(lock) rt_write_unlock(lock) -+ -+#define write_unlock_bh(lock) \ -+ do { \ -+ rt_write_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define write_unlock_irq(lock) write_unlock(lock) -+ -+#define read_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ rt_read_unlock(lock); \ -+ } while (0) -+ -+#define write_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ rt_write_unlock(lock); \ -+ } while (0) -+ -+#endif ---- /dev/null -+++ b/include/linux/rwlock_types_rt.h -@@ -0,0 +1,33 @@ -+#ifndef __LINUX_RWLOCK_TYPES_RT_H -+#define __LINUX_RWLOCK_TYPES_RT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. 
Include spinlock_types.h instead" -+#endif -+ -+/* -+ * rwlocks - rtmutex which allows single reader recursion -+ */ -+typedef struct { -+ struct rt_mutex lock; -+ int read_depth; -+ unsigned int break_lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} rwlock_t; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } -+#else -+# define RW_DEP_MAP_INIT(lockname) -+#endif -+ -+#define __RW_LOCK_UNLOCKED(name) \ -+ { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ -+ RW_DEP_MAP_INIT(name) } -+ -+#define DEFINE_RWLOCK(name) \ -+ rwlock_t name = __RW_LOCK_UNLOCKED(name) -+ -+#endif ---- a/include/linux/rwsem.h -+++ b/include/linux/rwsem.h -@@ -19,6 +19,10 @@ - #include - #endif - -+#ifdef CONFIG_PREEMPT_RT_FULL -+#include -+#else /* PREEMPT_RT_FULL */ -+ - struct rw_semaphore; - - #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK -@@ -184,4 +188,6 @@ extern void up_read_non_owner(struct rw_ - # define up_read_non_owner(sem) up_read(sem) - #endif - -+#endif /* !PREEMPT_RT_FULL */ -+ - #endif /* _LINUX_RWSEM_H */ ---- /dev/null -+++ b/include/linux/rwsem_rt.h -@@ -0,0 +1,167 @@ -+#ifndef _LINUX_RWSEM_RT_H -+#define _LINUX_RWSEM_RT_H -+ -+#ifndef _LINUX_RWSEM_H -+#error "Include rwsem.h" -+#endif -+ -+/* -+ * RW-semaphores are a spinlock plus a reader-depth count. -+ * -+ * Note that the semantics are different from the usual -+ * Linux rw-sems, in PREEMPT_RT mode we do not allow -+ * multiple readers to hold the lock at once, we only allow -+ * a read-lock owner to read-lock recursively. This is -+ * better for latency, makes the implementation inherently -+ * fair and makes it simpler as well. -+ */ -+ -+#include -+ -+struct rw_semaphore { -+ struct rt_mutex lock; -+ int read_depth; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __RWSEM_INITIALIZER(name) \ -+ { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ -+ RW_DEP_MAP_INIT(name) } -+ -+#define DECLARE_RWSEM(lockname) \ -+ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) -+ -+extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, -+ struct lock_class_key *key); -+ -+#define __rt_init_rwsem(sem, name, key) \ -+ do { \ -+ rt_mutex_init(&(sem)->lock); \ -+ __rt_rwsem_init((sem), (name), (key));\ -+ } while (0) -+ -+#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key) -+ -+# define rt_init_rwsem(sem) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __rt_init_rwsem((sem), #sem, &__key); \ -+} while (0) -+ -+extern void rt_down_write(struct rw_semaphore *rwsem); -+extern int rt_down_write_killable(struct rw_semaphore *rwsem); -+extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); -+extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); -+extern int rt_down_write_killable_nested(struct rw_semaphore *rwsem, -+ int subclass); -+extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem, -+ struct lockdep_map *nest); -+extern void rt__down_read(struct rw_semaphore *rwsem); -+extern void rt_down_read(struct rw_semaphore *rwsem); -+extern int rt_down_write_trylock(struct rw_semaphore *rwsem); -+extern int rt__down_read_trylock(struct rw_semaphore *rwsem); -+extern int rt_down_read_trylock(struct rw_semaphore *rwsem); -+extern void __rt_up_read(struct rw_semaphore *rwsem); -+extern void rt_up_read(struct rw_semaphore *rwsem); -+extern void rt_up_write(struct rw_semaphore *rwsem); -+extern void rt_downgrade_write(struct rw_semaphore 
*rwsem); -+ -+#define init_rwsem(sem) rt_init_rwsem(sem) -+#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) -+ -+static inline int rwsem_is_contended(struct rw_semaphore *sem) -+{ -+ /* rt_mutex_has_waiters() */ -+ return !RB_EMPTY_ROOT(&sem->lock.waiters); -+} -+ -+static inline void __down_read(struct rw_semaphore *sem) -+{ -+ rt__down_read(sem); -+} -+ -+static inline void down_read(struct rw_semaphore *sem) -+{ -+ rt_down_read(sem); -+} -+ -+static inline int __down_read_trylock(struct rw_semaphore *sem) -+{ -+ return rt__down_read_trylock(sem); -+} -+ -+static inline int down_read_trylock(struct rw_semaphore *sem) -+{ -+ return rt_down_read_trylock(sem); -+} -+ -+static inline void down_write(struct rw_semaphore *sem) -+{ -+ rt_down_write(sem); -+} -+ -+static inline int down_write_killable(struct rw_semaphore *sem) -+{ -+ return rt_down_write_killable(sem); -+} -+ -+static inline int down_write_trylock(struct rw_semaphore *sem) -+{ -+ return rt_down_write_trylock(sem); -+} -+ -+static inline void __up_read(struct rw_semaphore *sem) -+{ -+ __rt_up_read(sem); -+} -+ -+static inline void up_read(struct rw_semaphore *sem) -+{ -+ rt_up_read(sem); -+} -+ -+static inline void up_write(struct rw_semaphore *sem) -+{ -+ rt_up_write(sem); -+} -+ -+static inline void downgrade_write(struct rw_semaphore *sem) -+{ -+ rt_downgrade_write(sem); -+} -+ -+static inline void down_read_nested(struct rw_semaphore *sem, int subclass) -+{ -+ return rt_down_read_nested(sem, subclass); -+} -+ -+static inline void down_write_nested(struct rw_semaphore *sem, int subclass) -+{ -+ rt_down_write_nested(sem, subclass); -+} -+ -+static inline int down_write_killable_nested(struct rw_semaphore *sem, -+ int subclass) -+{ -+ return rt_down_write_killable_nested(sem, subclass); -+} -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+static inline void down_write_nest_lock(struct rw_semaphore *sem, -+ struct rw_semaphore *nest_lock) -+{ -+ rt_down_write_nested_lock(sem, &nest_lock->dep_map); -+} -+ -+#else -+ -+static inline void down_write_nest_lock(struct rw_semaphore *sem, -+ struct rw_semaphore *nest_lock) -+{ -+ rt_down_write_nested_lock(sem, NULL); -+} -+#endif -+#endif ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -123,6 +123,11 @@ struct task_group; - smp_store_mb(current->state, (state_value)); \ - } while (0) - -+#define __set_current_state_no_track(state_value) \ -+ current->state = (state_value); -+#define set_current_state_no_track(state_value) \ -+ smp_store_mb(current->state, (state_value)); -+ - #else - /* - * set_current_state() includes a barrier so that the write of current->state -@@ -160,6 +165,9 @@ struct task_group; - */ - #define __set_current_state(state_value) do { current->state = (state_value); } while (0) - #define set_current_state(state_value) smp_store_mb(current->state, (state_value)) -+ -+#define __set_current_state_no_track(state_value) __set_current_state(state_value) -+#define set_current_state_no_track(state_value) set_current_state(state_value) - #endif - - /* Task command name length: */ ---- a/include/linux/sched/wake_q.h -+++ b/include/linux/sched/wake_q.h -@@ -48,6 +48,15 @@ static inline void wake_q_init(struct wa - - extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); --extern void wake_up_q(struct wake_q_head *head); -+extern void __wake_up_q(struct wake_q_head *head, bool sleeper); -+static inline void wake_up_q(struct wake_q_head *head) -+{ -+ __wake_up_q(head, false); -+} -+ -+static inline void wake_up_q_sleeper(struct wake_q_head 
*head) -+{ -+ __wake_up_q(head, true); -+} - - #endif /* _LINUX_SCHED_WAKE_Q_H */ ---- a/include/linux/spinlock.h -+++ b/include/linux/spinlock.h -@@ -268,7 +268,11 @@ static inline void do_raw_spin_unlock(ra - #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) - - /* Include rwlock functions */ --#include -+#ifdef CONFIG_PREEMPT_RT_FULL -+# include -+#else -+# include -+#endif - - /* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: -@@ -279,6 +283,10 @@ static inline void do_raw_spin_unlock(ra - # include - #endif - -+#ifdef CONFIG_PREEMPT_RT_FULL -+# include -+#else /* PREEMPT_RT_FULL */ -+ - /* - * Map the spin_lock functions to the raw variants for PREEMPT_RT=n - */ -@@ -408,4 +416,6 @@ extern int _atomic_dec_and_lock(atomic_t - #define atomic_dec_and_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) - -+#endif /* !PREEMPT_RT_FULL */ -+ - #endif /* __LINUX_SPINLOCK_H */ ---- a/include/linux/spinlock_api_smp.h -+++ b/include/linux/spinlock_api_smp.h -@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh( - return 0; - } - --#include -+#ifndef CONFIG_PREEMPT_RT_FULL -+# include -+#endif - - #endif /* __LINUX_SPINLOCK_API_SMP_H */ ---- /dev/null -+++ b/include/linux/spinlock_rt.h -@@ -0,0 +1,162 @@ -+#ifndef __LINUX_SPINLOCK_RT_H -+#define __LINUX_SPINLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_H -+#error Do not include directly. Use spinlock.h -+#endif -+ -+#include -+ -+extern void -+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key); -+ -+#define spin_lock_init(slock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_init(&(slock)->lock); \ -+ __rt_spin_lock_init(slock, #slock, &__key); \ -+} while (0) -+ -+void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock); -+void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock); -+int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock); -+ -+extern void __lockfunc rt_spin_lock(spinlock_t *lock); -+extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); -+extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); -+extern void __lockfunc rt_spin_unlock(spinlock_t *lock); -+extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); -+extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); -+extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); -+extern int __lockfunc rt_spin_trylock(spinlock_t *lock); -+extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); -+ -+/* -+ * lockdep-less calls, for derived types like rwlock: -+ * (for trylock they can use rt_mutex_trylock() directly. 
-+ */ -+extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); -+extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); -+extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); -+ -+#define spin_lock(lock) rt_spin_lock(lock) -+ -+#define spin_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_spin_lock(lock); \ -+ } while (0) -+ -+#define spin_lock_irq(lock) spin_lock(lock) -+ -+#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) -+ -+#define spin_trylock(lock) \ -+({ \ -+ int __locked; \ -+ __locked = spin_do_trylock(lock); \ -+ __locked; \ -+}) -+ -+#ifdef CONFIG_LOCKDEP -+# define spin_lock_nested(lock, subclass) \ -+ do { \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+ -+#define spin_lock_bh_nested(lock, subclass) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+ -+# define spin_lock_irqsave_nested(lock, flags, subclass) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+#else -+# define spin_lock_nested(lock, subclass) spin_lock(lock) -+# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) -+ -+# define spin_lock_irqsave_nested(lock, flags, subclass) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ spin_lock(lock); \ -+ } while (0) -+#endif -+ -+#define spin_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ spin_lock(lock); \ -+ } while (0) -+ -+static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) -+{ -+ unsigned long flags = 0; -+#ifdef CONFIG_TRACE_IRQFLAGS -+ flags = rt_spin_lock_trace_flags(lock); -+#else -+ spin_lock(lock); /* lock_local */ -+#endif -+ return flags; -+} -+ -+/* FIXME: we need rt_spin_lock_nest_lock */ -+#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) -+ -+#define spin_unlock(lock) rt_spin_unlock(lock) -+ -+#define spin_unlock_bh(lock) \ -+ do { \ -+ rt_spin_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define spin_unlock_irq(lock) spin_unlock(lock) -+ -+#define spin_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ spin_unlock(lock); \ -+ } while (0) -+ -+#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) -+#define spin_trylock_irq(lock) spin_trylock(lock) -+ -+#define spin_trylock_irqsave(lock, flags) \ -+ rt_spin_trylock_irqsave(lock, &(flags)) -+ -+#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) -+ -+#ifdef CONFIG_GENERIC_LOCKBREAK -+# define spin_is_contended(lock) ((lock)->break_lock) -+#else -+# define spin_is_contended(lock) (((void)(lock), 0)) -+#endif -+ -+static inline int spin_can_lock(spinlock_t *lock) -+{ -+ return !rt_mutex_is_locked(&lock->lock); -+} -+ -+static inline int spin_is_locked(spinlock_t *lock) -+{ -+ return rt_mutex_is_locked(&lock->lock); -+} -+ -+static inline void assert_spin_locked(spinlock_t *lock) -+{ -+ BUG_ON(!spin_is_locked(lock)); -+} -+ -+#define atomic_dec_and_lock(atomic, lock) \ -+ atomic_dec_and_spin_lock(atomic, lock) -+ -+#endif ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -11,8 +11,13 @@ - - #include - --#include -- --#include -+#ifndef CONFIG_PREEMPT_RT_FULL -+# include -+# include -+#else -+# include -+# include -+# include -+#endif - - #endif /* __LINUX_SPINLOCK_TYPES_H */ ---- /dev/null -+++ b/include/linux/spinlock_types_rt.h -@@ -0,0 +1,48 @@ -+#ifndef __LINUX_SPINLOCK_TYPES_RT_H 
-+#define __LINUX_SPINLOCK_TYPES_RT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. Include spinlock_types.h instead" -+#endif -+ -+#include -+ -+/* -+ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: -+ */ -+typedef struct spinlock { -+ struct rt_mutex lock; -+ unsigned int break_lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} spinlock_t; -+ -+#ifdef CONFIG_DEBUG_RT_MUTEXES -+# define __RT_SPIN_INITIALIZER(name) \ -+ { \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ -+ .save_state = 1, \ -+ .file = __FILE__, \ -+ .line = __LINE__ , \ -+ } -+#else -+# define __RT_SPIN_INITIALIZER(name) \ -+ { \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ -+ .save_state = 1, \ -+ } -+#endif -+ -+/* -+.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) -+*/ -+ -+#define __SPIN_LOCK_UNLOCKED(name) \ -+ { .lock = __RT_SPIN_INITIALIZER(name.lock), \ -+ SPIN_DEP_MAP_INIT(name) } -+ -+#define DEFINE_SPINLOCK(name) \ -+ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) -+ -+#endif ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1400,6 +1400,7 @@ static int wake_futex_pi(u32 __user *uad - struct task_struct *new_owner; - bool postunlock = false; - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - int ret = 0; - - new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1461,13 +1462,13 @@ static int wake_futex_pi(u32 __user *uad - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); - -- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -- -+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, -+ &wake_sleeper_q); - out_unlock: - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - - return ret; - } -@@ -2668,7 +2669,7 @@ static int futex_lock_pi(u32 __user *uad - goto no_block; - } - -- rt_mutex_init_waiter(&rt_waiter); -+ rt_mutex_init_waiter(&rt_waiter, false); - - /* - * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3035,7 +3036,7 @@ static int futex_wait_requeue_pi(u32 __u - * The waiter is allocated on our stack, manipulated by the requeue - * code while we sleep on uaddr. - */ -- rt_mutex_init_waiter(&rt_waiter); -+ rt_mutex_init_waiter(&rt_waiter, false); - - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); - if (unlikely(ret != 0)) ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -2,7 +2,7 @@ - # and is generally not a function of system call inputs. 
- KCOV_INSTRUMENT := n - --obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o -+obj-y += semaphore.o percpu-rwsem.o - - ifdef CONFIG_FUNCTION_TRACER - CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) -@@ -11,7 +11,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS - CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) - endif - -+ifneq ($(CONFIG_PREEMPT_RT_FULL),y) -+obj-y += mutex.o - obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o -+obj-y += rwsem.o -+endif - obj-$(CONFIG_LOCKDEP) += lockdep.o - ifeq ($(CONFIG_PROC_FS),y) - obj-$(CONFIG_LOCKDEP) += lockdep_proc.o -@@ -24,8 +28,11 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o - obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o -+ifneq ($(CONFIG_PREEMPT_RT_FULL),y) - obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o - obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o -+endif -+obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o - obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o - obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o - obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o ---- /dev/null -+++ b/kernel/locking/rt.c -@@ -0,0 +1,521 @@ -+/* -+ * kernel/rt.c -+ * -+ * Real-Time Preemption Support -+ * -+ * started by Ingo Molnar: -+ * -+ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar -+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner -+ * -+ * historic credit for proving that Linux spinlocks can be implemented via -+ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow -+ * and others) who prototyped it on 2.4 and did lots of comparative -+ * research and analysis; TimeSys, for proving that you can implement a -+ * fully preemptible kernel via the use of IRQ threading and mutexes; -+ * Bill Huey for persuasively arguing on lkml that the mutex model is the -+ * right one; and to MontaVista, who ported pmutexes to 2.6. -+ * -+ * This code is a from-scratch implementation and is not based on pmutexes, -+ * but the idea of converting spinlocks to mutexes is used here too. -+ * -+ * lock debugging, locking tree, deadlock detection: -+ * -+ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey -+ * Released under the General Public License (GPL). -+ * -+ * Includes portions of the generic R/W semaphore implementation from: -+ * -+ * Copyright (c) 2001 David Howells (dhowells@redhat.com). -+ * - Derived partially from idea by Andrea Arcangeli -+ * - Derived also from comments by Linus -+ * -+ * Pending ownership of locks and ownership stealing: -+ * -+ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt -+ * -+ * (also by Steven Rostedt) -+ * - Converted single pi_lock to individual task locks. -+ * -+ * By Esben Nielsen: -+ * Doing priority inheritance with help of the scheduler. -+ * -+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner -+ * - major rework based on Esben Nielsens initial patch -+ * - replaced thread_info references by task_struct refs -+ * - removed task->pending_owner dependency -+ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks -+ * in the scheduler return path as discussed with Steven Rostedt -+ * -+ * Copyright (C) 2006, Kihon Technologies Inc. -+ * Steven Rostedt -+ * - debugged and patched Thomas Gleixner's rework. -+ * - added back the cmpxchg to the rework. -+ * - turned atomic require back on for SMP. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "rtmutex_common.h" -+ -+/* -+ * struct mutex functions -+ */ -+void __mutex_do_init(struct mutex *mutex, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); -+ lockdep_init_map(&mutex->dep_map, name, key, 0); -+#endif -+ mutex->lock.save_state = 0; -+} -+EXPORT_SYMBOL(__mutex_do_init); -+ -+void __lockfunc _mutex_lock(struct mutex *lock) -+{ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ rt_mutex_lock(&lock->lock); -+} -+EXPORT_SYMBOL(_mutex_lock); -+ -+void __lockfunc _mutex_lock_io(struct mutex *lock) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ _mutex_lock(lock); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL_GPL(_mutex_lock_io); -+ -+int __lockfunc _mutex_lock_interruptible(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = rt_mutex_lock_interruptible(&lock->lock); -+ if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_interruptible); -+ -+int __lockfunc _mutex_lock_killable(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = rt_mutex_lock_killable(&lock->lock); -+ if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_killable); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) -+{ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ rt_mutex_lock(&lock->lock); -+} -+EXPORT_SYMBOL(_mutex_lock_nested); -+ -+void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ rt_mutex_lock(&lock->lock); -+ -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); -+ -+void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) -+{ -+ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); -+ rt_mutex_lock(&lock->lock); -+} -+EXPORT_SYMBOL(_mutex_lock_nest_lock); -+ -+int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) -+{ -+ int ret; -+ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ ret = rt_mutex_lock_interruptible(&lock->lock); -+ if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_interruptible_nested); -+ -+int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ ret = rt_mutex_lock_killable(&lock->lock); -+ if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_killable_nested); -+#endif -+ -+int __lockfunc _mutex_trylock(struct mutex *lock) -+{ -+ int ret = rt_mutex_trylock(&lock->lock); -+ -+ if (ret) -+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_trylock); -+ -+void __lockfunc _mutex_unlock(struct mutex *lock) -+{ -+ mutex_release(&lock->dep_map, 1, _RET_IP_); -+ rt_mutex_unlock(&lock->lock); -+} -+EXPORT_SYMBOL(_mutex_unlock); -+ -+/* -+ * rwlock_t functions -+ */ -+int __lockfunc rt_write_trylock(rwlock_t *rwlock) 
-+{ -+ int ret; -+ -+ migrate_disable(); -+ ret = rt_mutex_trylock(&rwlock->lock); -+ if (ret) -+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); -+ else -+ migrate_enable(); -+ -+ return ret; -+} -+EXPORT_SYMBOL(rt_write_trylock); -+ -+int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) -+{ -+ int ret; -+ -+ *flags = 0; -+ ret = rt_write_trylock(rwlock); -+ return ret; -+} -+EXPORT_SYMBOL(rt_write_trylock_irqsave); -+ -+int __lockfunc rt_read_trylock(rwlock_t *rwlock) -+{ -+ struct rt_mutex *lock = &rwlock->lock; -+ int ret = 1; -+ -+ /* -+ * recursive read locks succeed when current owns the lock, -+ * but not when read_depth == 0 which means that the lock is -+ * write locked. -+ */ -+ if (rt_mutex_owner(lock) != current) { -+ migrate_disable(); -+ ret = rt_mutex_trylock(lock); -+ if (ret) -+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); -+ else -+ migrate_enable(); -+ -+ } else if (!rwlock->read_depth) { -+ ret = 0; -+ } -+ -+ if (ret) -+ rwlock->read_depth++; -+ -+ return ret; -+} -+EXPORT_SYMBOL(rt_read_trylock); -+ -+void __lockfunc rt_write_lock(rwlock_t *rwlock) -+{ -+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); -+ __rt_spin_lock(&rwlock->lock); -+} -+EXPORT_SYMBOL(rt_write_lock); -+ -+void __lockfunc rt_read_lock(rwlock_t *rwlock) -+{ -+ struct rt_mutex *lock = &rwlock->lock; -+ -+ -+ /* -+ * recursive read locks succeed when current owns the lock -+ */ -+ if (rt_mutex_owner(lock) != current) { -+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); -+ __rt_spin_lock(lock); -+ } -+ rwlock->read_depth++; -+} -+ -+EXPORT_SYMBOL(rt_read_lock); -+ -+void __lockfunc rt_write_unlock(rwlock_t *rwlock) -+{ -+ /* NOTE: we always pass in '1' for nested, for simplicity */ -+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); -+ __rt_spin_unlock(&rwlock->lock); -+ migrate_enable(); -+} -+EXPORT_SYMBOL(rt_write_unlock); -+ -+void __lockfunc rt_read_unlock(rwlock_t *rwlock) -+{ -+ /* Release the lock only when read_depth is down to 0 */ -+ if (--rwlock->read_depth == 0) { -+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); -+ __rt_spin_unlock(&rwlock->lock); -+ migrate_enable(); -+ } -+} -+EXPORT_SYMBOL(rt_read_unlock); -+ -+unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) -+{ -+ rt_write_lock(rwlock); -+ -+ return 0; -+} -+EXPORT_SYMBOL(rt_write_lock_irqsave); -+ -+unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) -+{ -+ rt_read_lock(rwlock); -+ -+ return 0; -+} -+EXPORT_SYMBOL(rt_read_lock_irqsave); -+ -+void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); -+ lockdep_init_map(&rwlock->dep_map, name, key, 0); -+#endif -+ rwlock->lock.save_state = 1; -+ rwlock->read_depth = 0; -+} -+EXPORT_SYMBOL(__rt_rwlock_init); -+ -+/* -+ * rw_semaphores -+ */ -+ -+void rt_up_write(struct rw_semaphore *rwsem) -+{ -+ rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -+ rt_mutex_unlock(&rwsem->lock); -+} -+EXPORT_SYMBOL(rt_up_write); -+ -+void __rt_up_read(struct rw_semaphore *rwsem) -+{ -+ if (--rwsem->read_depth == 0) -+ rt_mutex_unlock(&rwsem->lock); -+} -+ -+void rt_up_read(struct rw_semaphore *rwsem) -+{ -+ rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -+ __rt_up_read(rwsem); -+} -+EXPORT_SYMBOL(rt_up_read); -+ -+/* -+ * downgrade a write lock into a read lock -+ * - just wake up any readers at the front of the queue -+ */ -+void rt_downgrade_write(struct 
rw_semaphore *rwsem) -+{ -+ BUG_ON(rt_mutex_owner(&rwsem->lock) != current); -+ rwsem->read_depth = 1; -+} -+EXPORT_SYMBOL(rt_downgrade_write); -+ -+int rt_down_write_trylock(struct rw_semaphore *rwsem) -+{ -+ int ret = rt_mutex_trylock(&rwsem->lock); -+ -+ if (ret) -+ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(rt_down_write_trylock); -+ -+void rt_down_write(struct rw_semaphore *rwsem) -+{ -+ rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); -+ rt_mutex_lock(&rwsem->lock); -+} -+EXPORT_SYMBOL(rt_down_write); -+ -+int rt_down_write_killable(struct rw_semaphore *rwsem) -+{ -+ int ret; -+ -+ rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); -+ ret = rt_mutex_lock_killable(&rwsem->lock); -+ if (ret) -+ rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(rt_down_write_killable); -+ -+int rt_down_write_killable_nested(struct rw_semaphore *rwsem, int subclass) -+{ -+ int ret; -+ -+ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); -+ ret = rt_mutex_lock_killable(&rwsem->lock); -+ if (ret) -+ rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(rt_down_write_killable_nested); -+ -+void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) -+{ -+ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); -+ rt_mutex_lock(&rwsem->lock); -+} -+EXPORT_SYMBOL(rt_down_write_nested); -+ -+void rt_down_write_nested_lock(struct rw_semaphore *rwsem, -+ struct lockdep_map *nest) -+{ -+ rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_); -+ rt_mutex_lock(&rwsem->lock); -+} -+EXPORT_SYMBOL(rt_down_write_nested_lock); -+ -+int rt__down_read_trylock(struct rw_semaphore *rwsem) -+{ -+ struct rt_mutex *lock = &rwsem->lock; -+ int ret = 1; -+ -+ /* -+ * recursive read locks succeed when current owns the rwsem, -+ * but not when read_depth == 0 which means that the rwsem is -+ * write locked. 
-+ */ -+ if (rt_mutex_owner(lock) != current) -+ ret = rt_mutex_trylock(&rwsem->lock); -+ else if (!rwsem->read_depth) -+ ret = 0; -+ -+ if (ret) -+ rwsem->read_depth++; -+ return ret; -+ -+} -+ -+int rt_down_read_trylock(struct rw_semaphore *rwsem) -+{ -+ int ret; -+ -+ ret = rt__down_read_trylock(rwsem); -+ if (ret) -+ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); -+ -+ return ret; -+} -+EXPORT_SYMBOL(rt_down_read_trylock); -+ -+void rt__down_read(struct rw_semaphore *rwsem) -+{ -+ struct rt_mutex *lock = &rwsem->lock; -+ -+ if (rt_mutex_owner(lock) != current) -+ rt_mutex_lock(&rwsem->lock); -+ rwsem->read_depth++; -+} -+EXPORT_SYMBOL(rt__down_read); -+ -+static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) -+{ -+ rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); -+ rt__down_read(rwsem); -+} -+ -+void rt_down_read(struct rw_semaphore *rwsem) -+{ -+ __rt_down_read(rwsem, 0); -+} -+EXPORT_SYMBOL(rt_down_read); -+ -+void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) -+{ -+ __rt_down_read(rwsem, subclass); -+} -+EXPORT_SYMBOL(rt_down_read_nested); -+ -+void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); -+ lockdep_init_map(&rwsem->dep_map, name, key, 0); -+#endif -+ rwsem->read_depth = 0; -+ rwsem->lock.save_state = 0; -+} -+EXPORT_SYMBOL(__rt_rwsem_init); -+ -+/** -+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 -+ * @cnt: the atomic which we are to dec -+ * @lock: the mutex to return holding if we dec to 0 -+ * -+ * return true and hold lock if we dec to 0, return false otherwise -+ */ -+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) -+{ -+ /* dec if we can't possibly hit 0 */ -+ if (atomic_add_unless(cnt, -1, 1)) -+ return 0; -+ /* we might hit 0, so take the lock */ -+ mutex_lock(lock); -+ if (!atomic_dec_and_test(cnt)) { -+ /* when we actually did the dec, we didn't hit 0 */ -+ mutex_unlock(lock); -+ return 0; -+ } -+ /* we hit 0, and we hold the lock */ -+ return 1; -+} -+EXPORT_SYMBOL(atomic_dec_and_mutex_lock); ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -7,6 +7,11 @@ - * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner - * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt - * Copyright (C) 2006 Esben Nielsen -+ * Adaptive Spinlocks: -+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, -+ * and Peter Morreale, -+ * Adaptive Spinlocks simplification: -+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt - * - * See Documentation/locking/rt-mutex-design.txt for details. - */ -@@ -230,6 +235,9 @@ static inline bool unlock_rt_mutex_safe( - } - #endif - -+#define STEAL_NORMAL 0 -+#define STEAL_LATERAL 1 -+ - /* - * Only use with rt_mutex_waiter_{less,equal}() - */ -@@ -238,11 +246,15 @@ static inline bool unlock_rt_mutex_safe( - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -- struct rt_mutex_waiter *right) -+ struct rt_mutex_waiter *right, int mode) - { -- if (left->prio < right->prio) -- return 1; -- -+ if (mode == STEAL_NORMAL) { -+ if (left->prio < right->prio) -+ return 1; -+ } else { -+ if (left->prio <= right->prio) -+ return 1; -+ } - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. 
-@@ -285,7 +297,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, - while (*link) { - parent = *link; - entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); -- if (rt_mutex_waiter_less(waiter, entry)) { -+ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { - link = &parent->rb_left; - } else { - link = &parent->rb_right; -@@ -324,7 +336,7 @@ rt_mutex_enqueue_pi(struct task_struct * - while (*link) { - parent = *link; - entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); -- if (rt_mutex_waiter_less(waiter, entry)) { -+ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { - link = &parent->rb_left; - } else { - link = &parent->rb_right; -@@ -390,6 +402,14 @@ static bool rt_mutex_cond_detect_deadloc - return debug_rt_mutex_detect_deadlock(waiter, chwalk); - } - -+static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) -+{ -+ if (waiter->savestate) -+ wake_up_lock_sleeper(waiter->task); -+ else -+ wake_up_process(waiter->task); -+} -+ - /* - * Max number of times we'll walk the boosting chain: - */ -@@ -715,13 +735,16 @@ static int rt_mutex_adjust_prio_chain(st - * follow here. This is the end of the chain we are walking. - */ - if (!rt_mutex_owner(lock)) { -+ struct rt_mutex_waiter *lock_top_waiter; -+ - /* - * If the requeue [7] above changed the top waiter, - * then we need to wake the new top waiter up to try - * to get the lock. - */ -- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) -- wake_up_process(rt_mutex_top_waiter(lock)->task); -+ lock_top_waiter = rt_mutex_top_waiter(lock); -+ if (prerequeue_top_waiter != lock_top_waiter) -+ rt_mutex_wake_waiter(lock_top_waiter); - raw_spin_unlock_irq(&lock->wait_lock); - return 0; - } -@@ -824,8 +847,9 @@ static int rt_mutex_adjust_prio_chain(st - * @waiter: The waiter that is queued to the lock's wait tree if the - * callsite called task_blocked_on_lock(), otherwise NULL - */ --static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -- struct rt_mutex_waiter *waiter) -+static int __try_to_take_rt_mutex(struct rt_mutex *lock, -+ struct task_struct *task, -+ struct rt_mutex_waiter *waiter, int mode) - { - lockdep_assert_held(&lock->wait_lock); - -@@ -864,8 +888,10 @@ static int try_to_take_rt_mutex(struct r - * If waiter is not the highest priority waiter of - * @lock, give up. - */ -- if (waiter != rt_mutex_top_waiter(lock)) -+ if (waiter != rt_mutex_top_waiter(lock)) { -+ /* XXX rt_mutex_waiter_less() ? */ - return 0; -+ } - - /* - * We can acquire the lock. Remove the waiter from the -@@ -883,15 +909,26 @@ static int try_to_take_rt_mutex(struct r - * not need to be dequeued. - */ - if (rt_mutex_has_waiters(lock)) { -+ struct task_struct *pown = rt_mutex_top_waiter(lock)->task; -+ -+ if (task != pown) -+ return 0; -+ -+ /* -+ * Note that RT tasks are excluded from lateral-steals -+ * to prevent the introduction of an unbounded latency. -+ */ -+ if (rt_task(task)) -+ mode = STEAL_NORMAL; - /* - * If @task->prio is greater than or equal to - * the top waiter priority (kernel view), - * @task lost. - */ - if (!rt_mutex_waiter_less(task_to_waiter(task), -- rt_mutex_top_waiter(lock))) -+ rt_mutex_top_waiter(lock), -+ mode)) - return 0; -- - /* - * The current top waiter stays enqueued. 
We - * don't have to change anything in the lock -@@ -938,6 +975,339 @@ static int try_to_take_rt_mutex(struct r - return 1; - } - -+#ifdef CONFIG_PREEMPT_RT_FULL -+/* -+ * preemptible spin_lock functions: -+ */ -+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, -+ void (*slowfn)(struct rt_mutex *lock)) -+{ -+ might_sleep_no_state_check(); -+ -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ return; -+ else -+ slowfn(lock); -+} -+ -+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, -+ void (*slowfn)(struct rt_mutex *lock)) -+{ -+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) -+ return; -+ else -+ slowfn(lock); -+} -+#ifdef CONFIG_SMP -+/* -+ * Note that owner is a speculative pointer and dereferencing relies -+ * on rcu_read_lock() and the check against the lock owner. -+ */ -+static int adaptive_wait(struct rt_mutex *lock, -+ struct task_struct *owner) -+{ -+ int res = 0; -+ -+ rcu_read_lock(); -+ for (;;) { -+ if (owner != rt_mutex_owner(lock)) -+ break; -+ /* -+ * Ensure that owner->on_cpu is dereferenced _after_ -+ * checking the above to be valid. -+ */ -+ barrier(); -+ if (!owner->on_cpu) { -+ res = 1; -+ break; -+ } -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ return res; -+} -+#else -+static int adaptive_wait(struct rt_mutex *lock, -+ struct task_struct *orig_owner) -+{ -+ return 1; -+} -+#endif -+ -+static int task_blocks_on_rt_mutex(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task, -+ enum rtmutex_chainwalk chwalk); -+/* -+ * Slow path lock function spin_lock style: this variant is very -+ * careful not to miss any non-lock wakeups. -+ * -+ * We store the current state under p->pi_lock in p->saved_state and -+ * the try_to_wake_up() code handles this accordingly. -+ */ -+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) -+{ -+ struct task_struct *lock_owner, *self = current; -+ struct rt_mutex_waiter waiter, *top_waiter; -+ unsigned long flags; -+ int ret; -+ -+ rt_mutex_init_waiter(&waiter, true); -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) { -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ return; -+ } -+ -+ BUG_ON(rt_mutex_owner(lock) == self); -+ -+ /* -+ * We save whatever state the task is in and we'll restore it -+ * after acquiring the lock taking real wakeups into account -+ * as well. We are serialized via pi_lock against wakeups. See -+ * try_to_wake_up(). -+ */ -+ raw_spin_lock(&self->pi_lock); -+ self->saved_state = self->state; -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ -+ ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK); -+ BUG_ON(ret); -+ -+ for (;;) { -+ /* Try to acquire the lock again. */ -+ if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL)) -+ break; -+ -+ top_waiter = rt_mutex_top_waiter(lock); -+ lock_owner = rt_mutex_owner(lock); -+ -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ debug_rt_mutex_print_deadlock(&waiter); -+ -+ if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) -+ schedule(); -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ } -+ -+ /* -+ * Restore the task state to current->saved_state. 
We set it -+ * to the original state above and the try_to_wake_up() code -+ * has possibly updated it when a real (non-rtmutex) wakeup -+ * happened while we were blocked. Clear saved_state so -+ * try_to_wakeup() does not get confused. -+ */ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(self->saved_state); -+ self->saved_state = TASK_RUNNING; -+ raw_spin_unlock(&self->pi_lock); -+ -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit -+ * unconditionally. We might have to fix that up: -+ */ -+ fixup_rt_mutex_waiters(lock); -+ -+ BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock)); -+ BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry)); -+ -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ debug_rt_mutex_free_waiter(&waiter); -+} -+ -+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper); -+/* -+ * Slow path to release a rt_mutex spin_lock style -+ */ -+static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) -+{ -+ unsigned long flags; -+ DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); -+ bool postunlock; -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ if (postunlock) -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); -+} -+ -+void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) -+{ -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+} -+EXPORT_SYMBOL(rt_spin_lock__no_mg); -+ -+void __lockfunc rt_spin_lock(spinlock_t *lock) -+{ -+ migrate_disable(); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+} -+EXPORT_SYMBOL(rt_spin_lock); -+ -+void __lockfunc __rt_spin_lock(struct rt_mutex *lock) -+{ -+ migrate_disable(); -+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); -+} -+EXPORT_SYMBOL(__rt_spin_lock); -+ -+void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock) -+{ -+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); -+} -+EXPORT_SYMBOL(__rt_spin_lock__no_mg); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) -+{ -+ migrate_disable(); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+} -+EXPORT_SYMBOL(rt_spin_lock_nested); -+#endif -+ -+void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock) -+{ -+ /* NOTE: we always pass in '1' for nested, for simplicity */ -+ spin_release(&lock->dep_map, 1, _RET_IP_); -+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); -+} -+EXPORT_SYMBOL(rt_spin_unlock__no_mg); -+ -+void __lockfunc rt_spin_unlock(spinlock_t *lock) -+{ -+ /* NOTE: we always pass in '1' for nested, for simplicity */ -+ spin_release(&lock->dep_map, 1, _RET_IP_); -+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); -+ migrate_enable(); -+} -+EXPORT_SYMBOL(rt_spin_unlock); -+ -+void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) -+{ -+ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); -+} -+EXPORT_SYMBOL(__rt_spin_unlock); -+ -+/* -+ * Wait for the lock to get unlocked: instead of polling for an unlock -+ * (like raw spinlocks do), we lock and unlock, to force the kernel to -+ * schedule if there's contention: -+ */ -+void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) -+{ -+ spin_lock(lock); -+ 
spin_unlock(lock); -+} -+EXPORT_SYMBOL(rt_spin_unlock_wait); -+ -+int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock) -+{ -+ int ret; -+ -+ ret = rt_mutex_trylock(&lock->lock); -+ if (ret) -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock__no_mg); -+ -+int __lockfunc rt_spin_trylock(spinlock_t *lock) -+{ -+ int ret; -+ -+ migrate_disable(); -+ ret = rt_mutex_trylock(&lock->lock); -+ if (ret) -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ else -+ migrate_enable(); -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock); -+ -+int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) -+{ -+ int ret; -+ -+ local_bh_disable(); -+ ret = rt_mutex_trylock(&lock->lock); -+ if (ret) { -+ migrate_disable(); -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ } else -+ local_bh_enable(); -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock_bh); -+ -+int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) -+{ -+ int ret; -+ -+ *flags = 0; -+ ret = rt_mutex_trylock(&lock->lock); -+ if (ret) { -+ migrate_disable(); -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock_irqsave); -+ -+int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) -+{ -+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ -+ if (atomic_add_unless(atomic, -1, 1)) -+ return 0; -+ rt_spin_lock(lock); -+ if (atomic_dec_and_test(atomic)) -+ return 1; -+ rt_spin_unlock(lock); -+ return 0; -+} -+EXPORT_SYMBOL(atomic_dec_and_spin_lock); -+ -+ void -+__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+} -+EXPORT_SYMBOL(__rt_spin_lock_init); -+ -+#endif /* PREEMPT_RT_FULL */ -+ -+static inline int -+try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -+ struct rt_mutex_waiter *waiter) -+{ -+ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); -+} -+ - /* - * Task blocks on lock. - * -@@ -1053,6 +1423,7 @@ static int task_blocks_on_rt_mutex(struc - * Called with lock->wait_lock held and interrupts disabled. - */ - static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q, - struct rt_mutex *lock) - { - struct rt_mutex_waiter *waiter; -@@ -1092,7 +1463,10 @@ static void mark_wakeup_next_waiter(stru - * Pairs with preempt_enable() in rt_mutex_postunlock(); - */ - preempt_disable(); -- wake_q_add(wake_q, waiter->task); -+ if (waiter->savestate) -+ wake_q_add(wake_sleeper_q, waiter->task); -+ else -+ wake_q_add(wake_q, waiter->task); - raw_spin_unlock(¤t->pi_lock); - } - -@@ -1176,21 +1550,22 @@ void rt_mutex_adjust_pi(struct task_stru - return; - } - next_lock = waiter->lock; -- raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - /* gets dropped in rt_mutex_adjust_prio_chain()! 
*/ - get_task_struct(task); - -+ raw_spin_unlock_irqrestore(&task->pi_lock, flags); - rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, - next_lock, NULL, task); - } - --void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) -+void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) - { - debug_rt_mutex_init_waiter(waiter); - RB_CLEAR_NODE(&waiter->pi_tree_entry); - RB_CLEAR_NODE(&waiter->tree_entry); - waiter->task = NULL; -+ waiter->savestate = savestate; - } - - /** -@@ -1270,7 +1645,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, - unsigned long flags; - int ret = 0; - -- rt_mutex_init_waiter(&waiter); -+ rt_mutex_init_waiter(&waiter, false); - - /* - * Technically we could use raw_spin_[un]lock_irq() here, but this can -@@ -1365,7 +1740,8 @@ static inline int rt_mutex_slowtrylock(s - * Return whether the current task needs to call rt_mutex_postunlock(). - */ - static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, -- struct wake_q_head *wake_q) -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q) - { - unsigned long flags; - -@@ -1419,7 +1795,7 @@ static bool __sched rt_mutex_slowunlock( - * - * Queue the next waiter for wakeup once we release the wait_lock. - */ -- mark_wakeup_next_waiter(wake_q, lock); -+ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - return true; /* call rt_mutex_postunlock() */ -@@ -1471,9 +1847,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo - /* - * Performs the wakeup of the the top-waiter and re-enables preemption. - */ --void rt_mutex_postunlock(struct wake_q_head *wake_q) -+void rt_mutex_postunlock(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q) - { - wake_up_q(wake_q); -+ wake_up_q_sleeper(wake_sleeper_q); - - /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ - preempt_enable(); -@@ -1482,15 +1860,17 @@ void rt_mutex_postunlock(struct wake_q_h - static inline void - rt_mutex_fastunlock(struct rt_mutex *lock, - bool (*slowfn)(struct rt_mutex *lock, -- struct wake_q_head *wqh)) -+ struct wake_q_head *wqh, -+ struct wake_q_head *wq_sleeper)) - { - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) - return; - -- if (slowfn(lock, &wake_q)) -- rt_mutex_postunlock(&wake_q); -+ if (slowfn(lock, &wake_q, &wake_sleeper_q)) -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - } - - /** -@@ -1609,12 +1989,9 @@ void __sched rt_mutex_unlock(struct rt_m - } - EXPORT_SYMBOL_GPL(rt_mutex_unlock); - --/** -- * Futex variant, that since futex variants do not use the fast-path, can be -- * simple and will not need to retry. -- */ --bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wake_q) -+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) - { - lockdep_assert_held(&lock->wait_lock); - -@@ -1631,22 +2008,34 @@ bool __sched __rt_mutex_futex_unlock(str - * avoid inversion prior to the wakeup. preempt_disable() - * therein pairs with rt_mutex_postunlock(). - */ -- mark_wakeup_next_waiter(wake_q, lock); -+ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); - - return true; /* call postunlock() */ - } - -+/** -+ * Futex variant, that since futex variants do not use the fast-path, can be -+ * simple and will not need to retry. 
-+ */ -+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) -+{ -+ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); -+} -+ - void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) - { - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - bool postunlock; - - raw_spin_lock_irq(&lock->wait_lock); -- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); - raw_spin_unlock_irq(&lock->wait_lock); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - } - - /** -@@ -1679,13 +2068,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); - void __rt_mutex_init(struct rt_mutex *lock, const char *name) - { - lock->owner = NULL; -- raw_spin_lock_init(&lock->wait_lock); - lock->waiters = RB_ROOT; - lock->waiters_leftmost = NULL; - - debug_rt_mutex_init(lock, name); - } --EXPORT_SYMBOL_GPL(__rt_mutex_init); -+EXPORT_SYMBOL(__rt_mutex_init); - - /** - * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1704,7 +2092,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); - void rt_mutex_init_proxy_locked(struct rt_mutex *lock, - struct task_struct *proxy_owner) - { -- __rt_mutex_init(lock, NULL); -+ rt_mutex_init(lock); - debug_rt_mutex_proxy_lock(lock, proxy_owner); - rt_mutex_set_owner(lock, proxy_owner); - } -@@ -1926,3 +2314,25 @@ bool rt_mutex_cleanup_proxy_lock(struct - - return cleanup; - } -+ -+#ifdef CONFIG_PREEMPT_RT_FULL -+struct ww_mutex { -+}; -+struct ww_acquire_ctx { -+}; -+int __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) -+{ -+ BUG(); -+} -+EXPORT_SYMBOL_GPL(__ww_mutex_lock); -+int __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) -+{ -+ BUG(); -+} -+EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); -+void __sched ww_mutex_unlock(struct ww_mutex *lock) -+{ -+ BUG(); -+} -+EXPORT_SYMBOL_GPL(ww_mutex_unlock); -+#endif ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -14,6 +14,7 @@ - - #include - #include -+#include - - /* - * This is the control structure for tasks blocked on a rt_mutex, -@@ -28,6 +29,7 @@ struct rt_mutex_waiter { - struct rb_node pi_tree_entry; - struct task_struct *task; - struct rt_mutex *lock; -+ bool savestate; - #ifdef CONFIG_DEBUG_RT_MUTEXES - unsigned long ip; - struct pid *deadlock_task_pid; -@@ -107,7 +109,7 @@ extern void rt_mutex_init_proxy_locked(s - struct task_struct *proxy_owner); - extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); --extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); -+extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); - extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); -@@ -124,9 +126,11 @@ extern int rt_mutex_futex_trylock(struct - - extern void rt_mutex_futex_unlock(struct rt_mutex *lock); - extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wqh); -+ struct wake_q_head *wqh, -+ struct wake_q_head *wq_sleeper); - --extern void rt_mutex_postunlock(struct wake_q_head *wake_q); -+extern void rt_mutex_postunlock(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q); - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # include "rtmutex-debug.h" ---- a/kernel/locking/spinlock.c -+++ b/kernel/locking/spinlock.c -@@ -124,8 +124,11 @@ void __lockfunc 
__raw_##op##_lock_bh(loc - * __[spin|read|write]_lock_bh() - */ - BUILD_LOCK_OPS(spin, raw_spinlock); -+ -+#ifndef CONFIG_PREEMPT_RT_FULL - BUILD_LOCK_OPS(read, rwlock); - BUILD_LOCK_OPS(write, rwlock); -+#endif - - #endif - -@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ - EXPORT_SYMBOL(_raw_spin_unlock_bh); - #endif - -+#ifndef CONFIG_PREEMPT_RT_FULL -+ - #ifndef CONFIG_INLINE_READ_TRYLOCK - int __lockfunc _raw_read_trylock(rwlock_t *lock) - { -@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwl - EXPORT_SYMBOL(_raw_write_unlock_bh); - #endif - -+#endif /* !PREEMPT_RT_FULL */ -+ - #ifdef CONFIG_DEBUG_LOCK_ALLOC - - void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) ---- a/kernel/locking/spinlock_debug.c -+++ b/kernel/locking/spinlock_debug.c -@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t - - EXPORT_SYMBOL(__raw_spin_lock_init); - -+#ifndef CONFIG_PREEMPT_RT_FULL - void __rwlock_init(rwlock_t *lock, const char *name, - struct lock_class_key *key) - { -@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const - } - - EXPORT_SYMBOL(__rwlock_init); -+#endif - - static void spin_dump(raw_spinlock_t *lock, const char *msg) - { -@@ -135,6 +137,7 @@ void do_raw_spin_unlock(raw_spinlock_t * - arch_spin_unlock(&lock->raw_lock); - } - -+#ifndef CONFIG_PREEMPT_RT_FULL - static void rwlock_bug(rwlock_t *lock, const char *msg) - { - if (!debug_locks_off()) -@@ -224,3 +227,5 @@ void do_raw_write_unlock(rwlock_t *lock) - debug_write_unlock(lock); - arch_write_unlock(&lock->raw_lock); - } -+ -+#endif ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -461,7 +461,7 @@ void wake_q_add(struct wake_q_head *head - head->lastp = &node->next; - } - --void wake_up_q(struct wake_q_head *head) -+void __wake_up_q(struct wake_q_head *head, bool sleeper) - { - struct wake_q_node *node = head->first; - -@@ -478,7 +478,10 @@ void wake_up_q(struct wake_q_head *head) - * wake_up_process() implies a wmb() to pair with the queueing - * in wake_q_add() so as not to miss wakeups. - */ -- wake_up_process(task); -+ if (sleeper) -+ wake_up_lock_sleeper(task); -+ else -+ wake_up_process(task); - put_task_struct(task); - } - } diff --git a/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch b/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch index a75369a40..24ea11166 100644 --- a/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch +++ b/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch @@ -1,7 +1,7 @@ Subject: rt: Introduce cpu_chill() From: Thomas Gleixner Date: Wed, 07 Mar 2012 20:51:03 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. Add cpu_chill() to replace cpu_relax(). 
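[ Editor's illustration, not part of the patch series: a minimal sketch of the kind of retry loop the cpu_chill() description above targets. try_get_resource() is invented for the example; only cpu_relax() and the cpu_chill() helper added by this patch are real kernel interfaces. ]

	/*
	 * Spinning with cpu_relax() can live-lock on RT when the task that
	 * has to make progress was preempted; cpu_chill() sleeps for a tick
	 * instead of burning the CPU.
	 */
	while (!try_get_resource()) {		/* hypothetical contended path */
		cpu_chill();			/* was: cpu_relax(); */
	}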
cpu_chill() @@ -88,7 +88,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/delay.h +++ b/include/linux/delay.h -@@ -63,4 +63,10 @@ static inline void ssleep(unsigned int s +@@ -64,4 +64,10 @@ static inline void ssleep(unsigned int s msleep(seconds * 1000); } @@ -101,9 +101,9 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* defined(_LINUX_DELAY_H) */ --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1720,6 +1720,25 @@ SYSCALL_DEFINE2(nanosleep, struct timesp - return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); +@@ -1855,6 +1855,25 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct } + #endif +#ifdef CONFIG_PREEMPT_RT_FULL +/* @@ -111,13 +111,13 @@ Signed-off-by: Sebastian Andrzej Siewior + */ +void cpu_chill(void) +{ -+ struct timespec tu = { ++ struct timespec64 tu = { + .tv_nsec = NSEC_PER_MSEC, + }; + unsigned int freeze_flag = current->flags & PF_NOFREEZE; + + current->flags |= PF_NOFREEZE; -+ hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC); ++ hrtimer_nanosleep(&tu, HRTIMER_MODE_REL_HARD, CLOCK_MONOTONIC); + if (!freeze_flag) + current->flags &= ~PF_NOFREEZE; +} diff --git a/debian/patches/features/all/rt/rt-local-irq-lock.patch b/debian/patches/features/all/rt/rt-local-irq-lock.patch index cf3bb97cf..5b7cd9c7a 100644 --- a/debian/patches/features/all/rt/rt-local-irq-lock.patch +++ b/debian/patches/features/all/rt/rt-local-irq-lock.patch @@ -1,7 +1,7 @@ Subject: rt: Add local irq locks From: Thomas Gleixner Date: Mon, 20 Jun 2011 09:03:47 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Introduce locallock. For !RT this maps to preempt_disable()/ local_irq_disable() so there is not much that changes. For RT this will @@ -13,13 +13,13 @@ is held and the owner is preempted. Signed-off-by: Thomas Gleixner --- - include/linux/locallock.h | 266 ++++++++++++++++++++++++++++++++++++++++++++++ - include/linux/percpu.h | 29 +++++ - 2 files changed, 295 insertions(+) + include/linux/locallock.h | 271 ++++++++++++++++++++++++++++++++++++++++++++++ + include/linux/percpu.h | 29 ++++ + 2 files changed, 300 insertions(+) --- /dev/null +++ b/include/linux/locallock.h -@@ -0,0 +1,266 @@ +@@ -0,0 +1,271 @@ +#ifndef _LINUX_LOCALLOCK_H +#define _LINUX_LOCALLOCK_H + @@ -58,20 +58,10 @@ Signed-off-by: Thomas Gleixner + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ + } while (0) + -+/* -+ * spin_lock|trylock|unlock_local flavour that does not migrate disable -+ * used for __local_lock|trylock|unlock where get_local_var/put_local_var -+ * already takes care of the migrate_disable/enable -+ * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. 
-+ */ -+# define spin_lock_local(lock) spin_lock(lock) -+# define spin_trylock_local(lock) spin_trylock(lock) -+# define spin_unlock_local(lock) spin_unlock(lock) -+ +static inline void __local_lock(struct local_irq_lock *lv) +{ + if (lv->owner != current) { -+ spin_lock_local(&lv->lock); ++ spin_lock(&lv->lock); + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; @@ -82,14 +72,20 @@ Signed-off-by: Thomas Gleixner +#define local_lock(lvar) \ + do { __local_lock(&get_local_var(lvar)); } while (0) + ++#define local_lock_on(lvar, cpu) \ ++ do { __local_lock(&per_cpu(lvar, cpu)); } while (0) ++ +static inline int __local_trylock(struct local_irq_lock *lv) +{ -+ if (lv->owner != current && spin_trylock_local(&lv->lock)) { ++ if (lv->owner != current && spin_trylock(&lv->lock)) { + LL_WARN(lv->owner); + LL_WARN(lv->nestcnt); + lv->owner = current; + lv->nestcnt = 1; + return 1; ++ } else if (lv->owner == current) { ++ lv->nestcnt++; ++ return 1; + } + return 0; +} @@ -111,7 +107,7 @@ Signed-off-by: Thomas Gleixner + return; + + lv->owner = NULL; -+ spin_unlock_local(&lv->lock); ++ spin_unlock(&lv->lock); +} + +#define local_unlock(lvar) \ @@ -120,6 +116,9 @@ Signed-off-by: Thomas Gleixner + put_local_var(lvar); \ + } while (0) + ++#define local_unlock_on(lvar, cpu) \ ++ do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) ++ +static inline void __local_lock_irq(struct local_irq_lock *lv) +{ + spin_lock_irqsave(&lv->lock, lv->flags); @@ -260,6 +259,12 @@ Signed-off-by: Thomas Gleixner + +static inline void local_irq_lock_init(int lvar) { } + ++#define local_trylock(lvar) \ ++ ({ \ ++ preempt_disable(); \ ++ 1; \ ++ }) ++ +#define local_lock(lvar) preempt_disable() +#define local_unlock(lvar) preempt_enable() +#define local_lock_irq(lvar) local_irq_disable() @@ -288,7 +293,7 @@ Signed-off-by: Thomas Gleixner +#endif --- a/include/linux/percpu.h +++ b/include/linux/percpu.h -@@ -18,6 +18,35 @@ +@@ -19,6 +19,35 @@ #define PERCPU_MODULE_RESERVE 0 #endif diff --git a/debian/patches/features/all/rt/rt-locking-Reenable-migration-accross-schedule.patch b/debian/patches/features/all/rt/rt-locking-Reenable-migration-accross-schedule.patch deleted file mode 100644 index 336ea1b74..000000000 --- a/debian/patches/features/all/rt/rt-locking-Reenable-migration-accross-schedule.patch +++ /dev/null @@ -1,112 +0,0 @@ -From: Thomas Gleixner -Date: Mon, 8 Feb 2016 16:15:28 +0100 -Subject: rt/locking: Reenable migration accross schedule -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -We currently disable migration across lock acquisition. That includes the part -where we block on the lock and schedule out. We cannot disable migration after -taking the lock as that would cause a possible lock inversion. - -But we can be smart and enable migration when we block and schedule out. That -allows the scheduler to place the task freely at least if this is the first -migrate disable level. For nested locking this does not help at all. 
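[ Editor's illustration, not part of the patch series: a usage sketch for the local-lock primitives added by rt-local-irq-lock.patch above. The per-CPU list and example_add() are invented for the example; DEFINE_LOCAL_IRQ_LOCK() comes from the full locallock.h header, outside the hunks shown here. ]

	static DEFINE_LOCAL_IRQ_LOCK(example_lock);
	static DEFINE_PER_CPU(struct list_head, example_list);

	static void example_add(struct list_head *entry)
	{
		/*
		 * !RT: local_lock() maps to preempt_disable().
		 * RT:  it takes the per-CPU sleeping spinlock, so the
		 *      critical section stays preemptible.
		 */
		local_lock(example_lock);
		list_add(entry, this_cpu_ptr(&example_list));
		local_unlock(example_lock);
	}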
- -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 32 ++++++++++++++++++++------------ - 1 file changed, 20 insertions(+), 12 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -980,14 +980,19 @@ static int __try_to_take_rt_mutex(struct - * preemptible spin_lock functions: - */ - static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, -- void (*slowfn)(struct rt_mutex *lock)) -+ void (*slowfn)(struct rt_mutex *lock, -+ bool mg_off), -+ bool do_mig_dis) - { - might_sleep_no_state_check(); - -+ if (do_mig_dis) -+ migrate_disable(); -+ - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return; - else -- slowfn(lock); -+ slowfn(lock, do_mig_dis); - } - - static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, -@@ -1045,7 +1050,8 @@ static int task_blocks_on_rt_mutex(struc - * We store the current state under p->pi_lock in p->saved_state and - * the try_to_wake_up() code handles this accordingly. - */ --static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) -+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, -+ bool mg_off) - { - struct task_struct *lock_owner, *self = current; - struct rt_mutex_waiter waiter, *top_waiter; -@@ -1089,8 +1095,13 @@ static void noinline __sched rt_spin_lo - - debug_rt_mutex_print_deadlock(&waiter); - -- if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) -+ if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) { -+ if (mg_off) -+ migrate_enable(); - schedule(); -+ if (mg_off) -+ migrate_disable(); -+ } - - raw_spin_lock_irqsave(&lock->wait_lock, flags); - -@@ -1148,38 +1159,35 @@ static void noinline __sched rt_spin_lo - - void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) - { -- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - } - EXPORT_SYMBOL(rt_spin_lock__no_mg); - - void __lockfunc rt_spin_lock(spinlock_t *lock) - { -- migrate_disable(); -- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - } - EXPORT_SYMBOL(rt_spin_lock); - - void __lockfunc __rt_spin_lock(struct rt_mutex *lock) - { -- migrate_disable(); -- rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); -+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true); - } - EXPORT_SYMBOL(__rt_spin_lock); - - void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock) - { -- rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); -+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false); - } - EXPORT_SYMBOL(__rt_spin_lock__no_mg); - - #ifdef CONFIG_DEBUG_LOCK_ALLOC - void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) - { -- migrate_disable(); -- rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); - } - EXPORT_SYMBOL(rt_spin_lock_nested); - #endif diff --git a/debian/patches/features/all/rt/rt-preempt-base-config.patch b/debian/patches/features/all/rt/rt-preempt-base-config.patch index 0f8ce0b7c..bd9e973a6 100644 --- a/debian/patches/features/all/rt/rt-preempt-base-config.patch +++ b/debian/patches/features/all/rt/rt-preempt-base-config.patch @@ -1,7 +1,7 @@ Subject: rt: Provide PREEMPT_RT_BASE config switch From: Thomas Gleixner 
Date: Fri, 17 Jun 2011 12:39:57 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Introduce PREEMPT_RT_BASE which enables parts of PREEMPT_RT_FULL. Forces interrupt threading and enables some of the RT diff --git a/debian/patches/features/all/rt/rt-serial-warn-fix.patch b/debian/patches/features/all/rt/rt-serial-warn-fix.patch index c74f93589..edbef9683 100644 --- a/debian/patches/features/all/rt/rt-serial-warn-fix.patch +++ b/debian/patches/features/all/rt/rt-serial-warn-fix.patch @@ -1,7 +1,7 @@ Subject: rt: Improve the serial console PASS_LIMIT From: Ingo Molnar Date: Wed Dec 14 13:05:54 CET 2011 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Beyond the warning: diff --git a/debian/patches/features/all/rt/rtmutex--Handle-non-enqueued-waiters-gracefully.patch b/debian/patches/features/all/rt/rtmutex--Handle-non-enqueued-waiters-gracefully.patch index 1385493ec..f85b6504e 100644 --- a/debian/patches/features/all/rt/rtmutex--Handle-non-enqueued-waiters-gracefully.patch +++ b/debian/patches/features/all/rt/rtmutex--Handle-non-enqueued-waiters-gracefully.patch @@ -1,7 +1,7 @@ Subject: rtmutex: Handle non enqueued waiters gracefully From: Thomas Gleixner Date: Fri, 06 Nov 2015 18:51:03 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Yimin debugged that in case of a PI wakeup in progress when rt_mutex_start_proxy_lock() calls task_blocks_on_rt_mutex() the latter @@ -22,7 +22,7 @@ Cc: stable-rt@vger.kernel.org --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1707,7 +1707,7 @@ int __rt_mutex_start_proxy_lock(struct r +@@ -1716,7 +1716,7 @@ int __rt_mutex_start_proxy_lock(struct r ret = 0; } diff --git a/debian/patches/features/all/rt/rtmutex-Fix-lock-stealing-logic.patch b/debian/patches/features/all/rt/rtmutex-Fix-lock-stealing-logic.patch deleted file mode 100644 index a26594159..000000000 --- a/debian/patches/features/all/rt/rtmutex-Fix-lock-stealing-logic.patch +++ /dev/null @@ -1,162 +0,0 @@ -From: Mike Galbraith -Date: Fri, 23 Jun 2017 09:37:14 +0200 -Subject: rtmutex: Fix lock stealing logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -1. When trying to acquire an rtmutex, we first try to grab it without -queueing the waiter, and explicitly check for that initial attempt -in the !waiter path of __try_to_take_rt_mutex(). Checking whether -the lock taker is top waiter before allowing a steal attempt in that -path is a thinko: the lock taker has not yet blocked. - -2. It seems wrong to change the definition of rt_mutex_waiter_less() -to mean less or perhaps equal when we have an rt_mutex_waiter_equal(). - -Remove the thinko, restore rt_mutex_waiter_less(), implement and use -rt_mutex_steal() based upon rt_mutex_waiter_less/equal(), moving all -qualification criteria into the function itself. 
- -Reviewed-by: Steven Rostedt (VMware) -Signed-off-by: Mike Galbraith -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 75 +++++++++++++++++++++++------------------------ - 1 file changed, 37 insertions(+), 38 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -235,26 +235,19 @@ static inline bool unlock_rt_mutex_safe( - } - #endif - --#define STEAL_NORMAL 0 --#define STEAL_LATERAL 1 -- - /* - * Only use with rt_mutex_waiter_{less,equal}() - */ --#define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+#define task_to_waiter(p) &(struct rt_mutex_waiter) \ -+ { .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -- struct rt_mutex_waiter *right, int mode) -+ struct rt_mutex_waiter *right) - { -- if (mode == STEAL_NORMAL) { -- if (left->prio < right->prio) -- return 1; -- } else { -- if (left->prio <= right->prio) -- return 1; -- } -+ if (left->prio < right->prio) -+ return 1; -+ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. -@@ -286,6 +279,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa - return 1; - } - -+#define STEAL_NORMAL 0 -+#define STEAL_LATERAL 1 -+ -+static inline int -+rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode) -+{ -+ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); -+ -+ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter)) -+ return 1; -+ -+ /* -+ * Note that RT tasks are excluded from lateral-steals -+ * to prevent the introduction of an unbounded latency. -+ */ -+ if (mode == STEAL_NORMAL || rt_task(waiter->task)) -+ return 0; -+ -+ return rt_mutex_waiter_equal(waiter, top_waiter); -+} -+ - static void - rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) - { -@@ -297,7 +311,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, - while (*link) { - parent = *link; - entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); -- if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { -+ if (rt_mutex_waiter_less(waiter, entry)) { - link = &parent->rb_left; - } else { - link = &parent->rb_right; -@@ -336,7 +350,7 @@ rt_mutex_enqueue_pi(struct task_struct * - while (*link) { - parent = *link; - entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); -- if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { -+ if (rt_mutex_waiter_less(waiter, entry)) { - link = &parent->rb_left; - } else { - link = &parent->rb_right; -@@ -846,6 +860,7 @@ static int rt_mutex_adjust_prio_chain(st - * @task: The task which wants to acquire the lock - * @waiter: The waiter that is queued to the lock's wait tree if the - * callsite called task_blocked_on_lock(), otherwise NULL -+ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL) - */ - static int __try_to_take_rt_mutex(struct rt_mutex *lock, - struct task_struct *task, -@@ -885,14 +900,11 @@ static int __try_to_take_rt_mutex(struct - */ - if (waiter) { - /* -- * If waiter is not the highest priority waiter of -- * @lock, give up. -+ * If waiter is not the highest priority waiter of @lock, -+ * or its peer when lateral steal is allowed, give up. - */ -- if (waiter != rt_mutex_top_waiter(lock)) { -- /* XXX rt_mutex_waiter_less() ? */ -+ if (!rt_mutex_steal(lock, waiter, mode)) - return 0; -- } -- - /* - * We can acquire the lock. Remove the waiter from the - * lock waiters tree. 
-@@ -909,25 +921,12 @@ static int __try_to_take_rt_mutex(struct - * not need to be dequeued. - */ - if (rt_mutex_has_waiters(lock)) { -- struct task_struct *pown = rt_mutex_top_waiter(lock)->task; -- -- if (task != pown) -- return 0; -- -- /* -- * Note that RT tasks are excluded from lateral-steals -- * to prevent the introduction of an unbounded latency. -- */ -- if (rt_task(task)) -- mode = STEAL_NORMAL; - /* -- * If @task->prio is greater than or equal to -- * the top waiter priority (kernel view), -- * @task lost. -+ * If @task->prio is greater than the top waiter -+ * priority (kernel view), or equal to it when a -+ * lateral steal is forbidden, @task lost. - */ -- if (!rt_mutex_waiter_less(task_to_waiter(task), -- rt_mutex_top_waiter(lock), -- mode)) -+ if (!rt_mutex_steal(lock, task_to_waiter(task), mode)) - return 0; - /* - * The current top waiter stays enqueued. We diff --git a/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch b/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch index bec78e563..029fe596d 100644 --- a/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch +++ b/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sat, 1 Apr 2017 12:50:59 +0200 Subject: [PATCH] rtmutex: Make lock_killable work -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Locking an rt mutex killable does not work because signal handling is restricted to TASK_INTERRUPTIBLE. @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1215,18 +1215,13 @@ static int __sched +@@ -1201,18 +1201,13 @@ static int __sched if (try_to_take_rt_mutex(lock, current, waiter)) break; diff --git a/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_lock_state.patch b/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_lock_state.patch deleted file mode 100644 index b60ad2af2..000000000 --- a/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_lock_state.patch +++ /dev/null @@ -1,112 +0,0 @@ -From: Thomas Gleixner -Date: Sat, 1 Apr 2017 12:51:00 +0200 -Subject: [PATCH] rtmutex: Provide rt_mutex_lock_state() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Allow rtmutex to be locked with arbitrary states. Preparatory patch for the -rt rwsem rework. 
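[ Editor's illustration, not part of the patch series: the intent of rtmutex-Make-lock_killable-work.patch above, whose hunk context is truncated here. The sketch shows the shape of the resulting wait loop once the TASK_INTERRUPTIBLE special case is gone, so any sleeping state accepted by signal_pending_state() (interruptible or killable) can abort the wait. Treat it as a paraphrase, not the literal patched code. ]

	for (;;) {
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		if (timeout && !timeout->task) {
			ret = -ETIMEDOUT;
			break;
		}
		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}
		/* drop wait_lock, schedule, then retry */
	}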
- -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/rtmutex.h | 1 + - kernel/locking/rtmutex.c | 44 +++++++++++++++++++++++++------------------- - 2 files changed, 26 insertions(+), 19 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -105,6 +105,7 @@ extern void __rt_mutex_init(struct rt_mu - extern void rt_mutex_destroy(struct rt_mutex *lock); - - extern void rt_mutex_lock(struct rt_mutex *lock); -+extern int rt_mutex_lock_state(struct rt_mutex *lock, int state); - extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); - extern int rt_mutex_lock_killable(struct rt_mutex *lock); - extern int rt_mutex_timed_lock(struct rt_mutex *lock, ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -2019,21 +2019,32 @@ rt_mutex_fastunlock(struct rt_mutex *loc - } - - /** -+ * rt_mutex_lock_state - lock a rt_mutex with a given state -+ * -+ * @lock: The rt_mutex to be locked -+ * @state: The state to set when blocking on the rt_mutex -+ */ -+int __sched rt_mutex_lock_state(struct rt_mutex *lock, int state) -+{ -+ might_sleep(); -+ -+ return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); -+} -+ -+/** - * rt_mutex_lock - lock a rt_mutex - * - * @lock: the rt_mutex to be locked - */ - void __sched rt_mutex_lock(struct rt_mutex *lock) - { -- might_sleep(); -- -- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock); -+ rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE); - } - EXPORT_SYMBOL_GPL(rt_mutex_lock); - - /** - * rt_mutex_lock_interruptible - lock a rt_mutex interruptible -- * -+ ** - * @lock: the rt_mutex to be locked - * - * Returns: -@@ -2042,20 +2053,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); - */ - int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) - { -- might_sleep(); -- -- return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock); -+ return rt_mutex_lock_state(lock, TASK_INTERRUPTIBLE); - } - EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); - --/* -- * Futex variant, must not use fastpath. -- */ --int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) --{ -- return rt_mutex_slowtrylock(lock); --} -- - /** - * rt_mutex_lock_killable - lock a rt_mutex killable - * -@@ -2065,16 +2066,21 @@ int __sched rt_mutex_futex_trylock(struc - * Returns: - * 0 on success - * -EINTR when interrupted by a signal -- * -EDEADLK when the lock would deadlock (when deadlock detection is on) - */ - int __sched rt_mutex_lock_killable(struct rt_mutex *lock) - { -- might_sleep(); -- -- return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock); -+ return rt_mutex_lock_state(lock, TASK_KILLABLE); - } - EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); - -+/* -+ * Futex variant, must not use fastpath. 
-+ */ -+int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) -+{ -+ return rt_mutex_slowtrylock(lock); -+} -+ - /** - * rt_mutex_timed_lock - lock a rt_mutex interruptible - * the timeout structure is provided diff --git a/debian/patches/features/all/rt/rtmutex-Provide-locked-slowpath.patch b/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch similarity index 56% rename from debian/patches/features/all/rt/rtmutex-Provide-locked-slowpath.patch rename to debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch index a36079195..0020f397c 100644 --- a/debian/patches/features/all/rt/rtmutex-Provide-locked-slowpath.patch +++ b/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch @@ -1,27 +1,22 @@ From: Thomas Gleixner -Date: Sat, 1 Apr 2017 12:51:01 +0200 -Subject: [PATCH] rtmutex: Provide locked slowpath -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Date: Thu, 12 Oct 2017 16:14:22 +0200 +Subject: rtmutex: Provide rt_mutex_slowlock_locked() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz -The new rt rwsem implementation needs rtmutex::wait_lock to protect struct -rw_semaphore. Dropping the lock and reaquiring it for locking the rtmutex -would open a race window. - -Split out the inner workings of the locked slowpath so it can be called with -wait_lock held. +This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - kernel/locking/rtmutex.c | 72 +++++++++++++++++++++++----------------- - kernel/locking/rtmutex_common.h | 8 ++++ - 2 files changed, 50 insertions(+), 30 deletions(-) + kernel/locking/rtmutex.c | 70 ++++++++++++++++++++++------------------ + kernel/locking/rtmutex_common.h | 6 +++ + 2 files changed, 46 insertions(+), 30 deletions(-) --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1751,30 +1751,13 @@ static void ww_mutex_account_lock(struct +@@ -1244,35 +1244,16 @@ static void rt_mutex_handle_deadlock(int + } } - #endif -/* - * Slow path lock function: @@ -29,19 +24,17 @@ Signed-off-by: Sebastian Andrzej Siewior -static int __sched -rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk, -- struct ww_acquire_ctx *ww_ctx) +- enum rtmutex_chainwalk chwalk) +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx, + struct rt_mutex_waiter *waiter) { - struct rt_mutex_waiter waiter; - unsigned long flags; - int ret = 0; - -- rt_mutex_init_waiter(&waiter, false); +- rt_mutex_init_waiter(&waiter); - - /* - * Technically we could use raw_spin_[un]lock_irq() here, but this can @@ -54,17 +47,16 @@ Signed-off-by: Sebastian Andrzej Siewior - raw_spin_lock_irqsave(&lock->wait_lock, flags); + int ret; - #ifdef CONFIG_PREEMPT_RT_FULL - if (ww_ctx) { -@@ -1790,7 +1773,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, - if (try_to_take_rt_mutex(lock, current, NULL)) { - if (ww_ctx) - ww_mutex_account_lock(lock, ww_ctx); + /* Try to acquire the lock again: */ +- if (try_to_take_rt_mutex(lock, current, NULL)) { - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ if (try_to_take_rt_mutex(lock, current, NULL)) return 0; - } +- } -@@ -1800,13 +1782,13 @@ rt_mutex_slowlock(struct rt_mutex *lock, + set_current_state(state); + +@@ -1280,17 
+1261,18 @@ rt_mutex_slowlock(struct rt_mutex *lock, if (unlikely(timeout)) hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); @@ -74,28 +66,21 @@ Signed-off-by: Sebastian Andrzej Siewior - if (likely(!ret)) + if (likely(!ret)) { /* sleep on the mutex */ -- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, -+ ret = __rt_mutex_slowlock(lock, state, timeout, waiter, - ww_ctx); -- else if (ww_ctx) { -+ } else if (ww_ctx) { - /* ww_mutex received EDEADLK, let it become EALREADY */ - ret = __mutex_lock_check_stamp(lock, ww_ctx); - BUG_ON(!ret); -@@ -1815,10 +1797,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, +- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); ++ ret = __rt_mutex_slowlock(lock, state, timeout, waiter); ++ } + if (unlikely(ret)) { __set_current_state(TASK_RUNNING); if (rt_mutex_has_waiters(lock)) - remove_waiter(lock, &waiter); +- rt_mutex_handle_deadlock(ret, chwalk, &waiter); + remove_waiter(lock, waiter); - /* ww_mutex want to report EDEADLK/EALREADY, let them */ - if (!ww_ctx) -- rt_mutex_handle_deadlock(ret, chwalk, &waiter); -+ rt_mutex_handle_deadlock(ret, chwalk, waiter); - } else if (ww_ctx) { - ww_mutex_account_lock(lock, ww_ctx); ++ /* ww_mutex want to report EDEADLK/EALREADY, let them */ } -@@ -1828,6 +1810,36 @@ rt_mutex_slowlock(struct rt_mutex *lock, + + /* +@@ -1298,6 +1280,34 @@ rt_mutex_slowlock(struct rt_mutex *lock, * unconditionally. We might have to fix that up. */ fixup_rt_mutex_waiters(lock); @@ -108,14 +93,13 @@ Signed-off-by: Sebastian Andrzej Siewior +static int __sched +rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, -+ enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx) ++ enum rtmutex_chainwalk chwalk) +{ + struct rt_mutex_waiter waiter; + unsigned long flags; + int ret = 0; + -+ rt_mutex_init_waiter(&waiter, false); ++ rt_mutex_init_waiter(&waiter); + + /* + * Technically we could use raw_spin_[un]lock_irq() here, but this can @@ -127,24 +111,21 @@ Signed-off-by: Sebastian Andrzej Siewior + */ + raw_spin_lock_irqsave(&lock->wait_lock, flags); + -+ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, -+ &waiter); ++ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -131,6 +131,14 @@ extern bool __rt_mutex_futex_unlock(stru +@@ -157,6 +157,12 @@ extern bool __rt_mutex_futex_unlock(stru + struct wake_q_head *wqh); - extern void rt_mutex_postunlock(struct wake_q_head *wake_q, - struct wake_q_head *wake_sleeper_q); + extern void rt_mutex_postunlock(struct wake_q_head *wake_q); +/* RW semaphore special interface */ -+struct ww_acquire_ctx; + +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx, + struct rt_mutex_waiter *waiter); #ifdef CONFIG_DEBUG_RT_MUTEXES diff --git a/debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch b/debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch new file mode 100644 index 000000000..e9c471304 --- /dev/null +++ b/debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch @@ -0,0 +1,373 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:17:03 +0200 +Subject: rtmutex: add mutex implementation based on rtmutex +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/mutex_rt.h | 130 ++++++++++++++++++++++++++ + kernel/locking/mutex-rt.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 353 insertions(+) + create mode 100644 include/linux/mutex_rt.h + create mode 100644 kernel/locking/mutex-rt.c + +--- /dev/null ++++ b/include/linux/mutex_rt.h +@@ -0,0 +1,130 @@ ++#ifndef __LINUX_MUTEX_RT_H ++#define __LINUX_MUTEX_RT_H ++ ++#ifndef __LINUX_MUTEX_H ++#error "Please include mutex.h" ++#endif ++ ++#include ++ ++/* FIXME: Just for __lockfunc */ ++#include ++ ++struct mutex { ++ struct rt_mutex lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define __MUTEX_INITIALIZER(mutexname) \ ++ { \ ++ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ ++ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ ++ } ++ ++#define DEFINE_MUTEX(mutexname) \ ++ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) ++ ++extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); ++extern void __lockfunc _mutex_lock(struct mutex *lock); ++extern void __lockfunc _mutex_lock_io(struct mutex *lock); ++extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); ++extern int __lockfunc _mutex_lock_killable(struct mutex *lock); ++extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); ++extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); ++extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_trylock(struct mutex *lock); ++extern void __lockfunc _mutex_unlock(struct mutex *lock); ++ ++#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) ++#define mutex_lock(l) _mutex_lock(l) ++#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) ++#define mutex_lock_killable(l) _mutex_lock_killable(l) ++#define mutex_trylock(l) _mutex_trylock(l) ++#define mutex_unlock(l) _mutex_unlock(l) ++#define mutex_lock_io(l) _mutex_lock_io(l); ++ ++#define __mutex_owner(l) ((l)->lock.owner) ++ ++#ifdef CONFIG_DEBUG_MUTEXES ++#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) ++#else ++static inline void mutex_destroy(struct mutex *lock) {} ++#endif ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) ++# define mutex_lock_interruptible_nested(l, s) \ ++ _mutex_lock_interruptible_nested(l, s) ++# define mutex_lock_killable_nested(l, s) \ ++ _mutex_lock_killable_nested(l, s) ++# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) ++ ++# define mutex_lock_nest_lock(lock, nest_lock) \ ++do { \ ++ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ ++ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ ++} while (0) ++ ++#else ++# define mutex_lock_nested(l, s) _mutex_lock(l) ++# define mutex_lock_interruptible_nested(l, s) \ ++ _mutex_lock_interruptible(l) ++# define mutex_lock_killable_nested(l, s) \ ++ _mutex_lock_killable(l) ++# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) ++# define mutex_lock_io_nested(l, s) _mutex_lock_io(l) ++#endif ++ ++# define mutex_init(mutex) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ 
rt_mutex_init(&(mutex)->lock); \ ++ __mutex_do_init((mutex), #mutex, &__key); \ ++} while (0) ++ ++# define __mutex_init(mutex, name, key) \ ++do { \ ++ rt_mutex_init(&(mutex)->lock); \ ++ __mutex_do_init((mutex), name, key); \ ++} while (0) ++ ++/** ++ * These values are chosen such that FAIL and SUCCESS match the ++ * values of the regular mutex_trylock(). ++ */ ++enum mutex_trylock_recursive_enum { ++ MUTEX_TRYLOCK_FAILED = 0, ++ MUTEX_TRYLOCK_SUCCESS = 1, ++ MUTEX_TRYLOCK_RECURSIVE, ++}; ++/** ++ * mutex_trylock_recursive - trylock variant that allows recursive locking ++ * @lock: mutex to be locked ++ * ++ * This function should not be used, _ever_. It is purely for hysterical GEM ++ * raisins, and once those are gone this will be removed. ++ * ++ * Returns: ++ * MUTEX_TRYLOCK_FAILED - trylock failed, ++ * MUTEX_TRYLOCK_SUCCESS - lock acquired, ++ * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. ++ */ ++int __rt_mutex_owner_current(struct rt_mutex *lock); ++ ++static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum ++mutex_trylock_recursive(struct mutex *lock) ++{ ++ if (unlikely(__rt_mutex_owner_current(&lock->lock))) ++ return MUTEX_TRYLOCK_RECURSIVE; ++ ++ return mutex_trylock(lock); ++} ++ ++extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); ++ ++#endif +--- /dev/null ++++ b/kernel/locking/mutex-rt.c +@@ -0,0 +1,223 @@ ++/* ++ * kernel/rt.c ++ * ++ * Real-Time Preemption Support ++ * ++ * started by Ingo Molnar: ++ * ++ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar ++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner ++ * ++ * historic credit for proving that Linux spinlocks can be implemented via ++ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow ++ * and others) who prototyped it on 2.4 and did lots of comparative ++ * research and analysis; TimeSys, for proving that you can implement a ++ * fully preemptible kernel via the use of IRQ threading and mutexes; ++ * Bill Huey for persuasively arguing on lkml that the mutex model is the ++ * right one; and to MontaVista, who ported pmutexes to 2.6. ++ * ++ * This code is a from-scratch implementation and is not based on pmutexes, ++ * but the idea of converting spinlocks to mutexes is used here too. ++ * ++ * lock debugging, locking tree, deadlock detection: ++ * ++ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey ++ * Released under the General Public License (GPL). ++ * ++ * Includes portions of the generic R/W semaphore implementation from: ++ * ++ * Copyright (c) 2001 David Howells (dhowells@redhat.com). ++ * - Derived partially from idea by Andrea Arcangeli ++ * - Derived also from comments by Linus ++ * ++ * Pending ownership of locks and ownership stealing: ++ * ++ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt ++ * ++ * (also by Steven Rostedt) ++ * - Converted single pi_lock to individual task locks. ++ * ++ * By Esben Nielsen: ++ * Doing priority inheritance with help of the scheduler. ++ * ++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner ++ * - major rework based on Esben Nielsens initial patch ++ * - replaced thread_info references by task_struct refs ++ * - removed task->pending_owner dependency ++ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks ++ * in the scheduler return path as discussed with Steven Rostedt ++ * ++ * Copyright (C) 2006, Kihon Technologies Inc. ++ * Steven Rostedt ++ * - debugged and patched Thomas Gleixner's rework. 
++ * - added back the cmpxchg to the rework. ++ * - turned atomic require back on for SMP. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtmutex_common.h" ++ ++/* ++ * struct mutex functions ++ */ ++void __mutex_do_init(struct mutex *mutex, const char *name, ++ struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); ++ lockdep_init_map(&mutex->dep_map, name, key, 0); ++#endif ++ mutex->lock.save_state = 0; ++} ++EXPORT_SYMBOL(__mutex_do_init); ++ ++void __lockfunc _mutex_lock(struct mutex *lock) ++{ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(_mutex_lock); ++ ++void __lockfunc _mutex_lock_io(struct mutex *lock) ++{ ++ int token; ++ ++ token = io_schedule_prepare(); ++ _mutex_lock(lock); ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL_GPL(_mutex_lock_io); ++ ++int __lockfunc _mutex_lock_interruptible(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_interruptible); ++ ++int __lockfunc _mutex_lock_killable(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_killable); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) ++{ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(_mutex_lock_nested); ++ ++void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) ++{ ++ int token; ++ ++ token = io_schedule_prepare(); ++ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); ++ ++void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) ++{ ++ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); ++ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(_mutex_lock_nest_lock); ++ ++int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) ++{ ++ int ret; ++ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_interruptible_nested); ++ ++int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_killable_nested); ++#endif ++ ++int __lockfunc _mutex_trylock(struct mutex *lock) ++{ ++ int ret = __rt_mutex_trylock(&lock->lock); ++ ++ if (ret) ++ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} 
++EXPORT_SYMBOL(_mutex_trylock); ++ ++void __lockfunc _mutex_unlock(struct mutex *lock) ++{ ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ __rt_mutex_unlock(&lock->lock); ++} ++EXPORT_SYMBOL(_mutex_unlock); ++ ++/** ++ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 ++ * @cnt: the atomic which we are to dec ++ * @lock: the mutex to return holding if we dec to 0 ++ * ++ * return true and hold lock if we dec to 0, return false otherwise ++ */ ++int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) ++{ ++ /* dec if we can't possibly hit 0 */ ++ if (atomic_add_unless(cnt, -1, 1)) ++ return 0; ++ /* we might hit 0, so take the lock */ ++ mutex_lock(lock); ++ if (!atomic_dec_and_test(cnt)) { ++ /* when we actually did the dec, we didn't hit 0 */ ++ mutex_unlock(lock); ++ return 0; ++ } ++ /* we hit 0, and we hold the lock */ ++ return 1; ++} ++EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch b/debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch new file mode 100644 index 000000000..046295877 --- /dev/null +++ b/debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch @@ -0,0 +1,569 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:18:06 +0200 +Subject: rtmutex: add rwlock implementation based on rtmutex +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The implementation is bias-based, similar to the rwsem implementation. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rwlock_rt.h | 119 ++++++++++++ + include/linux/rwlock_types_rt.h | 55 +++++ + kernel/locking/rwlock-rt.c | 368 ++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 542 insertions(+) + create mode 100644 include/linux/rwlock_rt.h + create mode 100644 include/linux/rwlock_types_rt.h + create mode 100644 kernel/locking/rwlock-rt.c + +--- /dev/null ++++ b/include/linux/rwlock_rt.h +@@ -0,0 +1,119 @@ ++#ifndef __LINUX_RWLOCK_RT_H ++#define __LINUX_RWLOCK_RT_H ++ ++#ifndef __LINUX_SPINLOCK_H ++#error Do not include directly. 
Use spinlock.h ++#endif ++ ++extern void __lockfunc rt_write_lock(rwlock_t *rwlock); ++extern void __lockfunc rt_read_lock(rwlock_t *rwlock); ++extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); ++extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); ++extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); ++extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); ++extern int __lockfunc rt_read_can_lock(rwlock_t *rwlock); ++extern int __lockfunc rt_write_can_lock(rwlock_t *rwlock); ++extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); ++ ++#define read_can_lock(rwlock) rt_read_can_lock(rwlock) ++#define write_can_lock(rwlock) rt_write_can_lock(rwlock) ++ ++#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) ++#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) ++ ++static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags) ++{ ++ /* XXX ARCH_IRQ_ENABLED */ ++ *flags = 0; ++ return rt_write_trylock(lock); ++} ++ ++#define write_trylock_irqsave(lock, flags) \ ++ __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags))) ++ ++#define read_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ rt_read_lock(lock); \ ++ flags = 0; \ ++ } while (0) ++ ++#define write_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ rt_write_lock(lock); \ ++ flags = 0; \ ++ } while (0) ++ ++#define read_lock(lock) rt_read_lock(lock) ++ ++#define read_lock_bh(lock) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_read_lock(lock); \ ++ } while (0) ++ ++#define read_lock_irq(lock) read_lock(lock) ++ ++#define write_lock(lock) rt_write_lock(lock) ++ ++#define write_lock_bh(lock) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_write_lock(lock); \ ++ } while (0) ++ ++#define write_lock_irq(lock) write_lock(lock) ++ ++#define read_unlock(lock) rt_read_unlock(lock) ++ ++#define read_unlock_bh(lock) \ ++ do { \ ++ rt_read_unlock(lock); \ ++ local_bh_enable(); \ ++ } while (0) ++ ++#define read_unlock_irq(lock) read_unlock(lock) ++ ++#define write_unlock(lock) rt_write_unlock(lock) ++ ++#define write_unlock_bh(lock) \ ++ do { \ ++ rt_write_unlock(lock); \ ++ local_bh_enable(); \ ++ } while (0) ++ ++#define write_unlock_irq(lock) write_unlock(lock) ++ ++#define read_unlock_irqrestore(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void) flags; \ ++ rt_read_unlock(lock); \ ++ } while (0) ++ ++#define write_unlock_irqrestore(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void) flags; \ ++ rt_write_unlock(lock); \ ++ } while (0) ++ ++#define rwlock_init(rwl) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __rt_rwlock_init(rwl, #rwl, &__key); \ ++} while (0) ++ ++/* ++ * Internal functions made global for CPU pinning ++ */ ++void __read_rt_lock(struct rt_rw_lock *lock); ++int __read_rt_trylock(struct rt_rw_lock *lock); ++void __write_rt_lock(struct rt_rw_lock *lock); ++int __write_rt_trylock(struct rt_rw_lock *lock); ++void __read_rt_unlock(struct rt_rw_lock *lock); ++void __write_rt_unlock(struct rt_rw_lock *lock); ++ ++#endif +--- /dev/null ++++ b/include/linux/rwlock_types_rt.h +@@ -0,0 +1,55 @@ ++#ifndef __LINUX_RWLOCK_TYPES_RT_H ++#define __LINUX_RWLOCK_TYPES_RT_H ++ ++#ifndef __LINUX_SPINLOCK_TYPES_H ++#error "Do not include directly. 
Include spinlock_types.h instead" ++#endif ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } ++#else ++# define RW_DEP_MAP_INIT(lockname) ++#endif ++ ++typedef struct rt_rw_lock rwlock_t; ++ ++#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) ++ ++#define DEFINE_RWLOCK(name) \ ++ rwlock_t name = __RW_LOCK_UNLOCKED(name) ++ ++/* ++ * A reader biased implementation primarily for CPU pinning. ++ * ++ * Can be selected as general replacement for the single reader RT rwlock ++ * variant ++ */ ++struct rt_rw_lock { ++ struct rt_mutex rtmutex; ++ atomic_t readers; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define READER_BIAS (1U << 31) ++#define WRITER_BIAS (1U << 30) ++ ++#define __RWLOCK_RT_INITIALIZER(name) \ ++{ \ ++ .readers = ATOMIC_INIT(READER_BIAS), \ ++ .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \ ++ RW_DEP_MAP_INIT(name) \ ++} ++ ++void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, ++ struct lock_class_key *key); ++ ++#define rwlock_biased_rt_init(rwlock) \ ++ do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \ ++ } while (0) ++ ++#endif +--- /dev/null ++++ b/kernel/locking/rwlock-rt.c +@@ -0,0 +1,368 @@ ++/* ++ */ ++#include ++#include ++ ++#include "rtmutex_common.h" ++#include ++ ++/* ++ * RT-specific reader/writer locks ++ * ++ * write_lock() ++ * 1) Lock lock->rtmutex ++ * 2) Remove the reader BIAS to force readers into the slow path ++ * 3) Wait until all readers have left the critical region ++ * 4) Mark it write locked ++ * ++ * write_unlock() ++ * 1) Remove the write locked marker ++ * 2) Set the reader BIAS so readers can use the fast path again ++ * 3) Unlock lock->rtmutex to release blocked readers ++ * ++ * read_lock() ++ * 1) Try fast path acquisition (reader BIAS is set) ++ * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag ++ * 3) If !writelocked, acquire it for read ++ * 4) If writelocked, block on lock->rtmutex ++ * 5) unlock lock->rtmutex, goto 1) ++ * ++ * read_unlock() ++ * 1) Try fast path release (reader count != 1) ++ * 2) Wake the writer waiting in write_lock()#3 ++ * ++ * read_lock()#3 has the consequence, that rw locks on RT are not writer ++ * fair, but writers, which should be avoided in RT tasks (think tasklist ++ * lock), are subject to the rtmutex priority/DL inheritance mechanism. ++ * ++ * It's possible to make the rw locks writer fair by keeping a list of ++ * active readers. A blocked writer would force all newly incoming readers ++ * to block on the rtmutex, but the rtmutex would have to be proxy locked ++ * for one reader after the other. We can't use multi-reader inheritance ++ * because there is no way to support that with ++ * SCHED_DEADLINE. Implementing the one by one reader boosting/handover ++ * mechanism is a major surgery for a very dubious value. ++ * ++ * The risk of writer starvation is there, but the pathological use cases ++ * which trigger it are not necessarily the typical RT workloads. 
++ */ ++ ++void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, ++ struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held semaphore: ++ */ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++#endif ++ atomic_set(&lock->readers, READER_BIAS); ++ rt_mutex_init(&lock->rtmutex); ++ lock->rtmutex.save_state = 1; ++} ++ ++int __read_rt_trylock(struct rt_rw_lock *lock) ++{ ++ int r, old; ++ ++ /* ++ * Increment reader count, if lock->readers < 0, i.e. READER_BIAS is ++ * set. ++ */ ++ for (r = atomic_read(&lock->readers); r < 0;) { ++ old = atomic_cmpxchg(&lock->readers, r, r + 1); ++ if (likely(old == r)) ++ return 1; ++ r = old; ++ } ++ return 0; ++} ++ ++void __sched __read_rt_lock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ struct rt_mutex_waiter waiter; ++ unsigned long flags; ++ ++ if (__read_rt_trylock(lock)) ++ return; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ /* ++ * Allow readers as long as the writer has not completely ++ * acquired the semaphore for write. ++ */ ++ if (atomic_read(&lock->readers) != WRITER_BIAS) { ++ atomic_inc(&lock->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return; ++ } ++ ++ /* ++ * Call into the slow lock path with the rtmutex->wait_lock ++ * held, so this can't result in the following race: ++ * ++ * Reader1 Reader2 Writer ++ * read_lock() ++ * write_lock() ++ * rtmutex_lock(m) ++ * swait() ++ * read_lock() ++ * unlock(m->wait_lock) ++ * read_unlock() ++ * swake() ++ * lock(m->wait_lock) ++ * lock->writelocked=true ++ * unlock(m->wait_lock) ++ * ++ * write_unlock() ++ * lock->writelocked=false ++ * rtmutex_unlock(m) ++ * read_lock() ++ * write_lock() ++ * rtmutex_lock(m) ++ * swait() ++ * rtmutex_lock(m) ++ * ++ * That would put Reader1 behind the writer waiting on ++ * Reader2 to call read_unlock() which might be unbound. ++ */ ++ rt_mutex_init_waiter(&waiter, false); ++ rt_spin_lock_slowlock_locked(m, &waiter, flags); ++ /* ++ * The slowlock() above is guaranteed to return with the rtmutex is ++ * now held, so there can't be a writer active. Increment the reader ++ * count and immediately drop the rtmutex again. ++ */ ++ atomic_inc(&lock->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ rt_spin_lock_slowunlock(m); ++ ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++void __read_rt_unlock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ struct task_struct *tsk; ++ ++ /* ++ * sem->readers can only hit 0 when a writer is waiting for the ++ * active readers to leave the critical region. ++ */ ++ if (!atomic_dec_and_test(&lock->readers)) ++ return; ++ ++ raw_spin_lock_irq(&m->wait_lock); ++ /* ++ * Wake the writer, i.e. the rtmutex owner. It might release the ++ * rtmutex concurrently in the fast path, but to clean up the rw ++ * lock it needs to acquire m->wait_lock. The worst case which can ++ * happen is a spurious wakeup. 
++ */ ++ tsk = rt_mutex_owner(m); ++ if (tsk) ++ wake_up_process(tsk); ++ ++ raw_spin_unlock_irq(&m->wait_lock); ++} ++ ++static void __write_unlock_common(struct rt_rw_lock *lock, int bias, ++ unsigned long flags) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ ++ atomic_add(READER_BIAS - bias, &lock->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ rt_spin_lock_slowunlock(m); ++} ++ ++void __sched __write_rt_lock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ struct task_struct *self = current; ++ unsigned long flags; ++ ++ /* Take the rtmutex as a first step */ ++ __rt_spin_lock(m); ++ ++ /* Force readers into slow path */ ++ atomic_sub(READER_BIAS, &lock->readers); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ ++ raw_spin_lock(&self->pi_lock); ++ self->saved_state = self->state; ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ ++ for (;;) { ++ /* Have all readers left the critical region? */ ++ if (!atomic_read(&lock->readers)) { ++ atomic_set(&lock->readers, WRITER_BIAS); ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(self->saved_state); ++ self->saved_state = TASK_RUNNING; ++ raw_spin_unlock(&self->pi_lock); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return; ++ } ++ ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ ++ if (atomic_read(&lock->readers) != 0) ++ schedule(); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ } ++} ++ ++int __write_rt_trylock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ unsigned long flags; ++ ++ if (!__rt_mutex_trylock(m)) ++ return 0; ++ ++ atomic_sub(READER_BIAS, &lock->readers); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ if (!atomic_read(&lock->readers)) { ++ atomic_set(&lock->readers, WRITER_BIAS); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return 1; ++ } ++ __write_unlock_common(lock, 0, flags); ++ return 0; ++} ++ ++void __write_rt_unlock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ __write_unlock_common(lock, WRITER_BIAS, flags); ++} ++ ++/* Map the reader biased implementation */ ++static inline int do_read_rt_trylock(rwlock_t *rwlock) ++{ ++ return __read_rt_trylock(rwlock); ++} ++ ++static inline int do_write_rt_trylock(rwlock_t *rwlock) ++{ ++ return __write_rt_trylock(rwlock); ++} ++ ++static inline void do_read_rt_lock(rwlock_t *rwlock) ++{ ++ __read_rt_lock(rwlock); ++} ++ ++static inline void do_write_rt_lock(rwlock_t *rwlock) ++{ ++ __write_rt_lock(rwlock); ++} ++ ++static inline void do_read_rt_unlock(rwlock_t *rwlock) ++{ ++ __read_rt_unlock(rwlock); ++} ++ ++static inline void do_write_rt_unlock(rwlock_t *rwlock) ++{ ++ __write_rt_unlock(rwlock); ++} ++ ++static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name, ++ struct lock_class_key *key) ++{ ++ __rwlock_biased_rt_init(rwlock, name, key); ++} ++ ++int __lockfunc rt_read_can_lock(rwlock_t *rwlock) ++{ ++ return atomic_read(&rwlock->readers) < 0; ++} ++ ++int __lockfunc rt_write_can_lock(rwlock_t *rwlock) ++{ ++ return atomic_read(&rwlock->readers) == READER_BIAS; ++} ++ ++/* ++ * The common functions which get wrapped into the rwlock API. 
++ */ ++int __lockfunc rt_read_trylock(rwlock_t *rwlock) ++{ ++ int ret; ++ ++ migrate_disable(); ++ ret = do_read_rt_trylock(rwlock); ++ if (ret) ++ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); ++ else ++ migrate_enable(); ++ return ret; ++} ++EXPORT_SYMBOL(rt_read_trylock); ++ ++int __lockfunc rt_write_trylock(rwlock_t *rwlock) ++{ ++ int ret; ++ ++ migrate_disable(); ++ ret = do_write_rt_trylock(rwlock); ++ if (ret) ++ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); ++ else ++ migrate_enable(); ++ return ret; ++} ++EXPORT_SYMBOL(rt_write_trylock); ++ ++void __lockfunc rt_read_lock(rwlock_t *rwlock) ++{ ++ migrate_disable(); ++ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); ++ do_read_rt_lock(rwlock); ++} ++EXPORT_SYMBOL(rt_read_lock); ++ ++void __lockfunc rt_write_lock(rwlock_t *rwlock) ++{ ++ migrate_disable(); ++ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); ++ do_write_rt_lock(rwlock); ++} ++EXPORT_SYMBOL(rt_write_lock); ++ ++void __lockfunc rt_read_unlock(rwlock_t *rwlock) ++{ ++ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); ++ do_read_rt_unlock(rwlock); ++ migrate_enable(); ++} ++EXPORT_SYMBOL(rt_read_unlock); ++ ++void __lockfunc rt_write_unlock(rwlock_t *rwlock) ++{ ++ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); ++ do_write_rt_unlock(rwlock); ++ migrate_enable(); ++} ++EXPORT_SYMBOL(rt_write_unlock); ++ ++void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) ++{ ++ do_rwlock_rt_init(rwlock, name, key); ++} ++EXPORT_SYMBOL(__rt_rwlock_init); diff --git a/debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch b/debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch new file mode 100644 index 000000000..c07656a6a --- /dev/null +++ b/debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch @@ -0,0 +1,392 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:28:34 +0200 +Subject: rtmutex: add rwsem implementation based on rtmutex +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The RT specific R/W semaphore implementation restricts the number of readers +to one because a writer cannot block on multiple readers and inherit its +priority or budget. + +The single reader restricting is painful in various ways: + + - Performance bottleneck for multi-threaded applications in the page fault + path (mmap sem) + + - Progress blocker for drivers which are carefully crafted to avoid the + potential reader/writer deadlock in mainline. + +The analysis of the writer code pathes shows, that properly written RT tasks +should not take them. Syscalls like mmap(), file access which take mmap sem +write locked have unbound latencies which are completely unrelated to mmap +sem. Other R/W sem users like graphics drivers are not suitable for RT tasks +either. + +So there is little risk to hurt RT tasks when the RT rwsem implementation is +changed in the following way: + + - Allow concurrent readers + + - Make writers block until the last reader left the critical section. This + blocking is not subject to priority/budget inheritance. + + - Readers blocked on a writer inherit their priority/budget in the normal + way. + +There is a drawback with this scheme. R/W semaphores become writer unfair +though the applications which have triggered writer starvation (mostly on +mmap_sem) in the past are not really the typical workloads running on a RT +system. So while it's unlikely to hit writer starvation, it's possible. 
If +there are unexpected workloads on RT systems triggering it, we need to rethink +the approach. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rwsem_rt.h | 67 +++++++++++ + kernel/locking/rwsem-rt.c | 269 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 336 insertions(+) + create mode 100644 include/linux/rwsem_rt.h + create mode 100644 kernel/locking/rwsem-rt.c + +--- /dev/null ++++ b/include/linux/rwsem_rt.h +@@ -0,0 +1,67 @@ ++#ifndef _LINUX_RWSEM_RT_H ++#define _LINUX_RWSEM_RT_H ++ ++#ifndef _LINUX_RWSEM_H ++#error "Include rwsem.h" ++#endif ++ ++#include ++#include ++ ++#define READER_BIAS (1U << 31) ++#define WRITER_BIAS (1U << 30) ++ ++struct rw_semaphore { ++ atomic_t readers; ++ struct rt_mutex rtmutex; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define __RWSEM_INITIALIZER(name) \ ++{ \ ++ .readers = ATOMIC_INIT(READER_BIAS), \ ++ .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ ++ RW_DEP_MAP_INIT(name) \ ++} ++ ++#define DECLARE_RWSEM(lockname) \ ++ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) ++ ++extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, ++ struct lock_class_key *key); ++ ++#define __init_rwsem(sem, name, key) \ ++do { \ ++ rt_mutex_init(&(sem)->rtmutex); \ ++ __rwsem_init((sem), (name), (key)); \ ++} while (0) ++ ++#define init_rwsem(sem) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __init_rwsem((sem), #sem, &__key); \ ++} while (0) ++ ++static inline int rwsem_is_locked(struct rw_semaphore *sem) ++{ ++ return atomic_read(&sem->readers) != READER_BIAS; ++} ++ ++static inline int rwsem_is_contended(struct rw_semaphore *sem) ++{ ++ return atomic_read(&sem->readers) > 0; ++} ++ ++extern void __down_read(struct rw_semaphore *sem); ++extern int __down_read_trylock(struct rw_semaphore *sem); ++extern void __down_write(struct rw_semaphore *sem); ++extern int __must_check __down_write_killable(struct rw_semaphore *sem); ++extern int __down_write_trylock(struct rw_semaphore *sem); ++extern void __up_read(struct rw_semaphore *sem); ++extern void __up_write(struct rw_semaphore *sem); ++extern void __downgrade_write(struct rw_semaphore *sem); ++ ++#endif +--- /dev/null ++++ b/kernel/locking/rwsem-rt.c +@@ -0,0 +1,269 @@ ++/* ++ */ ++#include ++#include ++#include ++#include ++ ++#include "rtmutex_common.h" ++ ++/* ++ * RT-specific reader/writer semaphores ++ * ++ * down_write() ++ * 1) Lock sem->rtmutex ++ * 2) Remove the reader BIAS to force readers into the slow path ++ * 3) Wait until all readers have left the critical region ++ * 4) Mark it write locked ++ * ++ * up_write() ++ * 1) Remove the write locked marker ++ * 2) Set the reader BIAS so readers can use the fast path again ++ * 3) Unlock sem->rtmutex to release blocked readers ++ * ++ * down_read() ++ * 1) Try fast path acquisition (reader BIAS is set) ++ * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag ++ * 3) If !writelocked, acquire it for read ++ * 4) If writelocked, block on sem->rtmutex ++ * 5) unlock sem->rtmutex, goto 1) ++ * ++ * up_read() ++ * 1) Try fast path release (reader count != 1) ++ * 2) Wake the writer waiting in down_write()#3 ++ * ++ * down_read()#3 has the consequence, that rw semaphores on RT are not writer ++ * fair, but writers, which should be avoided in RT tasks (think mmap_sem), ++ * are subject to the rtmutex priority/DL inheritance mechanism. 
++ * ++ * It's possible to make the rw semaphores writer fair by keeping a list of ++ * active readers. A blocked writer would force all newly incoming readers to ++ * block on the rtmutex, but the rtmutex would have to be proxy locked for one ++ * reader after the other. We can't use multi-reader inheritance because there ++ * is no way to support that with SCHED_DEADLINE. Implementing the one by one ++ * reader boosting/handover mechanism is a major surgery for a very dubious ++ * value. ++ * ++ * The risk of writer starvation is there, but the pathological use cases ++ * which trigger it are not necessarily the typical RT workloads. ++ */ ++ ++void __rwsem_init(struct rw_semaphore *sem, const char *name, ++ struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held semaphore: ++ */ ++ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); ++ lockdep_init_map(&sem->dep_map, name, key, 0); ++#endif ++ atomic_set(&sem->readers, READER_BIAS); ++} ++EXPORT_SYMBOL(__rwsem_init); ++ ++int __down_read_trylock(struct rw_semaphore *sem) ++{ ++ int r, old; ++ ++ /* ++ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is ++ * set. ++ */ ++ for (r = atomic_read(&sem->readers); r < 0;) { ++ old = atomic_cmpxchg(&sem->readers, r, r + 1); ++ if (likely(old == r)) ++ return 1; ++ r = old; ++ } ++ return 0; ++} ++ ++void __sched __down_read(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ struct rt_mutex_waiter waiter; ++ ++ if (__down_read_trylock(sem)) ++ return; ++ ++ might_sleep(); ++ raw_spin_lock_irq(&m->wait_lock); ++ /* ++ * Allow readers as long as the writer has not completely ++ * acquired the semaphore for write. ++ */ ++ if (atomic_read(&sem->readers) != WRITER_BIAS) { ++ atomic_inc(&sem->readers); ++ raw_spin_unlock_irq(&m->wait_lock); ++ return; ++ } ++ ++ /* ++ * Call into the slow lock path with the rtmutex->wait_lock ++ * held, so this can't result in the following race: ++ * ++ * Reader1 Reader2 Writer ++ * down_read() ++ * down_write() ++ * rtmutex_lock(m) ++ * swait() ++ * down_read() ++ * unlock(m->wait_lock) ++ * up_read() ++ * swake() ++ * lock(m->wait_lock) ++ * sem->writelocked=true ++ * unlock(m->wait_lock) ++ * ++ * up_write() ++ * sem->writelocked=false ++ * rtmutex_unlock(m) ++ * down_read() ++ * down_write() ++ * rtmutex_lock(m) ++ * swait() ++ * rtmutex_lock(m) ++ * ++ * That would put Reader1 behind the writer waiting on ++ * Reader2 to call up_read() which might be unbound. ++ */ ++ rt_mutex_init_waiter(&waiter, false); ++ rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL, ++ RT_MUTEX_MIN_CHAINWALK, ++ &waiter); ++ /* ++ * The slowlock() above is guaranteed to return with the rtmutex is ++ * now held, so there can't be a writer active. Increment the reader ++ * count and immediately drop the rtmutex again. ++ */ ++ atomic_inc(&sem->readers); ++ raw_spin_unlock_irq(&m->wait_lock); ++ __rt_mutex_unlock(m); ++ ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++void __up_read(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ struct task_struct *tsk; ++ ++ /* ++ * sem->readers can only hit 0 when a writer is waiting for the ++ * active readers to leave the critical region. ++ */ ++ if (!atomic_dec_and_test(&sem->readers)) ++ return; ++ ++ might_sleep(); ++ raw_spin_lock_irq(&m->wait_lock); ++ /* ++ * Wake the writer, i.e. the rtmutex owner. 
It might release the ++ * rtmutex concurrently in the fast path (due to a signal), but to ++ * clean up the rwsem it needs to acquire m->wait_lock. The worst ++ * case which can happen is a spurious wakeup. ++ */ ++ tsk = rt_mutex_owner(m); ++ if (tsk) ++ wake_up_process(tsk); ++ ++ raw_spin_unlock_irq(&m->wait_lock); ++} ++ ++static void __up_write_unlock(struct rw_semaphore *sem, int bias, ++ unsigned long flags) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ ++ atomic_add(READER_BIAS - bias, &sem->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ __rt_mutex_unlock(m); ++} ++ ++static int __sched __down_write_common(struct rw_semaphore *sem, int state) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ /* Take the rtmutex as a first step */ ++ if (__rt_mutex_lock_state(m, state)) ++ return -EINTR; ++ ++ /* Force readers into slow path */ ++ atomic_sub(READER_BIAS, &sem->readers); ++ might_sleep(); ++ ++ set_current_state(state); ++ for (;;) { ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ /* Have all readers left the critical region? */ ++ if (!atomic_read(&sem->readers)) { ++ atomic_set(&sem->readers, WRITER_BIAS); ++ __set_current_state(TASK_RUNNING); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return 0; ++ } ++ ++ if (signal_pending_state(state, current)) { ++ __set_current_state(TASK_RUNNING); ++ __up_write_unlock(sem, 0, flags); ++ return -EINTR; ++ } ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ ++ if (atomic_read(&sem->readers) != 0) { ++ schedule(); ++ set_current_state(state); ++ } ++ } ++} ++ ++void __sched __down_write(struct rw_semaphore *sem) ++{ ++ __down_write_common(sem, TASK_UNINTERRUPTIBLE); ++} ++ ++int __sched __down_write_killable(struct rw_semaphore *sem) ++{ ++ return __down_write_common(sem, TASK_KILLABLE); ++} ++ ++int __down_write_trylock(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ if (!__rt_mutex_trylock(m)) ++ return 0; ++ ++ atomic_sub(READER_BIAS, &sem->readers); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ if (!atomic_read(&sem->readers)) { ++ atomic_set(&sem->readers, WRITER_BIAS); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return 1; ++ } ++ __up_write_unlock(sem, 0, flags); ++ return 0; ++} ++ ++void __up_write(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ __up_write_unlock(sem, WRITER_BIAS, flags); ++} ++ ++void __downgrade_write(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ /* Release it and account current as reader */ ++ __up_write_unlock(sem, WRITER_BIAS - 1, flags); ++} diff --git a/debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch b/debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch new file mode 100644 index 000000000..a80b185e2 --- /dev/null +++ b/debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch @@ -0,0 +1,1197 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:11:19 +0200 +Subject: rtmutex: add sleeping lock implementation +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/kernel.h | 4 + include/linux/rtmutex.h | 21 + + include/linux/sched.h | 9 + include/linux/sched/wake_q.h | 27 ++ + 
include/linux/spinlock_rt.h | 159 +++++++++++++ + include/linux/spinlock_types_rt.h | 48 ++++ + kernel/fork.c | 1 + kernel/futex.c | 11 + kernel/locking/rtmutex.c | 449 ++++++++++++++++++++++++++++++++++---- + kernel/locking/rtmutex_common.h | 15 + + kernel/sched/core.c | 28 +- + 11 files changed, 713 insertions(+), 59 deletions(-) + create mode 100644 include/linux/spinlock_rt.h + create mode 100644 include/linux/spinlock_types_rt.h + +--- a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -225,6 +225,9 @@ extern int _cond_resched(void); + */ + # define might_sleep() \ + do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) ++ ++# define might_sleep_no_state_check() \ ++ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) + # define sched_annotate_sleep() (current->task_state_change = 0) + #else + static inline void ___might_sleep(const char *file, int line, +@@ -232,6 +235,7 @@ extern int _cond_resched(void); + static inline void __might_sleep(const char *file, int line, + int preempt_offset) { } + # define might_sleep() do { might_resched(); } while (0) ++# define might_sleep_no_state_check() do { might_resched(); } while (0) + # define sched_annotate_sleep() do { } while (0) + #endif + +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -14,11 +14,15 @@ + #define __LINUX_RT_MUTEX_H + + #include +-#include + #include ++#include + + extern int max_lock_depth; /* for sysctl */ + ++#ifdef CONFIG_DEBUG_MUTEXES ++#include ++#endif ++ + /** + * The rt_mutex structure + * +@@ -31,8 +35,8 @@ struct rt_mutex { + raw_spinlock_t wait_lock; + struct rb_root_cached waiters; + struct task_struct *owner; +-#ifdef CONFIG_DEBUG_RT_MUTEXES + int save_state; ++#ifdef CONFIG_DEBUG_RT_MUTEXES + const char *name, *file; + int line; + void *magic; +@@ -82,16 +86,23 @@ do { \ + #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) + #endif + +-#define __RT_MUTEX_INITIALIZER(mutexname) \ +- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ ++#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ + , .waiters = RB_ROOT_CACHED \ + , .owner = NULL \ + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ +- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} ++ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) ++ ++#define __RT_MUTEX_INITIALIZER(mutexname) \ ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } + + #define DEFINE_RT_MUTEX(mutexname) \ + struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) + ++#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ , .save_state = 1 } ++ + /** + * rt_mutex_is_locked - is the mutex locked + * @lock: the mutex to be queried +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -123,6 +123,11 @@ struct task_group; + smp_store_mb(current->state, (state_value)); \ + } while (0) + ++#define __set_current_state_no_track(state_value) \ ++ current->state = (state_value); ++#define set_current_state_no_track(state_value) \ ++ smp_store_mb(current->state, (state_value)); ++ + #else + /* + * set_current_state() includes a barrier so that the write of current->state +@@ -160,6 +165,9 @@ struct task_group; + */ + #define __set_current_state(state_value) do { current->state = (state_value); } while (0) + #define set_current_state(state_value) smp_store_mb(current->state, (state_value)) ++ ++#define __set_current_state_no_track(state_value) __set_current_state(state_value) ++#define set_current_state_no_track(state_value) 
set_current_state(state_value) + #endif + + /* Task command name length: */ +@@ -827,6 +835,7 @@ struct task_struct { + raw_spinlock_t pi_lock; + + struct wake_q_node wake_q; ++ struct wake_q_node wake_q_sleeper; + + #ifdef CONFIG_RT_MUTEXES + /* PI waiters blocked on a rt_mutex held by this task: */ +--- a/include/linux/sched/wake_q.h ++++ b/include/linux/sched/wake_q.h +@@ -47,8 +47,29 @@ static inline void wake_q_init(struct wa + head->lastp = &head->first; + } + +-extern void wake_q_add(struct wake_q_head *head, +- struct task_struct *task); +-extern void wake_up_q(struct wake_q_head *head); ++extern void __wake_q_add(struct wake_q_head *head, ++ struct task_struct *task, bool sleeper); ++static inline void wake_q_add(struct wake_q_head *head, ++ struct task_struct *task) ++{ ++ __wake_q_add(head, task, false); ++} ++ ++static inline void wake_q_add_sleeper(struct wake_q_head *head, ++ struct task_struct *task) ++{ ++ __wake_q_add(head, task, true); ++} ++ ++extern void __wake_up_q(struct wake_q_head *head, bool sleeper); ++static inline void wake_up_q(struct wake_q_head *head) ++{ ++ __wake_up_q(head, false); ++} ++ ++static inline void wake_up_q_sleeper(struct wake_q_head *head) ++{ ++ __wake_up_q(head, true); ++} + + #endif /* _LINUX_SCHED_WAKE_Q_H */ +--- /dev/null ++++ b/include/linux/spinlock_rt.h +@@ -0,0 +1,159 @@ ++#ifndef __LINUX_SPINLOCK_RT_H ++#define __LINUX_SPINLOCK_RT_H ++ ++#ifndef __LINUX_SPINLOCK_H ++#error Do not include directly. Use spinlock.h ++#endif ++ ++#include ++ ++extern void ++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key); ++ ++#define spin_lock_init(slock) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ rt_mutex_init(&(slock)->lock); \ ++ __rt_spin_lock_init(slock, #slock, &__key); \ ++} while (0) ++ ++extern void __lockfunc rt_spin_lock(spinlock_t *lock); ++extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); ++extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); ++extern void __lockfunc rt_spin_unlock(spinlock_t *lock); ++extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); ++extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); ++extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); ++extern int __lockfunc rt_spin_trylock(spinlock_t *lock); ++extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); ++ ++/* ++ * lockdep-less calls, for derived types like rwlock: ++ * (for trylock they can use rt_mutex_trylock() directly. ++ * Migrate disable handling must be done at the call site. 
++ */ ++extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); ++extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock); ++extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); ++ ++#define spin_lock(lock) rt_spin_lock(lock) ++ ++#define spin_lock_bh(lock) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_spin_lock(lock); \ ++ } while (0) ++ ++#define spin_lock_irq(lock) spin_lock(lock) ++ ++#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) ++ ++#define spin_trylock(lock) \ ++({ \ ++ int __locked; \ ++ __locked = spin_do_trylock(lock); \ ++ __locked; \ ++}) ++ ++#ifdef CONFIG_LOCKDEP ++# define spin_lock_nested(lock, subclass) \ ++ do { \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++ ++#define spin_lock_bh_nested(lock, subclass) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++ ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++#else ++# define spin_lock_nested(lock, subclass) spin_lock(lock) ++# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) ++ ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ spin_lock(lock); \ ++ } while (0) ++#endif ++ ++#define spin_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ spin_lock(lock); \ ++ } while (0) ++ ++static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) ++{ ++ unsigned long flags = 0; ++#ifdef CONFIG_TRACE_IRQFLAGS ++ flags = rt_spin_lock_trace_flags(lock); ++#else ++ spin_lock(lock); /* lock_local */ ++#endif ++ return flags; ++} ++ ++/* FIXME: we need rt_spin_lock_nest_lock */ ++#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) ++ ++#define spin_unlock(lock) rt_spin_unlock(lock) ++ ++#define spin_unlock_bh(lock) \ ++ do { \ ++ rt_spin_unlock(lock); \ ++ local_bh_enable(); \ ++ } while (0) ++ ++#define spin_unlock_irq(lock) spin_unlock(lock) ++ ++#define spin_unlock_irqrestore(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void) flags; \ ++ spin_unlock(lock); \ ++ } while (0) ++ ++#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) ++#define spin_trylock_irq(lock) spin_trylock(lock) ++ ++#define spin_trylock_irqsave(lock, flags) \ ++ rt_spin_trylock_irqsave(lock, &(flags)) ++ ++#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) ++ ++#ifdef CONFIG_GENERIC_LOCKBREAK ++# define spin_is_contended(lock) ((lock)->break_lock) ++#else ++# define spin_is_contended(lock) (((void)(lock), 0)) ++#endif ++ ++static inline int spin_can_lock(spinlock_t *lock) ++{ ++ return !rt_mutex_is_locked(&lock->lock); ++} ++ ++static inline int spin_is_locked(spinlock_t *lock) ++{ ++ return rt_mutex_is_locked(&lock->lock); ++} ++ ++static inline void assert_spin_locked(spinlock_t *lock) ++{ ++ BUG_ON(!spin_is_locked(lock)); ++} ++ ++#define atomic_dec_and_lock(atomic, lock) \ ++ atomic_dec_and_spin_lock(atomic, lock) ++ ++#endif +--- /dev/null ++++ b/include/linux/spinlock_types_rt.h +@@ -0,0 +1,48 @@ ++#ifndef __LINUX_SPINLOCK_TYPES_RT_H ++#define __LINUX_SPINLOCK_TYPES_RT_H ++ ++#ifndef __LINUX_SPINLOCK_TYPES_H ++#error "Do not include directly. 
Include spinlock_types.h instead" ++#endif ++ ++#include ++ ++/* ++ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: ++ */ ++typedef struct spinlock { ++ struct rt_mutex lock; ++ unsigned int break_lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} spinlock_t; ++ ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++# define __RT_SPIN_INITIALIZER(name) \ ++ { \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ ++ .save_state = 1, \ ++ .file = __FILE__, \ ++ .line = __LINE__ , \ ++ } ++#else ++# define __RT_SPIN_INITIALIZER(name) \ ++ { \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ ++ .save_state = 1, \ ++ } ++#endif ++ ++/* ++.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) ++*/ ++ ++#define __SPIN_LOCK_UNLOCKED(name) \ ++ { .lock = __RT_SPIN_INITIALIZER(name.lock), \ ++ SPIN_DEP_MAP_INIT(name) } ++ ++#define DEFINE_SPINLOCK(name) \ ++ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) ++ ++#endif +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -600,6 +600,7 @@ static struct task_struct *dup_task_stru + tsk->splice_pipe = NULL; + tsk->task_frag.page = NULL; + tsk->wake_q.next = NULL; ++ tsk->wake_q_sleeper.next = NULL; + + account_kernel_stack(tsk, 1); + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1432,6 +1432,7 @@ static int wake_futex_pi(u32 __user *uad + struct task_struct *new_owner; + bool postunlock = false; + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + int ret = 0; + + new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); +@@ -1493,13 +1494,13 @@ static int wake_futex_pi(u32 __user *uad + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); + +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); +- ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, ++ &wake_sleeper_q); + out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + + return ret; + } +@@ -2757,7 +2758,7 @@ static int futex_lock_pi(u32 __user *uad + goto no_block; + } + +- rt_mutex_init_waiter(&rt_waiter); ++ rt_mutex_init_waiter(&rt_waiter, false); + + /* + * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not +@@ -3129,7 +3130,7 @@ static int futex_wait_requeue_pi(u32 __u + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. + */ +- rt_mutex_init_waiter(&rt_waiter); ++ rt_mutex_init_waiter(&rt_waiter, false); + + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); + if (unlikely(ret != 0)) +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -7,6 +7,11 @@ + * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner + * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + * Copyright (C) 2006 Esben Nielsen ++ * Adaptive Spinlocks: ++ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, ++ * and Peter Morreale, ++ * Adaptive Spinlocks simplification: ++ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt + * + * See Documentation/locking/rt-mutex-design.txt for details. 
+ */ +@@ -234,7 +239,7 @@ static inline bool unlock_rt_mutex_safe( + * Only use with rt_mutex_waiter_{less,equal}() + */ + #define task_to_waiter(p) \ +- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } ++ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) } + + static inline int + rt_mutex_waiter_less(struct rt_mutex_waiter *left, +@@ -274,6 +279,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa + return 1; + } + ++#define STEAL_NORMAL 0 ++#define STEAL_LATERAL 1 ++ ++static inline int ++rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode) ++{ ++ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); ++ ++ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter)) ++ return 1; ++ ++ /* ++ * Note that RT tasks are excluded from lateral-steals ++ * to prevent the introduction of an unbounded latency. ++ */ ++ if (mode == STEAL_NORMAL || rt_task(waiter->task)) ++ return 0; ++ ++ return rt_mutex_waiter_equal(waiter, top_waiter); ++} ++ + static void + rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) + { +@@ -378,6 +404,14 @@ static bool rt_mutex_cond_detect_deadloc + return debug_rt_mutex_detect_deadlock(waiter, chwalk); + } + ++static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) ++{ ++ if (waiter->savestate) ++ wake_up_lock_sleeper(waiter->task); ++ else ++ wake_up_process(waiter->task); ++} ++ + /* + * Max number of times we'll walk the boosting chain: + */ +@@ -703,13 +737,16 @@ static int rt_mutex_adjust_prio_chain(st + * follow here. This is the end of the chain we are walking. + */ + if (!rt_mutex_owner(lock)) { ++ struct rt_mutex_waiter *lock_top_waiter; ++ + /* + * If the requeue [7] above changed the top waiter, + * then we need to wake the new top waiter up to try + * to get the lock. + */ +- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) +- wake_up_process(rt_mutex_top_waiter(lock)->task); ++ lock_top_waiter = rt_mutex_top_waiter(lock); ++ if (prerequeue_top_waiter != lock_top_waiter) ++ rt_mutex_wake_waiter(lock_top_waiter); + raw_spin_unlock_irq(&lock->wait_lock); + return 0; + } +@@ -811,9 +848,11 @@ static int rt_mutex_adjust_prio_chain(st + * @task: The task which wants to acquire the lock + * @waiter: The waiter that is queued to the lock's wait tree if the + * callsite called task_blocked_on_lock(), otherwise NULL ++ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL) + */ +-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, +- struct rt_mutex_waiter *waiter) ++static int __try_to_take_rt_mutex(struct rt_mutex *lock, ++ struct task_struct *task, ++ struct rt_mutex_waiter *waiter, int mode) + { + lockdep_assert_held(&lock->wait_lock); + +@@ -849,12 +888,11 @@ static int try_to_take_rt_mutex(struct r + */ + if (waiter) { + /* +- * If waiter is not the highest priority waiter of +- * @lock, give up. ++ * If waiter is not the highest priority waiter of @lock, ++ * or its peer when lateral steal is allowed, give up. + */ +- if (waiter != rt_mutex_top_waiter(lock)) ++ if (!rt_mutex_steal(lock, waiter, mode)) + return 0; +- + /* + * We can acquire the lock. Remove the waiter from the + * lock waiters tree. +@@ -872,14 +910,12 @@ static int try_to_take_rt_mutex(struct r + */ + if (rt_mutex_has_waiters(lock)) { + /* +- * If @task->prio is greater than or equal to +- * the top waiter priority (kernel view), +- * @task lost. 
++ * If @task->prio is greater than the top waiter ++ * priority (kernel view), or equal to it when a ++ * lateral steal is forbidden, @task lost. + */ +- if (!rt_mutex_waiter_less(task_to_waiter(task), +- rt_mutex_top_waiter(lock))) ++ if (!rt_mutex_steal(lock, task_to_waiter(task), mode)) + return 0; +- + /* + * The current top waiter stays enqueued. We + * don't have to change anything in the lock +@@ -926,6 +962,309 @@ static int try_to_take_rt_mutex(struct r + return 1; + } + ++#ifdef CONFIG_PREEMPT_RT_FULL ++/* ++ * preemptible spin_lock functions: ++ */ ++static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ might_sleep_no_state_check(); ++ ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ return; ++ else ++ slowfn(lock); ++} ++ ++static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) ++ return; ++ else ++ slowfn(lock); ++} ++#ifdef CONFIG_SMP ++/* ++ * Note that owner is a speculative pointer and dereferencing relies ++ * on rcu_read_lock() and the check against the lock owner. ++ */ ++static int adaptive_wait(struct rt_mutex *lock, ++ struct task_struct *owner) ++{ ++ int res = 0; ++ ++ rcu_read_lock(); ++ for (;;) { ++ if (owner != rt_mutex_owner(lock)) ++ break; ++ /* ++ * Ensure that owner->on_cpu is dereferenced _after_ ++ * checking the above to be valid. ++ */ ++ barrier(); ++ if (!owner->on_cpu) { ++ res = 1; ++ break; ++ } ++ cpu_relax(); ++ } ++ rcu_read_unlock(); ++ return res; ++} ++#else ++static int adaptive_wait(struct rt_mutex *lock, ++ struct task_struct *orig_owner) ++{ ++ return 1; ++} ++#endif ++ ++static int task_blocks_on_rt_mutex(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task, ++ enum rtmutex_chainwalk chwalk); ++/* ++ * Slow path lock function spin_lock style: this variant is very ++ * careful not to miss any non-lock wakeups. ++ * ++ * We store the current state under p->pi_lock in p->saved_state and ++ * the try_to_wake_up() code handles this accordingly. ++ */ ++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ unsigned long flags) ++{ ++ struct task_struct *lock_owner, *self = current; ++ struct rt_mutex_waiter *top_waiter; ++ int ret; ++ ++ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) ++ return; ++ ++ BUG_ON(rt_mutex_owner(lock) == self); ++ ++ /* ++ * We save whatever state the task is in and we'll restore it ++ * after acquiring the lock taking real wakeups into account ++ * as well. We are serialized via pi_lock against wakeups. See ++ * try_to_wake_up(). ++ */ ++ raw_spin_lock(&self->pi_lock); ++ self->saved_state = self->state; ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ ++ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK); ++ BUG_ON(ret); ++ ++ for (;;) { ++ /* Try to acquire the lock again. 
*/ ++ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL)) ++ break; ++ ++ top_waiter = rt_mutex_top_waiter(lock); ++ lock_owner = rt_mutex_owner(lock); ++ ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ debug_rt_mutex_print_deadlock(waiter); ++ ++ if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) ++ schedule(); ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ } ++ ++ /* ++ * Restore the task state to current->saved_state. We set it ++ * to the original state above and the try_to_wake_up() code ++ * has possibly updated it when a real (non-rtmutex) wakeup ++ * happened while we were blocked. Clear saved_state so ++ * try_to_wakeup() does not get confused. ++ */ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(self->saved_state); ++ self->saved_state = TASK_RUNNING; ++ raw_spin_unlock(&self->pi_lock); ++ ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit ++ * unconditionally. We might have to fix that up: ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock)); ++ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry)); ++} ++ ++static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) ++{ ++ struct rt_mutex_waiter waiter; ++ unsigned long flags; ++ ++ rt_mutex_init_waiter(&waiter, true); ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ rt_spin_lock_slowlock_locked(lock, &waiter, flags); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper); ++/* ++ * Slow path to release a rt_mutex spin_lock style ++ */ ++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) ++{ ++ unsigned long flags; ++ DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); ++ bool postunlock; ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ if (postunlock) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); ++} ++ ++void __lockfunc rt_spin_lock(spinlock_t *lock) ++{ ++ migrate_disable(); ++ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++} ++EXPORT_SYMBOL(rt_spin_lock); ++ ++void __lockfunc __rt_spin_lock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); ++} ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) ++{ ++ migrate_disable(); ++ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++} ++EXPORT_SYMBOL(rt_spin_lock_nested); ++#endif ++ ++void __lockfunc rt_spin_unlock(spinlock_t *lock) ++{ ++ /* NOTE: we always pass in '1' for nested, for simplicity */ ++ spin_release(&lock->dep_map, 1, _RET_IP_); ++ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); ++ migrate_enable(); ++} ++EXPORT_SYMBOL(rt_spin_unlock); ++ ++void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); ++} ++EXPORT_SYMBOL(__rt_spin_unlock); ++ ++/* ++ * Wait for the lock to get unlocked: instead of polling for an unlock ++ * (like raw spinlocks do), we lock and unlock, to force the 
kernel to ++ * schedule if there's contention: ++ */ ++void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) ++{ ++ spin_lock(lock); ++ spin_unlock(lock); ++} ++EXPORT_SYMBOL(rt_spin_unlock_wait); ++ ++int __lockfunc rt_spin_trylock(spinlock_t *lock) ++{ ++ int ret; ++ ++ migrate_disable(); ++ ret = __rt_mutex_trylock(&lock->lock); ++ if (ret) ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ else ++ migrate_enable(); ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock); ++ ++int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) ++{ ++ int ret; ++ ++ local_bh_disable(); ++ ret = __rt_mutex_trylock(&lock->lock); ++ if (ret) { ++ migrate_disable(); ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ } else ++ local_bh_enable(); ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock_bh); ++ ++int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) ++{ ++ int ret; ++ ++ *flags = 0; ++ ret = __rt_mutex_trylock(&lock->lock); ++ if (ret) { ++ migrate_disable(); ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock_irqsave); ++ ++int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) ++{ ++ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ ++ if (atomic_add_unless(atomic, -1, 1)) ++ return 0; ++ rt_spin_lock(lock); ++ if (atomic_dec_and_test(atomic)) ++ return 1; ++ rt_spin_unlock(lock); ++ return 0; ++} ++EXPORT_SYMBOL(atomic_dec_and_spin_lock); ++ ++void ++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++#endif ++} ++EXPORT_SYMBOL(__rt_spin_lock_init); ++ ++#endif /* PREEMPT_RT_FULL */ ++ ++static inline int ++try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, ++ struct rt_mutex_waiter *waiter) ++{ ++ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); ++} ++ + /* + * Task blocks on lock. + * +@@ -1039,6 +1378,7 @@ static int task_blocks_on_rt_mutex(struc + * Called with lock->wait_lock held and interrupts disabled. + */ + static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q, + struct rt_mutex *lock) + { + struct rt_mutex_waiter *waiter; +@@ -1078,7 +1418,10 @@ static void mark_wakeup_next_waiter(stru + * Pairs with preempt_enable() in rt_mutex_postunlock(); + */ + preempt_disable(); +- wake_q_add(wake_q, waiter->task); ++ if (waiter->savestate) ++ wake_q_add_sleeper(wake_sleeper_q, waiter->task); ++ else ++ wake_q_add(wake_q, waiter->task); + raw_spin_unlock(¤t->pi_lock); + } + +@@ -1162,21 +1505,22 @@ void rt_mutex_adjust_pi(struct task_stru + return; + } + next_lock = waiter->lock; +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + /* gets dropped in rt_mutex_adjust_prio_chain()! 
*/ + get_task_struct(task); + ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); + rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, + next_lock, NULL, task); + } + +-void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) ++void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) + { + debug_rt_mutex_init_waiter(waiter); + RB_CLEAR_NODE(&waiter->pi_tree_entry); + RB_CLEAR_NODE(&waiter->tree_entry); + waiter->task = NULL; ++ waiter->savestate = savestate; + } + + /** +@@ -1295,7 +1639,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, + unsigned long flags; + int ret = 0; + +- rt_mutex_init_waiter(&waiter); ++ rt_mutex_init_waiter(&waiter, false); + + /* + * Technically we could use raw_spin_[un]lock_irq() here, but this can +@@ -1361,7 +1705,8 @@ static inline int rt_mutex_slowtrylock(s + * Return whether the current task needs to call rt_mutex_postunlock(). + */ + static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q) + { + unsigned long flags; + +@@ -1415,7 +1760,7 @@ static bool __sched rt_mutex_slowunlock( + * + * Queue the next waiter for wakeup once we release the wait_lock. + */ +- mark_wakeup_next_waiter(wake_q, lock); ++ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + return true; /* call rt_mutex_postunlock() */ +@@ -1467,9 +1812,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo + /* + * Performs the wakeup of the the top-waiter and re-enables preemption. + */ +-void rt_mutex_postunlock(struct wake_q_head *wake_q) ++void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q) + { + wake_up_q(wake_q); ++ wake_up_q_sleeper(wake_sleeper_q); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ + preempt_enable(); +@@ -1478,15 +1825,17 @@ void rt_mutex_postunlock(struct wake_q_h + static inline void + rt_mutex_fastunlock(struct rt_mutex *lock, + bool (*slowfn)(struct rt_mutex *lock, +- struct wake_q_head *wqh)) ++ struct wake_q_head *wqh, ++ struct wake_q_head *wq_sleeper)) + { + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) + return; + +- if (slowfn(lock, &wake_q)) +- rt_mutex_postunlock(&wake_q); ++ if (slowfn(lock, &wake_q, &wake_sleeper_q)) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + } + + int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) +@@ -1641,16 +1990,13 @@ void __sched __rt_mutex_unlock(struct rt + void __sched rt_mutex_unlock(struct rt_mutex *lock) + { + mutex_release(&lock->dep_map, 1, _RET_IP_); +- rt_mutex_fastunlock(lock, rt_mutex_slowunlock); ++ __rt_mutex_unlock(lock); + } + EXPORT_SYMBOL_GPL(rt_mutex_unlock); + +-/** +- * Futex variant, that since futex variants do not use the fast-path, can be +- * simple and will not need to retry. +- */ +-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) + { + lockdep_assert_held(&lock->wait_lock); + +@@ -1667,22 +2013,34 @@ bool __sched __rt_mutex_futex_unlock(str + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). 
+ */ +- mark_wakeup_next_waiter(wake_q, lock); ++ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); + + return true; /* call postunlock() */ + } + ++/** ++ * Futex variant, that since futex variants do not use the fast-path, can be ++ * simple and will not need to retry. ++ */ ++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) ++{ ++ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); ++} ++ + void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) + { + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + bool postunlock; + + raw_spin_lock_irq(&lock->wait_lock); +- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); + raw_spin_unlock_irq(&lock->wait_lock); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + } + + /** +@@ -1721,7 +2079,7 @@ void __rt_mutex_init(struct rt_mutex *lo + if (name && key) + debug_rt_mutex_init(lock, name, key); + } +-EXPORT_SYMBOL_GPL(__rt_mutex_init); ++EXPORT_SYMBOL(__rt_mutex_init); + + /** + * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a +@@ -1890,6 +2248,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter) + { ++ struct task_struct *tsk = current; + int ret; + + raw_spin_lock_irq(&lock->wait_lock); +@@ -1901,6 +2260,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); ++ /* ++ * RT has a problem here when the wait got interrupted by a timeout ++ * or a signal. task->pi_blocked_on is still set. The task must ++ * acquire the hash bucket lock when returning from this function. ++ * ++ * If the hash bucket lock is contended then the ++ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in ++ * task_blocks_on_rt_mutex() will trigger. This can be avoided by ++ * clearing task->pi_blocked_on which removes the task from the ++ * boosting chain of the rtmutex. That's correct because the task ++ * is not longer blocked on it. 
++ */ ++ if (ret) { ++ raw_spin_lock(&tsk->pi_lock); ++ tsk->pi_blocked_on = NULL; ++ raw_spin_unlock(&tsk->pi_lock); ++ } ++ + raw_spin_unlock_irq(&lock->wait_lock); + + return ret; +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -15,6 +15,7 @@ + + #include + #include ++#include + + /* + * This is the control structure for tasks blocked on a rt_mutex, +@@ -29,6 +30,7 @@ struct rt_mutex_waiter { + struct rb_node pi_tree_entry; + struct task_struct *task; + struct rt_mutex *lock; ++ bool savestate; + #ifdef CONFIG_DEBUG_RT_MUTEXES + unsigned long ip; + struct pid *deadlock_task_pid; +@@ -137,7 +139,7 @@ extern void rt_mutex_init_proxy_locked(s + struct task_struct *proxy_owner); + extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, + struct task_struct *proxy_owner); +-extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); ++extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); + extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task); +@@ -154,9 +156,12 @@ extern int rt_mutex_futex_trylock(struct + + extern void rt_mutex_futex_unlock(struct rt_mutex *lock); + extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wqh); ++ struct wake_q_head *wqh, ++ struct wake_q_head *wq_sleeper); ++ ++extern void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q); + +-extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + /* RW semaphore special interface */ + + extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); +@@ -166,6 +171,10 @@ int __sched rt_mutex_slowlock_locked(str + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, + struct rt_mutex_waiter *waiter); ++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ unsigned long flags); ++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock); + + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -427,9 +427,15 @@ static bool set_nr_if_polling(struct tas + #endif + #endif + +-void wake_q_add(struct wake_q_head *head, struct task_struct *task) ++void __wake_q_add(struct wake_q_head *head, struct task_struct *task, ++ bool sleeper) + { +- struct wake_q_node *node = &task->wake_q; ++ struct wake_q_node *node; ++ ++ if (sleeper) ++ node = &task->wake_q_sleeper; ++ else ++ node = &task->wake_q; + + /* + * Atomically grab the task, if ->wake_q is !nil already it means +@@ -451,24 +457,32 @@ void wake_q_add(struct wake_q_head *head + head->lastp = &node->next; + } + +-void wake_up_q(struct wake_q_head *head) ++void __wake_up_q(struct wake_q_head *head, bool sleeper) + { + struct wake_q_node *node = head->first; + + while (node != WAKE_Q_TAIL) { + struct task_struct *task; + +- task = container_of(node, struct task_struct, wake_q); ++ if (sleeper) ++ task = container_of(node, struct task_struct, wake_q_sleeper); ++ else ++ task = container_of(node, struct task_struct, wake_q); + BUG_ON(!task); + /* Task can safely be re-inserted now: */ + node = node->next; +- task->wake_q.next = NULL; +- ++ if (sleeper) ++ task->wake_q_sleeper.next = NULL; ++ else ++ task->wake_q.next = NULL; + /* + * wake_up_process() implies a wmb() to pair with the queueing + * in wake_q_add() so as not to miss wakeups. 
+ */ +- wake_up_process(task); ++ if (sleeper) ++ wake_up_lock_sleeper(task); ++ else ++ wake_up_process(task); + put_task_struct(task); + } + } diff --git a/debian/patches/features/all/rt/rtmutex-add-a-first-shot-of-ww_mutex.patch b/debian/patches/features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch similarity index 66% rename from debian/patches/features/all/rt/rtmutex-add-a-first-shot-of-ww_mutex.patch rename to debian/patches/features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch index 64826419e..f8c1d2c75 100644 --- a/debian/patches/features/all/rt/rtmutex-add-a-first-shot-of-ww_mutex.patch +++ b/debian/patches/features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch @@ -1,30 +1,14 @@ -From: Sebastian Andrzej Siewior -Date: Mon, 28 Oct 2013 09:36:37 +0100 -Subject: rtmutex: Add RT aware ww locks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +From: Sebastian Andrzej Siewior +Date: Thu, 12 Oct 2017 17:34:38 +0200 +Subject: rtmutex: add ww_mutex addon for mutex-rt +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz -lockdep says: -| -------------------------------------------------------------------------- -| | Wound/wait tests | -| --------------------- -| ww api failures: ok | ok | ok | -| ww contexts mixing: ok | ok | -| finishing ww context: ok | ok | ok | ok | -| locking mismatches: ok | ok | ok | -| EDEADLK handling: ok | ok | ok | ok | ok | ok | ok | ok | ok | ok | -| spinlock nest unlocked: ok | -| ----------------------------------------------------- -| |block | try |context| -| ----------------------------------------------------- -| context: ok | ok | ok | -| try: ok | ok | ok | -| block: ok | ok | ok | -| spinlock: ok | ok | ok | - -Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Sebastian Andrzej Siewior --- - kernel/locking/rtmutex.c | 273 ++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 247 insertions(+), 26 deletions(-) + kernel/locking/rtmutex.c | 264 ++++++++++++++++++++++++++++++++++++++-- + kernel/locking/rtmutex_common.h | 2 + kernel/locking/rwsem-rt.c | 2 + 3 files changed, 257 insertions(+), 11 deletions(-) --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -36,18 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "rtmutex_common.h" -@@ -1285,8 +1286,8 @@ int atomic_dec_and_spin_lock(atomic_t *a - } - EXPORT_SYMBOL(atomic_dec_and_spin_lock); - -- void --__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) -+void -+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) - { - #ifdef CONFIG_DEBUG_LOCK_ALLOC - /* -@@ -1300,6 +1301,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); +@@ -1258,6 +1259,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); #endif /* PREEMPT_RT_FULL */ @@ -88,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter) -@@ -1580,7 +1615,8 @@ void rt_mutex_init_waiter(struct rt_mute +@@ -1536,7 +1571,8 @@ void rt_mutex_init_waiter(struct rt_mute static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, @@ -98,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior { int ret = 0; -@@ -1598,6 +1634,12 @@ static int __sched +@@ -1554,6 +1590,12 @@ static int __sched break; } @@ -111,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irq(&lock->wait_lock); 
debug_rt_mutex_print_deadlock(waiter); -@@ -1632,13 +1674,91 @@ static void rt_mutex_handle_deadlock(int +@@ -1588,16 +1630,106 @@ static void rt_mutex_handle_deadlock(int } } @@ -173,14 +146,13 @@ Signed-off-by: Sebastian Andrzej Siewior + * Give any possible sleeping processes the chance to wake up, + * so they can recheck if they have to back off. + */ -+ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters, ++ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, + tree_entry) { + /* XXX debug rt mutex waiter wakeup */ + + BUG_ON(waiter->lock != lock); + rt_mutex_wake_waiter(waiter); + } -+ +} + +#else @@ -192,21 +164,13 @@ Signed-off-by: Sebastian Andrzej Siewior +} +#endif + - /* - * Slow path lock function: - */ - static int __sched - rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk) -+ enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx) + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, ++ struct ww_acquire_ctx *ww_ctx, + struct rt_mutex_waiter *waiter) { - struct rt_mutex_waiter waiter; - unsigned long flags; -@@ -1656,8 +1776,20 @@ rt_mutex_slowlock(struct rt_mutex *lock, - */ - raw_spin_lock_irqsave(&lock->wait_lock, flags); + int ret; +#ifdef CONFIG_PREEMPT_RT_FULL + if (ww_ctx) { @@ -219,39 +183,61 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif + /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock, current, NULL)) { +- if (try_to_take_rt_mutex(lock, current, NULL)) ++ if (try_to_take_rt_mutex(lock, current, NULL)) { + if (ww_ctx) + ww_mutex_account_lock(lock, ww_ctx); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); return 0; - } -@@ -1672,13 +1804,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, ++ } - if (likely(!ret)) + set_current_state(state); + +@@ -1609,7 +1741,12 @@ int __sched rt_mutex_slowlock_locked(str + + if (likely(!ret)) { /* sleep on the mutex */ -- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); -+ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, +- ret = __rt_mutex_slowlock(lock, state, timeout, waiter); ++ ret = __rt_mutex_slowlock(lock, state, timeout, waiter, + ww_ctx); -+ else if (ww_ctx) { ++ } else if (ww_ctx) { + /* ww_mutex received EDEADLK, let it become EALREADY */ + ret = __mutex_lock_check_stamp(lock, ww_ctx); + BUG_ON(!ret); -+ } + } if (unlikely(ret)) { - __set_current_state(TASK_RUNNING); +@@ -1617,6 +1754,10 @@ int __sched rt_mutex_slowlock_locked(str if (rt_mutex_has_waiters(lock)) - remove_waiter(lock, &waiter); -- rt_mutex_handle_deadlock(ret, chwalk, &waiter); -+ /* ww_mutex want to report EDEADLK/EALREADY, let them */ + remove_waiter(lock, waiter); + /* ww_mutex want to report EDEADLK/EALREADY, let them */ + if (!ww_ctx) -+ rt_mutex_handle_deadlock(ret, chwalk, &waiter); ++ rt_mutex_handle_deadlock(ret, chwalk, waiter); + } else if (ww_ctx) { + ww_mutex_account_lock(lock, ww_ctx); } /* -@@ -1808,29 +1950,33 @@ static bool __sched rt_mutex_slowunlock( +@@ -1633,7 +1774,8 @@ int __sched rt_mutex_slowlock_locked(str + static int __sched + rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, +- enum rtmutex_chainwalk chwalk) ++ enum rtmutex_chainwalk chwalk, ++ struct ww_acquire_ctx *ww_ctx) + { + struct rt_mutex_waiter waiter; + unsigned long flags; +@@ -1651,7 +1793,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, + */ + raw_spin_lock_irqsave(&lock->wait_lock, flags); 
+ +- ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); ++ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, ++ &waiter); + + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + +@@ -1774,29 +1917,33 @@ static bool __sched rt_mutex_slowunlock( */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, @@ -289,42 +275,15 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline int -@@ -1881,7 +2027,7 @@ void __sched rt_mutex_lock(struct rt_mut - { - might_sleep(); - -- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); -+ rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock); - } - EXPORT_SYMBOL_GPL(rt_mutex_lock); - -@@ -1898,7 +2044,7 @@ int __sched rt_mutex_lock_interruptible( - { - might_sleep(); - -- return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); -+ return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock); - } - EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); - -@@ -1925,7 +2071,7 @@ int __sched rt_mutex_lock_killable(struc - { - might_sleep(); - -- return rt_mutex_fastlock(lock, TASK_KILLABLE, rt_mutex_slowlock); -+ return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock); - } - EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); - -@@ -1949,6 +2095,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc - - return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, +@@ -1934,6 +2081,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); + ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, RT_MUTEX_MIN_CHAINWALK, + NULL, rt_mutex_slowlock); - } - EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); -@@ -2247,7 +2394,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m + if (ret) + mutex_release(&lock->dep_map, 1, _RET_IP_); +@@ -2254,7 +2402,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m raw_spin_lock_irq(&lock->wait_lock); /* sleep on the mutex */ set_current_state(TASK_INTERRUPTIBLE); @@ -333,10 +292,11 @@ Signed-off-by: Sebastian Andrzej Siewior /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. 
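The hunks below add the PREEMPT_RT_FULL implementations of ww_mutex_lock(), ww_mutex_lock_interruptible() and ww_mutex_unlock() on top of the rtmutex slow path. For orientation only, here is a caller-side sketch of the standard wound/wait pattern those entry points serve; this is not part of the patch, and demo_ww_class, lock_both and the two-lock layout are made-up illustration names (only the ww_mutex_*/ww_acquire_* calls are real interfaces):

        #include <linux/ww_mutex.h>

        static DEFINE_WW_CLASS(demo_ww_class);

        /* Acquire two ww_mutexes in either order without deadlocking. */
        static void lock_both(struct ww_mutex *a, struct ww_mutex *b)
        {
                struct ww_acquire_ctx ctx;
                struct ww_mutex *held = NULL;   /* lock re-taken via the slow path */
                int ret;

                ww_acquire_init(&ctx, &demo_ww_class);
        retry:
                if (held != a) {
                        ret = ww_mutex_lock(a, &ctx);
                        if (ret == -EDEADLK) {
                                /* wounded: drop what we hold, block on a, retry */
                                if (held)
                                        ww_mutex_unlock(held);
                                ww_mutex_lock_slow(a, &ctx);
                                held = a;
                                goto retry;
                        }
                }
                if (held != b) {
                        ret = ww_mutex_lock(b, &ctx);
                        if (ret == -EDEADLK) {
                                /* wounded: drop a, block on b, retry */
                                ww_mutex_unlock(a);
                                ww_mutex_lock_slow(b, &ctx);
                                held = b;
                                goto retry;
                        }
                }
                ww_acquire_done(&ctx);

                /* ... both locks held ... */

                ww_mutex_unlock(a);
                ww_mutex_unlock(b);
                ww_acquire_fini(&ctx);
        }

The -EDEADLK/-EALREADY returns that this caller pattern depends on are exactly what the RT variants below reproduce via rt_mutex_slowlock_locked() and __mutex_lock_check_stamp().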
-@@ -2314,24 +2461,98 @@ bool rt_mutex_cleanup_proxy_lock(struct +@@ -2338,3 +2486,99 @@ bool rt_mutex_cleanup_proxy_lock(struct + return cleanup; } - ++ +static inline int +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ @@ -363,16 +323,10 @@ Signed-off-by: Sebastian Andrzej Siewior + return 0; +} + - #ifdef CONFIG_PREEMPT_RT_FULL --struct ww_mutex { --}; --struct ww_acquire_ctx { --}; --int __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) ++#ifdef CONFIG_PREEMPT_RT_FULL +int __sched +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) - { -- BUG(); ++{ + int ret; + + might_sleep(); @@ -387,15 +341,12 @@ Signed-off-by: Sebastian Andrzej Siewior + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; - } --EXPORT_SYMBOL_GPL(__ww_mutex_lock); --int __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) ++} +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); + +int __sched +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) - { -- BUG(); ++{ + int ret; + + might_sleep(); @@ -410,13 +361,11 @@ Signed-off-by: Sebastian Andrzej Siewior + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; - } --EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); ++} +EXPORT_SYMBOL_GPL(ww_mutex_lock); + - void __sched ww_mutex_unlock(struct ww_mutex *lock) - { -- BUG(); ++void __sched ww_mutex_unlock(struct ww_mutex *lock) ++{ + int nest = !!lock->ctx; + + /* @@ -433,14 +382,42 @@ Signed-off-by: Sebastian Andrzej Siewior + } + + mutex_release(&lock->base.dep_map, nest, _RET_IP_); -+ rt_mutex_unlock(&lock->base.lock); ++ __rt_mutex_unlock(&lock->base.lock); +} +EXPORT_SYMBOL(ww_mutex_unlock); + +int __rt_mutex_owner_current(struct rt_mutex *lock) +{ + return rt_mutex_owner(lock) == current; - } --EXPORT_SYMBOL_GPL(ww_mutex_unlock); ++} +EXPORT_SYMBOL(__rt_mutex_owner_current); - #endif ++#endif +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -163,6 +163,7 @@ extern void rt_mutex_postunlock(struct w + struct wake_q_head *wake_sleeper_q); + + /* RW semaphore special interface */ ++struct ww_acquire_ctx; + + extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); + extern int __rt_mutex_trylock(struct rt_mutex *lock); +@@ -170,6 +171,7 @@ extern void __rt_mutex_unlock(struct rt_ + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, ++ struct ww_acquire_ctx *ww_ctx, + struct rt_mutex_waiter *waiter); + void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, +--- a/kernel/locking/rwsem-rt.c ++++ b/kernel/locking/rwsem-rt.c +@@ -130,7 +130,7 @@ void __sched __down_read(struct rw_semap + */ + rt_mutex_init_waiter(&waiter, false); + rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL, +- RT_MUTEX_MIN_CHAINWALK, ++ RT_MUTEX_MIN_CHAINWALK, NULL, + &waiter); + /* + * The slowlock() above is guaranteed to return with the rtmutex is diff --git a/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch b/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch index 43e73a589..19b171f78 100644 --- a/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch +++ b/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch @@ -1,7 +1,7 @@ Subject: rtmutex: Avoid include hell From: Thomas Gleixner Date: Wed, 29 Jun 2011 20:06:39 +0200 -Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Include only the required raw types. This avoids pulling in the complete spinlock header which in turn requires rtmutex.h at some point. @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h -@@ -14,7 +14,7 @@ +@@ -15,7 +15,7 @@ #include #include diff --git a/debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch b/debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch new file mode 100644 index 000000000..0d4d53b54 --- /dev/null +++ b/debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch @@ -0,0 +1,151 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 16:36:39 +0200 +Subject: rtmutex: export lockdep-less version of rt_mutex's lock, + trylock and unlock +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Required for lock implementation ontop of rtmutex. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 70 +++++++++++++++++++++++++--------------- + kernel/locking/rtmutex_common.h | 3 + + 2 files changed, 47 insertions(+), 26 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1489,6 +1489,29 @@ rt_mutex_fastunlock(struct rt_mutex *loc + rt_mutex_postunlock(&wake_q); + } + ++int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) ++{ ++ might_sleep(); ++ return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); ++} ++ ++/** ++ * rt_mutex_lock_state - lock a rt_mutex with a given state ++ * ++ * @lock: The rt_mutex to be locked ++ * @state: The state to set when blocking on the rt_mutex ++ */ ++static int __sched rt_mutex_lock_state(struct rt_mutex *lock, int state) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = __rt_mutex_lock_state(lock, state); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++ + /** + * rt_mutex_lock - lock a rt_mutex + * +@@ -1496,10 +1519,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc + */ + void __sched rt_mutex_lock(struct rt_mutex *lock) + { +- might_sleep(); +- +- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); +- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); ++ rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE); + } + EXPORT_SYMBOL_GPL(rt_mutex_lock); + +@@ -1514,16 +1534,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); + */ + int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) + { +- int ret; +- +- might_sleep(); +- +- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); +- ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); +- if (ret) +- mutex_release(&lock->dep_map, 1, _RET_IP_); +- +- return ret; ++ return rt_mutex_lock_state(lock, TASK_INTERRUPTIBLE); + } + EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); + +@@ -1544,13 +1555,10 @@ int __sched rt_mutex_futex_trylock(struc + * Returns: + * 0 on success + * -EINTR when interrupted by a signal +- * -EDEADLK when the lock would deadlock (when deadlock detection is on) + */ + int __sched rt_mutex_lock_killable(struct rt_mutex *lock) + { +- might_sleep(); +- +- return rt_mutex_fastlock(lock, TASK_KILLABLE, rt_mutex_slowlock); ++ return rt_mutex_lock_state(lock, TASK_KILLABLE); + } + EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); + +@@ 
-1585,6 +1593,18 @@ rt_mutex_timed_lock(struct rt_mutex *loc + } + EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); + ++int __sched __rt_mutex_trylock(struct rt_mutex *lock) ++{ ++#ifdef CONFIG_PREEMPT_RT_FULL ++ if (WARN_ON_ONCE(in_irq() || in_nmi())) ++#else ++ if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) ++#endif ++ return 0; ++ ++ return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); ++} ++ + /** + * rt_mutex_trylock - try to lock a rt_mutex + * +@@ -1600,14 +1620,7 @@ int __sched rt_mutex_trylock(struct rt_m + { + int ret; + +-#ifdef CONFIG_PREEMPT_RT_FULL +- if (WARN_ON_ONCE(in_irq() || in_nmi())) +-#else +- if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) +-#endif +- return 0; +- +- ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); ++ ret = __rt_mutex_trylock(lock); + if (ret) + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + +@@ -1615,6 +1628,11 @@ int __sched rt_mutex_trylock(struct rt_m + } + EXPORT_SYMBOL_GPL(rt_mutex_trylock); + ++void __sched __rt_mutex_unlock(struct rt_mutex *lock) ++{ ++ rt_mutex_fastunlock(lock, rt_mutex_slowunlock); ++} ++ + /** + * rt_mutex_unlock - unlock a rt_mutex + * +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -159,6 +159,9 @@ extern bool __rt_mutex_futex_unlock(stru + extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + /* RW semaphore special interface */ + ++extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); ++extern int __rt_mutex_trylock(struct rt_mutex *lock); ++extern void __rt_mutex_unlock(struct rt_mutex *lock); + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, diff --git a/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch b/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch index d521605c5..b63ce382f 100644 --- a/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch +++ b/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch @@ -1,7 +1,7 @@ Subject: rtmutex: Handle the various new futex race conditions From: Thomas Gleixner Date: Fri, 10 Jun 2011 11:04:15 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RT opens a few new interesting race conditions in the rtmutex/futex combo due to futex hash bucket lock being a 'sleeping' spinlock and @@ -10,13 +10,13 @@ therefor not disabling preemption. 
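For orientation, the user-space construct that drives these kernel paths is a priority-inheritance pthread mutex: contended lock/unlock on it goes through FUTEX_LOCK_PI/FUTEX_UNLOCK_PI, and pthread condition variables used with such a mutex exercise the requeue-PI path handled below. A minimal user-space sketch, illustrative only and not part of the patch:

        #include <pthread.h>

        static pthread_mutex_t pi_lock;

        /* Contended operations on this mutex end up in futex_lock_pi() /
         * wake_futex_pi(), i.e. in the rt_mutex code touched by this patch. */
        static void init_pi_lock(void)
        {
                pthread_mutexattr_t attr;

                pthread_mutexattr_init(&attr);
                pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
                pthread_mutex_init(&pi_lock, &attr);
                pthread_mutexattr_destroy(&attr);
        }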
Signed-off-by: Thomas Gleixner --- kernel/futex.c | 77 ++++++++++++++++++++++++++++++++-------- - kernel/locking/rtmutex.c | 37 ++++++++++++++++--- + kernel/locking/rtmutex.c | 36 +++++++++++++++--- kernel/locking/rtmutex_common.h | 2 + - 3 files changed, 95 insertions(+), 21 deletions(-) + 3 files changed, 94 insertions(+), 21 deletions(-) --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2013,6 +2013,16 @@ static int futex_requeue(u32 __user *uad +@@ -2101,6 +2101,16 @@ static int futex_requeue(u32 __user *uad requeue_pi_wake_futex(this, &key2, hb2); drop_count++; continue; @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner } else if (ret) { /* * rt_mutex_start_proxy_lock() detected a -@@ -2998,7 +3008,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3090,7 +3100,7 @@ static int futex_wait_requeue_pi(u32 __u struct hrtimer_sleeper timeout, *to = NULL; struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; -@@ -3054,20 +3064,55 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3148,20 +3158,55 @@ static int futex_wait_requeue_pi(u32 __u /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); @@ -109,7 +109,7 @@ Signed-off-by: Thomas Gleixner /* Check if the requeue code acquired the second futex for us. */ if (!q.rt_waiter) { -@@ -3076,7 +3121,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3170,7 +3215,8 @@ static int futex_wait_requeue_pi(u32 __u * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { @@ -119,7 +119,7 @@ Signed-off-by: Thomas Gleixner ret = fixup_pi_state_owner(uaddr2, &q, current); if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { pi_state = q.pi_state; -@@ -3087,7 +3133,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3181,7 +3227,7 @@ static int futex_wait_requeue_pi(u32 __u * the requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); @@ -128,7 +128,7 @@ Signed-off-by: Thomas Gleixner } } else { struct rt_mutex *pi_mutex; -@@ -3101,7 +3147,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3195,7 +3241,8 @@ static int futex_wait_requeue_pi(u32 __u pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); @@ -152,7 +152,7 @@ Signed-off-by: Thomas Gleixner /* * We can speed up the acquire/release, if there's no debugging state to be * set up. -@@ -391,7 +396,8 @@ int max_lock_depth = 1024; +@@ -379,7 +384,8 @@ int max_lock_depth = 1024; static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) { @@ -162,7 +162,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -527,7 +533,7 @@ static int rt_mutex_adjust_prio_chain(st +@@ -515,7 +521,7 @@ static int rt_mutex_adjust_prio_chain(st * reached or the state of the chain has changed while we * dropped the locks. */ @@ -171,11 +171,10 @@ Signed-off-by: Thomas Gleixner goto out_unlock_pi; /* -@@ -963,6 +969,23 @@ static int task_blocks_on_rt_mutex(struc +@@ -951,6 +957,22 @@ static int task_blocks_on_rt_mutex(struc return -EDEADLK; raw_spin_lock(&task->pi_lock); -+ + /* + * In the case of futex requeue PI, this will be a proxy + * lock. 
The task will wake unaware that it is enqueueed on @@ -190,12 +189,12 @@ Signed-off-by: Thomas Gleixner + return -EAGAIN; + } + -+ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); ++ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); + - rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; -@@ -987,7 +1010,7 @@ static int task_blocks_on_rt_mutex(struc + waiter->prio = task->prio; +@@ -974,7 +996,7 @@ static int task_blocks_on_rt_mutex(struc rt_mutex_enqueue_pi(owner, waiter); rt_mutex_adjust_prio(owner); @@ -204,7 +203,7 @@ Signed-off-by: Thomas Gleixner chain_walk = 1; } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { chain_walk = 1; -@@ -1083,7 +1106,7 @@ static void remove_waiter(struct rt_mute +@@ -1070,7 +1092,7 @@ static void remove_waiter(struct rt_mute { bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); @@ -213,7 +212,7 @@ Signed-off-by: Thomas Gleixner lockdep_assert_held(&lock->wait_lock); -@@ -1109,7 +1132,8 @@ static void remove_waiter(struct rt_mute +@@ -1096,7 +1118,8 @@ static void remove_waiter(struct rt_mute rt_mutex_adjust_prio(owner); /* Store the lock on which owner is blocked or NULL */ @@ -223,7 +222,7 @@ Signed-off-by: Thomas Gleixner raw_spin_unlock(&owner->pi_lock); -@@ -1145,7 +1169,8 @@ void rt_mutex_adjust_pi(struct task_stru +@@ -1132,7 +1155,8 @@ void rt_mutex_adjust_pi(struct task_stru raw_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; @@ -235,7 +234,7 @@ Signed-off-by: Thomas Gleixner } --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -99,6 +99,8 @@ enum rtmutex_chainwalk { +@@ -129,6 +129,8 @@ enum rtmutex_chainwalk { /* * PI-futex support (proxy locking functions, etc.): */ diff --git a/debian/patches/features/all/rt/rtmutex-lock-killable.patch b/debian/patches/features/all/rt/rtmutex-lock-killable.patch index aa1cda73f..95acbf51d 100644 --- a/debian/patches/features/all/rt/rtmutex-lock-killable.patch +++ b/debian/patches/features/all/rt/rtmutex-lock-killable.patch @@ -1,7 +1,7 @@ Subject: rtmutex: Add rtmutex_lock_killable() From: Thomas Gleixner Date: Thu, 09 Jun 2011 11:43:52 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add "killable" type to rtmutex. We need this since rtmutex are used as "normal" mutexes which do use this type. 
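A caller-side sketch of the pattern this enables; only rt_mutex_lock_killable() (declared below) and rt_mutex_unlock() are real interfaces, while frob_dev and frob_device() are made-up illustration names:

        /* Blocks like rt_mutex_lock(), but a fatal signal aborts the wait. */
        static int frob_device(struct frob_dev *dev)
        {
                int ret;

                ret = rt_mutex_lock_killable(&dev->lock);
                if (ret)        /* typically -EINTR; the lock was not acquired */
                        return ret;

                /* ... critical section ... */

                rt_mutex_unlock(&dev->lock);
                return 0;
        }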
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h -@@ -91,6 +91,7 @@ extern void rt_mutex_destroy(struct rt_m +@@ -108,6 +108,7 @@ extern void rt_mutex_destroy(struct rt_m extern void rt_mutex_lock(struct rt_mutex *lock); extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1537,6 +1537,25 @@ int __sched rt_mutex_futex_trylock(struc +@@ -1531,6 +1531,25 @@ int __sched rt_mutex_futex_trylock(struc } /** diff --git a/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch b/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch index 0fc981203..a5bdd4d3b 100644 --- a/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch +++ b/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed 02 Dec 2015 11:34:07 +0100 Subject: rtmutex: trylock is okay on -RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz non-RT kernel could deadlock on rt_mutex_trylock() in softirq context. On -RT we don't run softirqs in IRQ context but in thread context so it is @@ -14,10 +14,10 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1547,7 +1547,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); - */ - int __sched rt_mutex_trylock(struct rt_mutex *lock) +@@ -1551,7 +1551,11 @@ int __sched rt_mutex_trylock(struct rt_m { + int ret; + +#ifdef CONFIG_PREEMPT_RT_FULL + if (WARN_ON_ONCE(in_irq() || in_nmi())) +#else @@ -25,4 +25,4 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif return 0; - return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); + ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); diff --git a/debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch b/debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch new file mode 100644 index 000000000..b548e3fae --- /dev/null +++ b/debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch @@ -0,0 +1,249 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:31:14 +0200 +Subject: rtmutex: wire up RT's locking +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/mutex.h | 20 +++++++++++++------- + include/linux/rwsem.h | 11 +++++++++++ + include/linux/spinlock.h | 12 +++++++++++- + include/linux/spinlock_api_smp.h | 4 +++- + include/linux/spinlock_types.h | 11 ++++++++--- + kernel/locking/Makefile | 9 ++++++++- + kernel/locking/spinlock.c | 7 +++++++ + kernel/locking/spinlock_debug.c | 5 +++++ + 8 files changed, 66 insertions(+), 13 deletions(-) + +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -23,6 +23,17 @@ + + struct ww_acquire_ctx; + ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ ++ , .dep_map = { .name = #lockname } ++#else ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) ++#endif ++ ++#ifdef CONFIG_PREEMPT_RT_FULL ++# include ++#else ++ + /* + * Simple, straightforward mutexes with strict semantics: + * +@@ -114,13 +125,6 @@ do { \ + __mutex_init((mutex), #mutex, &__key); \ + } while (0) + +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define 
__DEP_MAP_MUTEX_INITIALIZER(lockname) \ +- , .dep_map = { .name = #lockname } +-#else +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +-#endif +- + #define __MUTEX_INITIALIZER(lockname) \ + { .owner = ATOMIC_LONG_INIT(0) \ + , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ +@@ -228,4 +232,6 @@ mutex_trylock_recursive(struct mutex *lo + return mutex_trylock(lock); + } + ++#endif /* !PREEMPT_RT_FULL */ ++ + #endif /* __LINUX_MUTEX_H */ +--- a/include/linux/rwsem.h ++++ b/include/linux/rwsem.h +@@ -20,6 +20,10 @@ + #include + #endif + ++#ifdef CONFIG_PREEMPT_RT_FULL ++#include ++#else /* PREEMPT_RT_FULL */ ++ + struct rw_semaphore; + + #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK +@@ -108,6 +112,13 @@ static inline int rwsem_is_contended(str + return !list_empty(&sem->wait_list); + } + ++#endif /* !PREEMPT_RT_FULL */ ++ ++/* ++ * The functions below are the same for all rwsem implementations including ++ * the RT specific variant. ++ */ ++ + /* + * lock for reading + */ +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -286,7 +286,11 @@ static inline void do_raw_spin_unlock(ra + #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) + + /* Include rwlock functions */ +-#include ++#ifdef CONFIG_PREEMPT_RT_FULL ++# include ++#else ++# include ++#endif + + /* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: +@@ -297,6 +301,10 @@ static inline void do_raw_spin_unlock(ra + # include + #endif + ++#ifdef CONFIG_PREEMPT_RT_FULL ++# include ++#else /* PREEMPT_RT_FULL */ ++ + /* + * Map the spin_lock functions to the raw variants for PREEMPT_RT=n + */ +@@ -421,4 +429,6 @@ extern int _atomic_dec_and_lock(atomic_t + #define atomic_dec_and_lock(atomic, lock) \ + __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) + ++#endif /* !PREEMPT_RT_FULL */ ++ + #endif /* __LINUX_SPINLOCK_H */ +--- a/include/linux/spinlock_api_smp.h ++++ b/include/linux/spinlock_api_smp.h +@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh( + return 0; + } + +-#include ++#ifndef CONFIG_PREEMPT_RT_FULL ++# include ++#endif + + #endif /* __LINUX_SPINLOCK_API_SMP_H */ +--- a/include/linux/spinlock_types.h ++++ b/include/linux/spinlock_types.h +@@ -11,8 +11,13 @@ + + #include + +-#include +- +-#include ++#ifndef CONFIG_PREEMPT_RT_FULL ++# include ++# include ++#else ++# include ++# include ++# include ++#endif + + #endif /* __LINUX_SPINLOCK_TYPES_H */ +--- a/kernel/locking/Makefile ++++ b/kernel/locking/Makefile +@@ -3,7 +3,7 @@ + # and is generally not a function of system call inputs. 
+ KCOV_INSTRUMENT := n + +-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o ++obj-y += semaphore.o percpu-rwsem.o + + ifdef CONFIG_FUNCTION_TRACER + CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) +@@ -12,7 +12,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS + CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) + endif + ++ifneq ($(CONFIG_PREEMPT_RT_FULL),y) ++obj-y += mutex.o + obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o ++endif ++obj-y += rwsem.o + obj-$(CONFIG_LOCKDEP) += lockdep.o + ifeq ($(CONFIG_PROC_FS),y) + obj-$(CONFIG_LOCKDEP) += lockdep_proc.o +@@ -25,8 +29,11 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o + obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o ++ifneq ($(CONFIG_PREEMPT_RT_FULL),y) + obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o + obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o ++endif ++obj-$(CONFIG_PREEMPT_RT_FULL) += mutex-rt.o rwsem-rt.o rwlock-rt.o + obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o + obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o + obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o +--- a/kernel/locking/spinlock.c ++++ b/kernel/locking/spinlock.c +@@ -125,8 +125,11 @@ void __lockfunc __raw_##op##_lock_bh(loc + * __[spin|read|write]_lock_bh() + */ + BUILD_LOCK_OPS(spin, raw_spinlock); ++ ++#ifndef CONFIG_PREEMPT_RT_FULL + BUILD_LOCK_OPS(read, rwlock); + BUILD_LOCK_OPS(write, rwlock); ++#endif + + #endif + +@@ -210,6 +213,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ + EXPORT_SYMBOL(_raw_spin_unlock_bh); + #endif + ++#ifndef CONFIG_PREEMPT_RT_FULL ++ + #ifndef CONFIG_INLINE_READ_TRYLOCK + int __lockfunc _raw_read_trylock(rwlock_t *lock) + { +@@ -354,6 +359,8 @@ void __lockfunc _raw_write_unlock_bh(rwl + EXPORT_SYMBOL(_raw_write_unlock_bh); + #endif + ++#endif /* !PREEMPT_RT_FULL */ ++ + #ifdef CONFIG_DEBUG_LOCK_ALLOC + + void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) +--- a/kernel/locking/spinlock_debug.c ++++ b/kernel/locking/spinlock_debug.c +@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t + + EXPORT_SYMBOL(__raw_spin_lock_init); + ++#ifndef CONFIG_PREEMPT_RT_FULL + void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key) + { +@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const + } + + EXPORT_SYMBOL(__rwlock_init); ++#endif + + static void spin_dump(raw_spinlock_t *lock, const char *msg) + { +@@ -135,6 +137,7 @@ void do_raw_spin_unlock(raw_spinlock_t * + arch_spin_unlock(&lock->raw_lock); + } + ++#ifndef CONFIG_PREEMPT_RT_FULL + static void rwlock_bug(rwlock_t *lock, const char *msg) + { + if (!debug_locks_off()) +@@ -224,3 +227,5 @@ void do_raw_write_unlock(rwlock_t *lock) + debug_write_unlock(lock); + arch_write_unlock(&lock->raw_lock); + } ++ ++#endif diff --git a/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch b/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch index f555241ca..8ae0d24e8 100644 --- a/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch +++ b/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior Subject: rbtree: don't include the rcu header -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The RCU header pulls in spinlock.h and fails due not yet defined types: @@ -94,15 +94,15 @@ Signed-off-by: Sebastian Andrzej Siewior 
+#endif --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h -@@ -45,6 +45,7 @@ - #include - #include - #include +@@ -42,6 +42,7 @@ + #include + #include + #include +#include - #include - -@@ -593,54 +594,6 @@ static inline void rcu_preempt_sleep_che + #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) + #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +@@ -372,54 +373,6 @@ static inline void rcu_preempt_sleep_che }) /** @@ -154,6 +154,6 @@ Signed-off-by: Sebastian Andrzej Siewior -}) - -/** - * rcu_access_pointer() - fetch RCU pointer with no dereferencing - * @p: The pointer to read - * + * rcu_swap_protected() - swap an RCU and a regular pointer + * @rcu_ptr: RCU pointer + * @ptr: regular pointer diff --git a/debian/patches/features/all/rt/rwsem-rt-Lift-single-reader-restriction.patch b/debian/patches/features/all/rt/rwsem-rt-Lift-single-reader-restriction.patch deleted file mode 100644 index ccb02dadb..000000000 --- a/debian/patches/features/all/rt/rwsem-rt-Lift-single-reader-restriction.patch +++ /dev/null @@ -1,743 +0,0 @@ -From: Thomas Gleixner -Date: Sat, 1 Apr 2017 12:51:02 +0200 -Subject: [PATCH] rwsem/rt: Lift single reader restriction -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The RT specific R/W semaphore implementation restricts the number of readers -to one because a writer cannot block on multiple readers and inherit its -priority or budget. - -The single reader restricting is painful in various ways: - - - Performance bottleneck for multi-threaded applications in the page fault - path (mmap sem) - - - Progress blocker for drivers which are carefully crafted to avoid the - potential reader/writer deadlock in mainline. - -The analysis of the writer code pathes shows, that properly written RT tasks -should not take them. Syscalls like mmap(), file access which take mmap sem -write locked have unbound latencies which are completely unrelated to mmap -sem. Other R/W sem users like graphics drivers are not suitable for RT tasks -either. - -So there is little risk to hurt RT tasks when the RT rwsem implementation is -changed in the following way: - - - Allow concurrent readers - - - Make writers block until the last reader left the critical section. This - blocking is not subject to priority/budget inheritance. - - - Readers blocked on a writer inherit their priority/budget in the normal - way. - -There is a drawback with this scheme. R/W semaphores become writer unfair -though the applications which have triggered writer starvation (mostly on -mmap_sem) in the past are not really the typical workloads running on a RT -system. So while it's unlikely to hit writer starvation, it's possible. If -there are unexpected workloads on RT systems triggering it, we need to rethink -the approach. 
- -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/rwsem.h | 9 + - include/linux/rwsem_rt.h | 166 +++++----------------------- - kernel/locking/Makefile | 4 - kernel/locking/rt.c | 167 ---------------------------- - kernel/locking/rwsem-rt.c | 269 ++++++++++++++++++++++++++++++++++++++++++++++ - 5 files changed, 311 insertions(+), 304 deletions(-) - create mode 100644 kernel/locking/rwsem-rt.c - ---- a/include/linux/rwsem.h -+++ b/include/linux/rwsem.h -@@ -110,6 +110,13 @@ static inline int rwsem_is_contended(str - return !list_empty(&sem->wait_list); - } - -+#endif /* !PREEMPT_RT_FULL */ -+ -+/* -+ * The functions below are the same for all rwsem implementations including -+ * the RT specific variant. -+ */ -+ - /* - * lock for reading - */ -@@ -188,6 +195,4 @@ extern void up_read_non_owner(struct rw_ - # define up_read_non_owner(sem) up_read(sem) - #endif - --#endif /* !PREEMPT_RT_FULL */ -- - #endif /* _LINUX_RWSEM_H */ ---- a/include/linux/rwsem_rt.h -+++ b/include/linux/rwsem_rt.h -@@ -5,163 +5,63 @@ - #error "Include rwsem.h" - #endif - --/* -- * RW-semaphores are a spinlock plus a reader-depth count. -- * -- * Note that the semantics are different from the usual -- * Linux rw-sems, in PREEMPT_RT mode we do not allow -- * multiple readers to hold the lock at once, we only allow -- * a read-lock owner to read-lock recursively. This is -- * better for latency, makes the implementation inherently -- * fair and makes it simpler as well. -- */ -- - #include -+#include -+ -+#define READER_BIAS (1U << 31) -+#define WRITER_BIAS (1U << 30) - - struct rw_semaphore { -- struct rt_mutex lock; -- int read_depth; -+ atomic_t readers; -+ struct rt_mutex rtmutex; - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; - #endif - }; - --#define __RWSEM_INITIALIZER(name) \ -- { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ -- RW_DEP_MAP_INIT(name) } -+#define __RWSEM_INITIALIZER(name) \ -+{ \ -+ .readers = ATOMIC_INIT(READER_BIAS), \ -+ .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ -+ RW_DEP_MAP_INIT(name) \ -+} - - #define DECLARE_RWSEM(lockname) \ - struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) - --extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, -- struct lock_class_key *key); -- --#define __rt_init_rwsem(sem, name, key) \ -- do { \ -- rt_mutex_init(&(sem)->lock); \ -- __rt_rwsem_init((sem), (name), (key));\ -- } while (0) -+extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, -+ struct lock_class_key *key); - --#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key) -+#define __init_rwsem(sem, name, key) \ -+do { \ -+ rt_mutex_init(&(sem)->rtmutex); \ -+ __rwsem_init((sem), (name), (key)); \ -+} while (0) - --# define rt_init_rwsem(sem) \ -+#define init_rwsem(sem) \ - do { \ - static struct lock_class_key __key; \ - \ -- __rt_init_rwsem((sem), #sem, &__key); \ -+ __init_rwsem((sem), #sem, &__key); \ - } while (0) - --extern void rt_down_write(struct rw_semaphore *rwsem); --extern int rt_down_write_killable(struct rw_semaphore *rwsem); --extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); --extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); --extern int rt_down_write_killable_nested(struct rw_semaphore *rwsem, -- int subclass); --extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem, -- struct lockdep_map *nest); --extern void rt__down_read(struct rw_semaphore *rwsem); --extern void rt_down_read(struct 
rw_semaphore *rwsem); --extern int rt_down_write_trylock(struct rw_semaphore *rwsem); --extern int rt__down_read_trylock(struct rw_semaphore *rwsem); --extern int rt_down_read_trylock(struct rw_semaphore *rwsem); --extern void __rt_up_read(struct rw_semaphore *rwsem); --extern void rt_up_read(struct rw_semaphore *rwsem); --extern void rt_up_write(struct rw_semaphore *rwsem); --extern void rt_downgrade_write(struct rw_semaphore *rwsem); -- --#define init_rwsem(sem) rt_init_rwsem(sem) --#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) -- --static inline int rwsem_is_contended(struct rw_semaphore *sem) --{ -- /* rt_mutex_has_waiters() */ -- return !RB_EMPTY_ROOT(&sem->lock.waiters); --} -- --static inline void __down_read(struct rw_semaphore *sem) --{ -- rt__down_read(sem); --} -- --static inline void down_read(struct rw_semaphore *sem) --{ -- rt_down_read(sem); --} -- --static inline int __down_read_trylock(struct rw_semaphore *sem) --{ -- return rt__down_read_trylock(sem); --} -- --static inline int down_read_trylock(struct rw_semaphore *sem) --{ -- return rt_down_read_trylock(sem); --} -- --static inline void down_write(struct rw_semaphore *sem) --{ -- rt_down_write(sem); --} -- --static inline int down_write_killable(struct rw_semaphore *sem) --{ -- return rt_down_write_killable(sem); --} -- --static inline int down_write_trylock(struct rw_semaphore *sem) --{ -- return rt_down_write_trylock(sem); --} -- --static inline void __up_read(struct rw_semaphore *sem) --{ -- __rt_up_read(sem); --} -- --static inline void up_read(struct rw_semaphore *sem) --{ -- rt_up_read(sem); --} -- --static inline void up_write(struct rw_semaphore *sem) --{ -- rt_up_write(sem); --} -- --static inline void downgrade_write(struct rw_semaphore *sem) -+static inline int rwsem_is_locked(struct rw_semaphore *sem) - { -- rt_downgrade_write(sem); -+ return atomic_read(&sem->readers) != READER_BIAS; - } - --static inline void down_read_nested(struct rw_semaphore *sem, int subclass) --{ -- return rt_down_read_nested(sem, subclass); --} -- --static inline void down_write_nested(struct rw_semaphore *sem, int subclass) --{ -- rt_down_write_nested(sem, subclass); --} -- --static inline int down_write_killable_nested(struct rw_semaphore *sem, -- int subclass) --{ -- return rt_down_write_killable_nested(sem, subclass); --} -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC --static inline void down_write_nest_lock(struct rw_semaphore *sem, -- struct rw_semaphore *nest_lock) -+static inline int rwsem_is_contended(struct rw_semaphore *sem) - { -- rt_down_write_nested_lock(sem, &nest_lock->dep_map); -+ return atomic_read(&sem->readers) > 0; - } - --#else -+extern void __down_read(struct rw_semaphore *sem); -+extern int __down_read_trylock(struct rw_semaphore *sem); -+extern void __down_write(struct rw_semaphore *sem); -+extern int __must_check __down_write_killable(struct rw_semaphore *sem); -+extern int __down_write_trylock(struct rw_semaphore *sem); -+extern void __up_read(struct rw_semaphore *sem); -+extern void __up_write(struct rw_semaphore *sem); -+extern void __downgrade_write(struct rw_semaphore *sem); - --static inline void down_write_nest_lock(struct rw_semaphore *sem, -- struct rw_semaphore *nest_lock) --{ -- rt_down_write_nested_lock(sem, NULL); --} --#endif - #endif ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -14,8 +14,8 @@ endif - ifneq ($(CONFIG_PREEMPT_RT_FULL),y) - obj-y += mutex.o - obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o --obj-y += rwsem.o - endif -+obj-y += rwsem.o - obj-$(CONFIG_LOCKDEP) 
+= lockdep.o - ifeq ($(CONFIG_PROC_FS),y) - obj-$(CONFIG_LOCKDEP) += lockdep_proc.o -@@ -32,7 +32,7 @@ ifneq ($(CONFIG_PREEMPT_RT_FULL),y) - obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o - obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o - endif --obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o -+obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o rwsem-rt.o - obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o - obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o - obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o ---- a/kernel/locking/rt.c -+++ b/kernel/locking/rt.c -@@ -329,173 +329,6 @@ void __rt_rwlock_init(rwlock_t *rwlock, - } - EXPORT_SYMBOL(__rt_rwlock_init); - --/* -- * rw_semaphores -- */ -- --void rt_up_write(struct rw_semaphore *rwsem) --{ -- rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -- rt_mutex_unlock(&rwsem->lock); --} --EXPORT_SYMBOL(rt_up_write); -- --void __rt_up_read(struct rw_semaphore *rwsem) --{ -- if (--rwsem->read_depth == 0) -- rt_mutex_unlock(&rwsem->lock); --} -- --void rt_up_read(struct rw_semaphore *rwsem) --{ -- rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -- __rt_up_read(rwsem); --} --EXPORT_SYMBOL(rt_up_read); -- --/* -- * downgrade a write lock into a read lock -- * - just wake up any readers at the front of the queue -- */ --void rt_downgrade_write(struct rw_semaphore *rwsem) --{ -- BUG_ON(rt_mutex_owner(&rwsem->lock) != current); -- rwsem->read_depth = 1; --} --EXPORT_SYMBOL(rt_downgrade_write); -- --int rt_down_write_trylock(struct rw_semaphore *rwsem) --{ -- int ret = rt_mutex_trylock(&rwsem->lock); -- -- if (ret) -- rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); -- return ret; --} --EXPORT_SYMBOL(rt_down_write_trylock); -- --void rt_down_write(struct rw_semaphore *rwsem) --{ -- rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); -- rt_mutex_lock(&rwsem->lock); --} --EXPORT_SYMBOL(rt_down_write); -- --int rt_down_write_killable(struct rw_semaphore *rwsem) --{ -- int ret; -- -- rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); -- ret = rt_mutex_lock_killable(&rwsem->lock); -- if (ret) -- rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -- return ret; --} --EXPORT_SYMBOL(rt_down_write_killable); -- --int rt_down_write_killable_nested(struct rw_semaphore *rwsem, int subclass) --{ -- int ret; -- -- rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); -- ret = rt_mutex_lock_killable(&rwsem->lock); -- if (ret) -- rwsem_release(&rwsem->dep_map, 1, _RET_IP_); -- return ret; --} --EXPORT_SYMBOL(rt_down_write_killable_nested); -- --void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) --{ -- rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); -- rt_mutex_lock(&rwsem->lock); --} --EXPORT_SYMBOL(rt_down_write_nested); -- --void rt_down_write_nested_lock(struct rw_semaphore *rwsem, -- struct lockdep_map *nest) --{ -- rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_); -- rt_mutex_lock(&rwsem->lock); --} --EXPORT_SYMBOL(rt_down_write_nested_lock); -- --int rt__down_read_trylock(struct rw_semaphore *rwsem) --{ -- struct rt_mutex *lock = &rwsem->lock; -- int ret = 1; -- -- /* -- * recursive read locks succeed when current owns the rwsem, -- * but not when read_depth == 0 which means that the rwsem is -- * write locked. 
-- */ -- if (rt_mutex_owner(lock) != current) -- ret = rt_mutex_trylock(&rwsem->lock); -- else if (!rwsem->read_depth) -- ret = 0; -- -- if (ret) -- rwsem->read_depth++; -- return ret; -- --} -- --int rt_down_read_trylock(struct rw_semaphore *rwsem) --{ -- int ret; -- -- ret = rt__down_read_trylock(rwsem); -- if (ret) -- rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); -- -- return ret; --} --EXPORT_SYMBOL(rt_down_read_trylock); -- --void rt__down_read(struct rw_semaphore *rwsem) --{ -- struct rt_mutex *lock = &rwsem->lock; -- -- if (rt_mutex_owner(lock) != current) -- rt_mutex_lock(&rwsem->lock); -- rwsem->read_depth++; --} --EXPORT_SYMBOL(rt__down_read); -- --static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) --{ -- rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); -- rt__down_read(rwsem); --} -- --void rt_down_read(struct rw_semaphore *rwsem) --{ -- __rt_down_read(rwsem, 0); --} --EXPORT_SYMBOL(rt_down_read); -- --void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) --{ -- __rt_down_read(rwsem, subclass); --} --EXPORT_SYMBOL(rt_down_read_nested); -- --void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, -- struct lock_class_key *key) --{ --#ifdef CONFIG_DEBUG_LOCK_ALLOC -- /* -- * Make sure we are not reinitializing a held lock: -- */ -- debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); -- lockdep_init_map(&rwsem->dep_map, name, key, 0); --#endif -- rwsem->read_depth = 0; -- rwsem->lock.save_state = 0; --} --EXPORT_SYMBOL(__rt_rwsem_init); -- - /** - * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 - * @cnt: the atomic which we are to dec ---- /dev/null -+++ b/kernel/locking/rwsem-rt.c -@@ -0,0 +1,269 @@ -+/* -+ */ -+#include -+#include -+#include -+#include -+ -+#include "rtmutex_common.h" -+ -+/* -+ * RT-specific reader/writer semaphores -+ * -+ * down_write() -+ * 1) Lock sem->rtmutex -+ * 2) Remove the reader BIAS to force readers into the slow path -+ * 3) Wait until all readers have left the critical region -+ * 4) Mark it write locked -+ * -+ * up_write() -+ * 1) Remove the write locked marker -+ * 2) Set the reader BIAS so readers can use the fast path again -+ * 3) Unlock sem->rtmutex to release blocked readers -+ * -+ * down_read() -+ * 1) Try fast path acquisition (reader BIAS is set) -+ * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag -+ * 3) If !writelocked, acquire it for read -+ * 4) If writelocked, block on sem->rtmutex -+ * 5) unlock sem->rtmutex, goto 1) -+ * -+ * up_read() -+ * 1) Try fast path release (reader count != 1) -+ * 2) Wake the writer waiting in down_write()#3 -+ * -+ * down_read()#3 has the consequence, that rw semaphores on RT are not writer -+ * fair, but writers, which should be avoided in RT tasks (think mmap_sem), -+ * are subject to the rtmutex priority/DL inheritance mechanism. -+ * -+ * It's possible to make the rw semaphores writer fair by keeping a list of -+ * active readers. A blocked writer would force all newly incoming readers to -+ * block on the rtmutex, but the rtmutex would have to be proxy locked for one -+ * reader after the other. We can't use multi-reader inheritance because there -+ * is no way to support that with SCHED_DEADLINE. Implementing the one by one -+ * reader boosting/handover mechanism is a major surgery for a very dubious -+ * value. -+ * -+ * The risk of writer starvation is there, but the pathological use cases -+ * which trigger it are not necessarily the typical RT workloads. 
-+ */ -+ -+void __rwsem_init(struct rw_semaphore *sem, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held semaphore: -+ */ -+ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); -+ lockdep_init_map(&sem->dep_map, name, key, 0); -+#endif -+ atomic_set(&sem->readers, READER_BIAS); -+} -+EXPORT_SYMBOL(__rwsem_init); -+ -+int __down_read_trylock(struct rw_semaphore *sem) -+{ -+ int r, old; -+ -+ /* -+ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is -+ * set. -+ */ -+ for (r = atomic_read(&sem->readers); r < 0;) { -+ old = atomic_cmpxchg(&sem->readers, r, r + 1); -+ if (likely(old == r)) -+ return 1; -+ r = old; -+ } -+ return 0; -+} -+ -+void __sched __down_read(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ struct rt_mutex_waiter waiter; -+ -+ if (__down_read_trylock(sem)) -+ return; -+ -+ might_sleep(); -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Allow readers as long as the writer has not completely -+ * acquired the semaphore for write. -+ */ -+ if (atomic_read(&sem->readers) != WRITER_BIAS) { -+ atomic_inc(&sem->readers); -+ raw_spin_unlock_irq(&m->wait_lock); -+ return; -+ } -+ -+ /* -+ * Call into the slow lock path with the rtmutex->wait_lock -+ * held, so this can't result in the following race: -+ * -+ * Reader1 Reader2 Writer -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * swait() -+ * down_read() -+ * unlock(m->wait_lock) -+ * up_read() -+ * swake() -+ * lock(m->wait_lock) -+ * sem->writelocked=true -+ * unlock(m->wait_lock) -+ * -+ * up_write() -+ * sem->writelocked=false -+ * rtmutex_unlock(m) -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * swait() -+ * rtmutex_lock(m) -+ * -+ * That would put Reader1 behind the writer waiting on -+ * Reader2 to call up_read() which might be unbound. -+ */ -+ rt_mutex_init_waiter(&waiter, false); -+ rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL, -+ RT_MUTEX_MIN_CHAINWALK, NULL, -+ &waiter); -+ /* -+ * The slowlock() above is guaranteed to return with the rtmutex is -+ * now held, so there can't be a writer active. Increment the reader -+ * count and immediately drop the rtmutex again. -+ */ -+ atomic_inc(&sem->readers); -+ raw_spin_unlock_irq(&m->wait_lock); -+ rt_mutex_unlock(m); -+ -+ debug_rt_mutex_free_waiter(&waiter); -+} -+ -+void __up_read(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ struct task_struct *tsk; -+ -+ /* -+ * sem->readers can only hit 0 when a writer is waiting for the -+ * active readers to leave the critical region. -+ */ -+ if (!atomic_dec_and_test(&sem->readers)) -+ return; -+ -+ might_sleep(); -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Wake the writer, i.e. the rtmutex owner. It might release the -+ * rtmutex concurrently in the fast path (due to a signal), but to -+ * clean up the rwsem it needs to acquire m->wait_lock. The worst -+ * case which can happen is a spurious wakeup. 
-+ */ -+ tsk = rt_mutex_owner(m); -+ if (tsk) -+ wake_up_process(tsk); -+ -+ raw_spin_unlock_irq(&m->wait_lock); -+} -+ -+static void __up_write_unlock(struct rw_semaphore *sem, int bias, -+ unsigned long flags) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ -+ atomic_add(READER_BIAS - bias, &sem->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ rt_mutex_unlock(m); -+} -+ -+static int __sched __down_write_common(struct rw_semaphore *sem, int state) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ /* Take the rtmutex as a first step */ -+ if (rt_mutex_lock_state(m, state)) -+ return -EINTR; -+ -+ /* Force readers into slow path */ -+ atomic_sub(READER_BIAS, &sem->readers); -+ might_sleep(); -+ -+ set_current_state(state); -+ for (;;) { -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* Have all readers left the critical region? */ -+ if (!atomic_read(&sem->readers)) { -+ atomic_set(&sem->readers, WRITER_BIAS); -+ __set_current_state(TASK_RUNNING); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 0; -+ } -+ -+ if (signal_pending_state(state, current)) { -+ __set_current_state(TASK_RUNNING); -+ __up_write_unlock(sem, 0, flags); -+ return -EINTR; -+ } -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ -+ if (atomic_read(&sem->readers) != 0) { -+ schedule(); -+ set_current_state(state); -+ } -+ } -+} -+ -+void __sched __down_write(struct rw_semaphore *sem) -+{ -+ __down_write_common(sem, TASK_UNINTERRUPTIBLE); -+} -+ -+int __sched __down_write_killable(struct rw_semaphore *sem) -+{ -+ return __down_write_common(sem, TASK_KILLABLE); -+} -+ -+int __down_write_trylock(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ if (!rt_mutex_trylock(m)) -+ return 0; -+ -+ atomic_sub(READER_BIAS, &sem->readers); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ if (!atomic_read(&sem->readers)) { -+ atomic_set(&sem->readers, WRITER_BIAS); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 1; -+ } -+ __up_write_unlock(sem, 0, flags); -+ return 0; -+} -+ -+void __up_write(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ __up_write_unlock(sem, WRITER_BIAS, flags); -+} -+ -+void __downgrade_write(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* Release it and account current as reader */ -+ __up_write_unlock(sem, WRITER_BIAS - 1, flags); -+} diff --git a/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch b/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch index 27c772dcf..9998cb936 100644 --- a/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch +++ b/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 21 Oct 2016 10:54:50 +0200 Subject: [PATCH] rxrpc: remove unused static variables -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The rxrpc_security_methods and rxrpc_security_sem user has been removed in 648af7fca159 ("rxrpc: Absorb the rxkad security module"). 
This was diff --git a/debian/patches/features/all/rt/sas-ata-isci-dont-t-disable-interrupts-in-qc_issue-h.patch b/debian/patches/features/all/rt/sas-ata-isci-dont-t-disable-interrupts-in-qc_issue-h.patch index e51f47836..8a8acd2cf 100644 --- a/debian/patches/features/all/rt/sas-ata-isci-dont-t-disable-interrupts-in-qc_issue-h.patch +++ b/debian/patches/features/all/rt/sas-ata-isci-dont-t-disable-interrupts-in-qc_issue-h.patch @@ -1,7 +1,7 @@ From: Paul Gortmaker Date: Sat, 14 Feb 2015 11:01:16 -0500 Subject: sas-ata/isci: dont't disable interrupts in qc_issue handler -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On 3.14-rt we see the following trace on Canoe Pass for SCSI_ISCI "Intel(R) C600 Series Chipset SAS Controller" diff --git a/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch b/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch index ba6a7795b..2c97b7fbe 100644 --- a/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch +++ b/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 6 Jun 2017 14:20:37 +0200 Subject: sched: Prevent task state corruption by spurious lock wakeup -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Mathias and others reported GDB failures on RT. @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2206,7 +2206,7 @@ EXPORT_SYMBOL(wake_up_process); +@@ -2207,7 +2207,7 @@ EXPORT_SYMBOL(wake_up_process); */ int wake_up_lock_sleeper(struct task_struct *p) { diff --git a/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch b/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch index de2476b87..e73da24c9 100644 --- a/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch +++ b/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch @@ -1,7 +1,7 @@ From: Peter Zijlstra Date: Wed, 7 Jun 2017 10:12:45 +0200 Subject: [PATCH] sched: Remove TASK_ALL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz It's unused: @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -94,7 +94,6 @@ struct task_group; +@@ -93,7 +93,6 @@ struct task_group; /* Convenience macros for the sake of wake_up(): */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) diff --git a/debian/patches/features/all/rt/sched-deadline-dl_task_timer-has-to-be-irqsafe.patch b/debian/patches/features/all/rt/sched-deadline-dl_task_timer-has-to-be-irqsafe.patch deleted file mode 100644 index b256231f3..000000000 --- a/debian/patches/features/all/rt/sched-deadline-dl_task_timer-has-to-be-irqsafe.patch +++ /dev/null @@ -1,23 +0,0 @@ -From: Juri Lelli -Date: Tue, 13 May 2014 15:30:20 +0200 -Subject: sched/deadline: dl_task_timer has to be irqsafe -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -As for rt_period_timer, dl_task_timer has to be irqsafe. 
- -Signed-off-by: Juri Lelli -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/sched/deadline.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/kernel/sched/deadline.c -+++ b/kernel/sched/deadline.c -@@ -693,6 +693,7 @@ void init_dl_task_timer(struct sched_dl_ - - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - timer->function = dl_task_timer; -+ timer->irqsafe = 1; - } - - /* diff --git a/debian/patches/features/all/rt/sched-delay-put-task.patch b/debian/patches/features/all/rt/sched-delay-put-task.patch index d42fc085f..4314a4c8c 100644 --- a/debian/patches/features/all/rt/sched-delay-put-task.patch +++ b/debian/patches/features/all/rt/sched-delay-put-task.patch @@ -1,7 +1,7 @@ Subject: sched: Move task_struct cleanup to RCU From: Thomas Gleixner Date: Tue, 31 May 2011 16:59:16 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz __put_task_struct() does quite some expensive work. We don't want to burden random tasks with that. @@ -9,13 +9,13 @@ burden random tasks with that. Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 3 +++ - include/linux/sched/task.h | 10 ++++++++++ + include/linux/sched/task.h | 11 ++++++++++- kernel/fork.c | 15 ++++++++++++++- - 3 files changed, 27 insertions(+), 1 deletion(-) + 3 files changed, 27 insertions(+), 2 deletions(-) --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1047,6 +1047,9 @@ struct task_struct { +@@ -1093,6 +1093,9 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner #endif --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h -@@ -86,6 +86,15 @@ extern void sched_exec(void); +@@ -88,6 +88,15 @@ extern void sched_exec(void); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) @@ -43,17 +43,18 @@ Signed-off-by: Thomas Gleixner extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) -@@ -93,6 +102,7 @@ static inline void put_task_struct(struc +@@ -95,7 +104,7 @@ static inline void put_task_struct(struc if (atomic_dec_and_test(&t->usage)) __put_task_struct(t); } +- +#endif - struct task_struct *task_rcu_dereference(struct task_struct **ptask); - struct task_struct *try_get_task_struct(struct task_struct **ptask); + + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -389,7 +389,9 @@ static inline void put_signal_struct(str +@@ -408,7 +408,9 @@ static inline void put_signal_struct(str if (atomic_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); } @@ -64,7 +65,7 @@ Signed-off-by: Thomas Gleixner void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); -@@ -406,7 +408,18 @@ void __put_task_struct(struct task_struc +@@ -425,7 +427,18 @@ void __put_task_struct(struct task_struc if (!profile_handoff_task(tsk)) free_task(tsk); } diff --git a/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch b/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch index 8a3ed49b8..6d2673af4 100644 --- a/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch +++ b/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch @@ -1,7 +1,7 @@ Subject: sched: Disable CONFIG_RT_GROUP_SCHED on RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 17:03:52 +0200 -Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Carsten reported problems when running: @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner --- a/init/Kconfig +++ b/init/Kconfig -@@ -1052,6 +1052,7 @@ config CFS_BANDWIDTH +@@ -744,6 +744,7 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED diff --git a/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch b/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch index 7043f6dde..97afc8375 100644 --- a/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch +++ b/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch @@ -1,7 +1,7 @@ Subject: sched: Disable TTWU_QUEUE on RT From: Thomas Gleixner Date: Tue, 13 Sep 2011 16:42:35 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The queued remote wakeup mechanism can introduce rather large latencies if the number of migrated tasks is high. Disable it for RT. @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/sched/features.h +++ b/kernel/sched/features.h -@@ -45,11 +45,16 @@ SCHED_FEAT(LB_BIAS, true) +@@ -46,11 +46,16 @@ SCHED_FEAT(LB_BIAS, true) */ SCHED_FEAT(NONTASK_CAPACITY, true) diff --git a/debian/patches/features/all/rt/sched-limit-nr-migrate.patch b/debian/patches/features/all/rt/sched-limit-nr-migrate.patch index 6c82fa537..a9f66aadb 100644 --- a/debian/patches/features/all/rt/sched-limit-nr-migrate.patch +++ b/debian/patches/features/all/rt/sched-limit-nr-migrate.patch @@ -1,7 +1,7 @@ Subject: sched: Limit the number of task migrations per batch From: Thomas Gleixner Date: Mon, 06 Jun 2011 12:12:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Put an upper limit on the number of tasks which are migrated per batch to avoid large latencies. @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -58,7 +58,11 @@ const_debug unsigned int sysctl_sched_fe +@@ -59,7 +59,11 @@ const_debug unsigned int sysctl_sched_fe * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. */ diff --git a/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch b/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch index 2de594b38..4c3be8020 100644 --- a/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch +++ b/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch @@ -1,7 +1,7 @@ Subject: sched: Do not account rcu_preempt_depth on RT in might_sleep() From: Thomas Gleixner Date: Tue, 07 Jun 2011 09:19:06 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RT changes the rcu_preempt_depth semantics, so we cannot check for it in might_sleep(). 
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h -@@ -261,6 +261,11 @@ void synchronize_rcu(void); +@@ -74,6 +74,11 @@ void synchronize_rcu(void); * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner #else /* #ifdef CONFIG_PREEMPT_RCU */ -@@ -286,6 +291,8 @@ static inline int rcu_preempt_depth(void +@@ -99,6 +104,8 @@ static inline int rcu_preempt_depth(void return 0; } @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner /* Internal to kernel */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6271,7 +6271,7 @@ void __init sched_init(void) +@@ -6050,7 +6050,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { diff --git a/debian/patches/features/all/rt/sched-mmdrop-delayed.patch b/debian/patches/features/all/rt/sched-mmdrop-delayed.patch index 6de59746b..2a407ed0e 100644 --- a/debian/patches/features/all/rt/sched-mmdrop-delayed.patch +++ b/debian/patches/features/all/rt/sched-mmdrop-delayed.patch @@ -1,7 +1,7 @@ Subject: sched: Move mmdrop to RCU on RT From: Thomas Gleixner Date: Mon, 06 Jun 2011 12:20:33 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Takes sleeping locks and calls into the memory allocator, so nothing we want to do in task switch and oder atomic contexts. @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h -@@ -11,6 +11,7 @@ +@@ -12,6 +12,7 @@ #include #include #include @@ -24,8 +24,8 @@ Signed-off-by: Thomas Gleixner #include #include -@@ -491,6 +492,9 @@ struct mm_struct { - bool tlb_flush_pending; +@@ -504,6 +505,9 @@ struct mm_struct { + bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; +#ifdef CONFIG_PREEMPT_RT_BASE @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner #endif --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h -@@ -42,6 +42,17 @@ static inline void mmdrop(struct mm_stru +@@ -43,6 +43,17 @@ static inline void mmdrop(struct mm_stru __mmdrop(mm); } @@ -56,7 +56,7 @@ Signed-off-by: Thomas Gleixner struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -885,6 +885,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -931,6 +931,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -78,7 +78,7 @@ Signed-off-by: Thomas Gleixner VM_BUG_ON(atomic_read(&mm->mm_users)); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2785,8 +2785,12 @@ static struct rq *finish_task_switch(str +@@ -2696,8 +2696,12 @@ static struct rq *finish_task_switch(str finish_arch_post_lock_switch(); fire_sched_in_preempt_notifiers(current); @@ -92,7 +92,7 @@ Signed-off-by: Thomas Gleixner if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); -@@ -5612,6 +5616,8 @@ void sched_setnuma(struct task_struct *p +@@ -5434,6 +5438,8 @@ void sched_setnuma(struct task_struct *p #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner /* * Ensure that the idle task is using init_mm right before its CPU goes * offline. 
-@@ -5626,7 +5632,12 @@ void idle_task_exit(void) +@@ -5448,7 +5454,12 @@ void idle_task_exit(void) switch_mm(mm, &init_mm, current); finish_arch_post_lock_switch(); } @@ -115,7 +115,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -5953,6 +5964,10 @@ int sched_cpu_dying(unsigned int cpu) +@@ -5751,6 +5762,10 @@ int sched_cpu_dying(unsigned int cpu) update_max_interval(); nohz_balance_exit_idle(cpu); hrtick_clear(rq); diff --git a/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch b/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch index 88137f96b..24a0c9f62 100644 --- a/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch +++ b/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch @@ -1,7 +1,7 @@ Subject: sched: Add saved_state for tasks blocked on sleeping locks From: Thomas Gleixner Date: Sat, 25 Jun 2011 09:21:04 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Spinlocks are state preserving in !RT. RT changes the state when a task gets blocked on a lock. So we need to remember the state before @@ -18,16 +18,16 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -490,6 +490,8 @@ struct task_struct { +@@ -526,6 +526,8 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; + /* saved state for "spinlock sleepers" */ + volatile long saved_state; - void *stack; - atomic_t usage; - /* Per task flags (PF_*), defined further below: */ -@@ -1410,6 +1412,7 @@ extern struct task_struct *find_task_by_ + + /* + * This begins the randomizable portion of task_struct. Only +@@ -1507,6 +1509,7 @@ extern struct task_struct *find_task_by_ extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); @@ -37,10 +37,10 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_SMP --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2013,8 +2013,25 @@ try_to_wake_up(struct task_struct *p, un +@@ -2016,8 +2016,25 @@ try_to_wake_up(struct task_struct *p, un */ - smp_mb__before_spinlock(); raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); - if (!(p->state & state)) + if (!(p->state & state)) { + /* @@ -64,7 +64,7 @@ Signed-off-by: Thomas Gleixner trace_sched_waking(p); -@@ -2180,6 +2197,18 @@ int wake_up_process(struct task_struct * +@@ -2181,6 +2198,18 @@ int wake_up_process(struct task_struct * } EXPORT_SYMBOL(wake_up_process); @@ -85,7 +85,7 @@ Signed-off-by: Thomas Gleixner return try_to_wake_up(p, state, 0); --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1293,6 +1293,7 @@ static inline void finish_lock_switch(st +@@ -1342,6 +1342,7 @@ static inline void finish_lock_switch(st #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ #define WF_FORK 0x02 /* child wakeup after fork */ #define WF_MIGRATED 0x4 /* internal use, task got migrated */ diff --git a/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch b/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch index 81f425469..31299c105 100644 --- a/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch +++ b/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch @@ -1,7 +1,7 @@ Subject: sched: ttwu: Return success when only changing the saved_state value From: Thomas Gleixner Date: Tue, 13 Dec 2011 
21:42:19 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz When a task blocks on a rt lock, it saves the current state in p->saved_state, so a lock related wake up will not destroy the @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2020,8 +2020,10 @@ try_to_wake_up(struct task_struct *p, un +@@ -2023,8 +2023,10 @@ try_to_wake_up(struct task_struct *p, un * if the wakeup condition is true. */ if (!(wake_flags & WF_LOCK_SLEEPER)) { diff --git a/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch b/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch index 647ea1f1d..9ad3c67bf 100644 --- a/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch +++ b/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Mon, 18 Mar 2013 15:12:49 -0400 Subject: sched/workqueue: Only wake up idle workers if not blocked on sleeping spin lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz In -rt, most spin_locks() turn into mutexes. One of these spin_lock conversions is performed on the workqueue gcwq->lock. When the idle @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3474,8 +3474,10 @@ static void __sched notrace __schedule(b +@@ -3388,8 +3388,10 @@ static void __sched notrace __schedule(b * If a worker went to sleep, notify and ask workqueue * whether it wants to wake up a task to maintain * concurrency. diff --git a/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch b/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch index c9f1b0cb4..b9d350740 100644 --- a/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch +++ b/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch @@ -1,7 +1,7 @@ Subject: scsi/fcoe: Make RT aware. From: Thomas Gleixner Date: Sat, 12 Nov 2011 14:00:48 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Do not disable preemption while taking sleeping locks. All user look safe for migrate_diable() only. 
@@ -71,7 +71,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c -@@ -836,7 +836,7 @@ static unsigned long fcoe_ctlr_age_fcfs( +@@ -835,7 +835,7 @@ static unsigned long fcoe_ctlr_age_fcfs( INIT_LIST_HEAD(&del_list); @@ -80,7 +80,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; -@@ -872,7 +872,7 @@ static unsigned long fcoe_ctlr_age_fcfs( +@@ -871,7 +871,7 @@ static unsigned long fcoe_ctlr_age_fcfs( sel_time = fcf->time; } } diff --git a/debian/patches/features/all/rt/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch b/debian/patches/features/all/rt/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch index 412f07bed..8f52bcce8 100644 --- a/debian/patches/features/all/rt/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch +++ b/debian/patches/features/all/rt/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch @@ -1,7 +1,7 @@ Subject: scsi: qla2xxx: Use local_irq_save_nort() in qla2x00_poll From: John Kacur Date: Fri, 27 Apr 2012 12:48:46 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz RT triggers the following: diff --git a/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch b/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch index e8bcc2215..83a06520b 100644 --- a/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch +++ b/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch @@ -1,7 +1,7 @@ Subject: seqlock: Prevent rt starvation From: Thomas Gleixner Date: Wed, 22 Feb 2012 12:03:30 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz If a low prio writer gets preempted while holding the seqlock write locked, a high prio reader spins forever on RT. @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h -@@ -220,20 +220,30 @@ static inline int read_seqcount_retry(co +@@ -221,20 +221,30 @@ static inline int read_seqcount_retry(co return __read_seqcount_retry(s, start); } @@ -63,7 +63,7 @@ Signed-off-by: Thomas Gleixner /** * raw_write_seqcount_barrier - do a seq write barrier * @s: pointer to seqcount_t -@@ -428,10 +438,32 @@ typedef struct { +@@ -429,10 +439,32 @@ typedef struct { /* * Read side functions for starting and finalizing a read side section. 
*/ @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { -@@ -446,36 +478,36 @@ static inline unsigned read_seqretry(con +@@ -447,36 +479,36 @@ static inline unsigned read_seqretry(con static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); @@ -139,7 +139,7 @@ Signed-off-by: Thomas Gleixner spin_unlock_irq(&sl->lock); } -@@ -484,7 +516,7 @@ static inline unsigned long __write_seql +@@ -485,7 +517,7 @@ static inline unsigned long __write_seql unsigned long flags; spin_lock_irqsave(&sl->lock, flags); @@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner return flags; } -@@ -494,7 +526,7 @@ static inline unsigned long __write_seql +@@ -495,7 +527,7 @@ static inline unsigned long __write_seql static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { @@ -159,7 +159,7 @@ Signed-off-by: Thomas Gleixner --- a/include/net/neighbour.h +++ b/include/net/neighbour.h -@@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct +@@ -450,7 +450,7 @@ static inline int neigh_hh_bridge(struct } #endif @@ -167,8 +167,8 @@ Signed-off-by: Thomas Gleixner +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq; - int hh_len; -@@ -470,7 +470,7 @@ static inline int neigh_hh_output(const + unsigned int hh_len; +@@ -474,7 +474,7 @@ static inline int neigh_hh_output(const static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) { @@ -177,7 +177,7 @@ Signed-off-by: Thomas Gleixner if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) return neigh_hh_output(hh, skb); -@@ -511,7 +511,7 @@ struct neighbour_cb { +@@ -515,7 +515,7 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) diff --git a/debian/patches/features/all/rt/signal-fix-up-rcu-wreckage.patch b/debian/patches/features/all/rt/signal-fix-up-rcu-wreckage.patch index d50ac9ec3..3eabcf73e 100644 --- a/debian/patches/features/all/rt/signal-fix-up-rcu-wreckage.patch +++ b/debian/patches/features/all/rt/signal-fix-up-rcu-wreckage.patch @@ -1,7 +1,7 @@ Subject: signal: Make __lock_task_sighand() RT aware From: Thomas Gleixner Date: Fri, 22 Jul 2011 08:07:08 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz local_irq_save() + spin_lock(&sighand->siglock) does not work on -RT. Use the nort variants. @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1295,12 +1295,12 @@ struct sighand_struct *__lock_task_sigha +@@ -1300,12 +1300,12 @@ struct sighand_struct *__lock_task_sigha * Disable interrupts early to avoid deadlocks. * See rcu_read_unlock() comment header for details. 
*/ @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner break; } /* -@@ -1321,7 +1321,7 @@ struct sighand_struct *__lock_task_sigha +@@ -1326,7 +1326,7 @@ struct sighand_struct *__lock_task_sigha } spin_unlock(&sighand->siglock); rcu_read_unlock(); diff --git a/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch b/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch index 6a9918da8..882e65648 100644 --- a/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch +++ b/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch @@ -1,7 +1,7 @@ Subject: signal: Revert ptrace preempt magic From: Thomas Gleixner Date: Wed, 21 Sep 2011 19:57:12 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more than a bandaid around the ptrace design trainwreck. It's not a @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1865,15 +1865,7 @@ static void ptrace_stop(int exit_code, i +@@ -1874,15 +1874,7 @@ static void ptrace_stop(int exit_code, i if (gstop_done && ptrace_reparented(current)) do_notify_parent_cldstop(current, false, why); diff --git a/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch index 4e554b5f1..d2afedb38 100644 --- a/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch +++ b/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:44:56 -0500 Subject: signals: Allow rt tasks to cache one sigqueue struct -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz To avoid allocation allow rt tasks to cache one sigqueue struct in task struct. 
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -753,6 +753,8 @@ struct task_struct { +@@ -788,6 +788,8 @@ struct task_struct { /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct *sighand; @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner /* Restored if set_restore_sigmask() was used: */ --- a/include/linux/signal.h +++ b/include/linux/signal.h -@@ -231,6 +231,7 @@ static inline void init_sigpending(struc +@@ -243,6 +243,7 @@ static inline void init_sigpending(struc } extern void flush_sigqueue(struct sigpending *queue); @@ -50,7 +50,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1607,6 +1607,7 @@ static __latent_entropy struct task_stru +@@ -1649,6 +1649,7 @@ static __latent_entropy struct task_stru spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); @@ -68,7 +68,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -357,13 +358,30 @@ static bool task_participate_group_stop( +@@ -358,13 +359,30 @@ static bool task_participate_group_stop( return false; } @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner { struct sigqueue *q = NULL; struct user_struct *user; -@@ -380,7 +398,10 @@ static struct sigqueue * +@@ -381,7 +399,10 @@ static struct sigqueue * if (override_rlimit || atomic_read(&user->sigpending) <= task_rlimit(t, RLIMIT_SIGPENDING)) { @@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner } else { print_dropped_signal(sig); } -@@ -397,6 +418,13 @@ static struct sigqueue * +@@ -398,6 +419,13 @@ static struct sigqueue * return q; } @@ -126,7 +126,7 @@ Signed-off-by: Thomas Gleixner static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) -@@ -406,6 +434,21 @@ static void __sigqueue_free(struct sigqu +@@ -407,6 +435,21 @@ static void __sigqueue_free(struct sigqu kmem_cache_free(sigqueue_cachep, q); } @@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; -@@ -419,6 +462,21 @@ void flush_sigqueue(struct sigpending *q +@@ -420,6 +463,21 @@ void flush_sigqueue(struct sigpending *q } /* @@ -170,7 +170,7 @@ Signed-off-by: Thomas Gleixner * Flush all pending signals for this kthread. */ void flush_signals(struct task_struct *t) -@@ -539,7 +597,7 @@ static void collect_signal(int sig, stru +@@ -540,7 +598,7 @@ static void collect_signal(int sig, stru (info->si_code == SI_TIMER) && (info->si_sys_private); @@ -179,7 +179,7 @@ Signed-off-by: Thomas Gleixner } else { /* * Ok, it wasn't in the queue. 
This must be -@@ -575,6 +633,8 @@ int dequeue_signal(struct task_struct *t +@@ -576,6 +634,8 @@ int dequeue_signal(struct task_struct *t bool resched_timer = false; int signr; @@ -188,7 +188,7 @@ Signed-off-by: Thomas Gleixner /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ -@@ -1504,7 +1564,8 @@ EXPORT_SYMBOL(kill_pid); +@@ -1513,7 +1573,8 @@ EXPORT_SYMBOL(kill_pid); */ struct sigqueue *sigqueue_alloc(void) { diff --git a/debian/patches/features/all/rt/skbufhead-raw-lock.patch b/debian/patches/features/all/rt/skbufhead-raw-lock.patch index c94a756de..b63a2a6d5 100644 --- a/debian/patches/features/all/rt/skbufhead-raw-lock.patch +++ b/debian/patches/features/all/rt/skbufhead-raw-lock.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 12 Jul 2011 15:38:34 +0200 Subject: net: Use skbufhead with raw lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use the rps lock as rawlock so we can keep irq-off regions. It looks low latency. However we can't kfree() from this context therefore we defer this @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -2768,6 +2768,7 @@ struct softnet_data { +@@ -2772,6 +2772,7 @@ struct softnet_data { unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h -@@ -285,6 +285,7 @@ struct sk_buff_head { +@@ -288,6 +288,7 @@ struct sk_buff_head { __u32 qlen; spinlock_t lock; @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner }; struct sk_buff; -@@ -1587,6 +1588,12 @@ static inline void skb_queue_head_init(s +@@ -1668,6 +1669,12 @@ static inline void skb_queue_head_init(s __skb_queue_head_init(list); } @@ -49,7 +49,7 @@ Signed-off-by: Thomas Gleixner { --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -211,14 +211,14 @@ static inline struct hlist_head *dev_ind +@@ -217,14 +217,14 @@ static inline struct hlist_head *dev_ind static inline void rps_lock(struct softnet_data *sd) { #ifdef CONFIG_RPS @@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner #endif } -@@ -4319,7 +4319,7 @@ static void flush_backlog(struct work_st +@@ -4581,7 +4581,7 @@ static void flush_backlog(struct work_st skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner input_queue_head_incr(sd); } } -@@ -4329,11 +4329,14 @@ static void flush_backlog(struct work_st +@@ -4591,11 +4591,14 @@ static void flush_backlog(struct work_st skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); @@ -91,7 +91,7 @@ Signed-off-by: Thomas Gleixner } static void flush_all_backlogs(void) -@@ -4867,7 +4870,9 @@ static int process_backlog(struct napi_s +@@ -5142,7 +5145,9 @@ static int process_backlog(struct napi_s while (again) { struct sk_buff *skb; @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner rcu_read_lock(); __netif_receive_skb(skb); rcu_read_unlock(); -@@ -4875,9 +4880,9 @@ static int process_backlog(struct napi_s +@@ -5150,9 +5155,9 @@ static int process_backlog(struct napi_s if (++work >= quota) return work; @@ -112,9 +112,9 @@ Signed-off-by: Thomas Gleixner rps_lock(sd); if 
(skb_queue_empty(&sd->input_pkt_queue)) { /* -@@ -5318,13 +5323,21 @@ static __latent_entropy void net_rx_acti - struct softnet_data *sd = this_cpu_ptr(&softnet_data); - unsigned long time_limit = jiffies + 2; +@@ -5592,13 +5597,21 @@ static __latent_entropy void net_rx_acti + unsigned long time_limit = jiffies + + usecs_to_jiffies(netdev_budget_usecs); int budget = netdev_budget; + struct sk_buff_head tofree_q; + struct sk_buff *skb; @@ -134,7 +134,7 @@ Signed-off-by: Thomas Gleixner for (;;) { struct napi_struct *n; -@@ -8089,6 +8102,9 @@ static int dev_cpu_dead(unsigned int old +@@ -8413,6 +8426,9 @@ static int dev_cpu_dead(unsigned int old netif_rx_ni(skb); input_queue_head_incr(oldsd); } @@ -144,7 +144,7 @@ Signed-off-by: Thomas Gleixner return 0; } -@@ -8392,8 +8408,9 @@ static int __init net_dev_init(void) +@@ -8716,8 +8732,9 @@ static int __init net_dev_init(void) INIT_WORK(flush, flush_backlog); diff --git a/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch b/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch index ef5a1bf0b..5f46b58b0 100644 --- a/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch +++ b/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 15 Apr 2015 19:00:47 +0200 Subject: slub: Disable SLUB_CPU_PARTIAL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915 |in_atomic(): 1, irqs_disabled(): 0, pid: 87, name: rcuop/7 @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/init/Kconfig +++ b/init/Kconfig -@@ -1865,7 +1865,7 @@ config SLAB_FREELIST_RANDOM +@@ -1589,7 +1589,7 @@ config SLAB_FREELIST_HARDENED config SLUB_CPU_PARTIAL default y diff --git a/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch b/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch index 534869273..538380e40 100644 --- a/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch +++ b/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch @@ -1,7 +1,7 @@ Subject: slub: Enable irqs for __GFP_WAIT From: Thomas Gleixner Date: Wed, 09 Jan 2013 12:08:15 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz SYSTEM_RUNNING might be too late for enabling interrupts. Allocations with GFP_WAIT can happen before that. So use this as an indicator. 
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/mm/slub.c +++ b/mm/slub.c -@@ -1538,14 +1538,17 @@ static struct page *allocate_slab(struct +@@ -1572,14 +1572,17 @@ static struct page *allocate_slab(struct void *start, *p; int idx, order; bool shuffle; @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner local_irq_enable(); flags |= s->allocflags; -@@ -1620,11 +1623,7 @@ static struct page *allocate_slab(struct +@@ -1654,11 +1657,7 @@ static struct page *allocate_slab(struct page->frozen = 1; out: diff --git a/debian/patches/features/all/rt/smp-hotplug-Move-unparking-of-percpu-threads-to-the-.patch b/debian/patches/features/all/rt/smp-hotplug-Move-unparking-of-percpu-threads-to-the-.patch deleted file mode 100644 index 939e51e14..000000000 --- a/debian/patches/features/all/rt/smp-hotplug-Move-unparking-of-percpu-threads-to-the-.patch +++ /dev/null @@ -1,158 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 6 Jul 2017 01:57:55 -0700 -Subject: [PATCH] smp/hotplug: Move unparking of percpu threads to the control - CPU -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Upstream commit 9cd4f1a4e7a858849e889a081a99adff83e08e4c - -Vikram reported the following backtrace: - - BUG: scheduling while atomic: swapper/7/0/0x00000002 - CPU: 7 PID: 0 Comm: swapper/7 Not tainted 4.9.32-perf+ #680 - schedule - schedule_hrtimeout_range_clock - schedule_hrtimeout - wait_task_inactive - __kthread_bind_mask - __kthread_bind - __kthread_unpark - kthread_unpark - cpuhp_online_idle - cpu_startup_entry - secondary_start_kernel - -He analyzed correctly that a parked cpu hotplug thread of an offlined CPU -was still on the runqueue when the CPU came back online and tried to unpark -it. This causes the thread which invoked kthread_unpark() to call -wait_task_inactive() and subsequently schedule() with preemption disabled. -His proposed workaround was to "make sure" that a parked thread has -scheduled out when the CPU goes offline, so the situation cannot happen. - -But that's still wrong because the root cause is not the fact that the -percpu thread is still on the runqueue and neither that preemption is -disabled, which could be simply solved by enabling preemption before -calling kthread_unpark(). - -The real issue is that the calling thread is the idle task of the upcoming -CPU, which is not supposed to call anything which might sleep. The moron, -who wrote that code, missed completely that kthread_unpark() might end up -in schedule(). - -The solution is simpler than expected. The thread which controls the -hotplug operation is waiting for the CPU to call complete() on the hotplug -state completion. So the idle task of the upcoming CPU can set its state to -CPUHP_AP_ONLINE_IDLE and invoke complete(). This in turn wakes the control -task on a different CPU, which then can safely do the unpark and kick the -now unparked hotplug thread of the upcoming CPU to complete the bringup to -the final target state. 
- -Control CPU AP - -bringup_cpu(); - __cpu_up() ------------> - bringup_ap(); - bringup_wait_for_ap() - wait_for_completion(); - cpuhp_online_idle(); - <------------ complete(); - unpark(AP->stopper); - unpark(AP->hotplugthread); - while(1) - do_idle(); - kick(AP->hotplugthread); - wait_for_completion(); hotplug_thread() - run_online_callbacks(); - complete(); - -Fixes: 8df3e07e7f21 ("cpu/hotplug: Let upcoming cpu bring itself fully up") -Reported-by: Vikram Mulukutla -Signed-off-by: Thomas Gleixner -Acked-by: Peter Zijlstra -Cc: Sebastian Sewior -Cc: Rusty Russell -Cc: Tejun Heo -Cc: Andrew Morton -Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707042218020.2131@nanos -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/cpu.c | 37 ++++++++++++++++++------------------- - 1 file changed, 18 insertions(+), 19 deletions(-) - ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -344,13 +344,25 @@ void cpu_hotplug_enable(void) - EXPORT_SYMBOL_GPL(cpu_hotplug_enable); - #endif /* CONFIG_HOTPLUG_CPU */ - --/* Notifier wrappers for transitioning to state machine */ -+static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st); - - static int bringup_wait_for_ap(unsigned int cpu) - { - struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); - -+ /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ - wait_for_completion(&st->done); -+ BUG_ON(!cpu_online(cpu)); -+ -+ /* Unpark the stopper thread and the hotplug thread of the target cpu */ -+ stop_machine_unpark(cpu); -+ kthread_unpark(st->thread); -+ -+ /* Should we go further up ? */ -+ if (st->target > CPUHP_AP_ONLINE_IDLE) { -+ __cpuhp_kick_ap_work(st); -+ wait_for_completion(&st->done); -+ } - return st->result; - } - -@@ -371,9 +383,7 @@ static int bringup_cpu(unsigned int cpu) - irq_unlock_sparse(); - if (ret) - return ret; -- ret = bringup_wait_for_ap(cpu); -- BUG_ON(!cpu_online(cpu)); -- return ret; -+ return bringup_wait_for_ap(cpu); - } - - /* -@@ -859,31 +869,20 @@ void notify_cpu_starting(unsigned int cp - } - - /* -- * Called from the idle task. We need to set active here, so we can kick off -- * the stopper thread and unpark the smpboot threads. If the target state is -- * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the -- * cpu further. -+ * Called from the idle task. Wake up the controlling task which brings the -+ * stopper and the hotplug thread of the upcoming CPU up and then delegates -+ * the rest of the online bringup to the hotplug thread. - */ - void cpuhp_online_idle(enum cpuhp_state state) - { - struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); -- unsigned int cpu = smp_processor_id(); - - /* Happens for the boot cpu */ - if (state != CPUHP_AP_ONLINE_IDLE) - return; - - st->state = CPUHP_AP_ONLINE_IDLE; -- -- /* Unpark the stopper thread and the hotplug thread of this cpu */ -- stop_machine_unpark(cpu); -- kthread_unpark(st->thread); -- -- /* Should we go further up ? 
*/ -- if (st->target > CPUHP_AP_ONLINE_IDLE) -- __cpuhp_kick_ap_work(st); -- else -- complete(&st->done); -+ complete(&st->done); - } - - /* Requires cpu_add_remove_lock to be held */ diff --git a/debian/patches/features/all/rt/snd-pcm-fix-snd_pcm_stream_lock-irqs_disabled-splats.patch b/debian/patches/features/all/rt/snd-pcm-fix-snd_pcm_stream_lock-irqs_disabled-splats.patch index 4856ce435..4767e88e0 100644 --- a/debian/patches/features/all/rt/snd-pcm-fix-snd_pcm_stream_lock-irqs_disabled-splats.patch +++ b/debian/patches/features/all/rt/snd-pcm-fix-snd_pcm_stream_lock-irqs_disabled-splats.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Wed, 18 Feb 2015 15:09:23 +0100 Subject: snd/pcm: fix snd_pcm_stream_lock*() irqs_disabled() splats -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Locking functions previously using read_lock_irq()/read_lock_irqsave() were changed to local_irq_disable/save(), leading to gripes. Use nort variants. @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c -@@ -136,7 +136,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock) +@@ -148,7 +148,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock) void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) { if (!substream->pcm->nonatomic) @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior snd_pcm_stream_lock(substream); } EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); -@@ -151,7 +151,7 @@ void snd_pcm_stream_unlock_irq(struct sn +@@ -163,7 +163,7 @@ void snd_pcm_stream_unlock_irq(struct sn { snd_pcm_stream_unlock(substream); if (!substream->pcm->nonatomic) @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior } EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); -@@ -159,7 +159,7 @@ unsigned long _snd_pcm_stream_lock_irqsa +@@ -171,7 +171,7 @@ unsigned long _snd_pcm_stream_lock_irqsa { unsigned long flags = 0; if (!substream->pcm->nonatomic) @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior snd_pcm_stream_lock(substream); return flags; } -@@ -177,7 +177,7 @@ void snd_pcm_stream_unlock_irqrestore(st +@@ -189,7 +189,7 @@ void snd_pcm_stream_unlock_irqrestore(st { snd_pcm_stream_unlock(substream); if (!substream->pcm->nonatomic) diff --git a/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch b/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch index e64943479..6e21f70ed 100644 --- a/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch +++ b/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch @@ -1,7 +1,7 @@ Subject: softirq: Disable softirq stacks for RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 13:59:17 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Disable extra stacks for softirqs. We want to preempt softirqs and having them on special IRQ-stack does not make this easier. 
@@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c -@@ -638,6 +638,7 @@ void irq_ctx_init(void) +@@ -685,6 +685,7 @@ void irq_ctx_init(void) } } @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner void do_softirq_own_stack(void) { struct thread_info *curtp, *irqtp; -@@ -655,6 +656,7 @@ void do_softirq_own_stack(void) +@@ -702,6 +703,7 @@ void do_softirq_own_stack(void) if (irqtp->flags) set_bits(irqtp->flags, &curtp->flags); } @@ -74,7 +74,7 @@ Signed-off-by: Thomas Gleixner mflr r0 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c -@@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) +@@ -148,6 +148,7 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } @@ -82,7 +82,7 @@ Signed-off-by: Thomas Gleixner void do_softirq_own_stack(void) { struct thread_info *curctx; -@@ -174,6 +175,7 @@ void do_softirq_own_stack(void) +@@ -175,6 +176,7 @@ void do_softirq_own_stack(void) "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" ); } @@ -92,7 +92,7 @@ Signed-off-by: Thomas Gleixner { --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c -@@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, st +@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, st set_irq_regs(old_regs); } @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner void do_softirq_own_stack(void) { void *orig_sp, *sp = softirq_stack[smp_processor_id()]; -@@ -868,6 +869,7 @@ void do_softirq_own_stack(void) +@@ -869,6 +870,7 @@ void do_softirq_own_stack(void) __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); } @@ -110,7 +110,7 @@ Signed-off-by: Thomas Gleixner void fixup_irqs(void) --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S -@@ -889,6 +889,7 @@ EXPORT_SYMBOL(native_load_gs_index) +@@ -950,6 +950,7 @@ EXPORT_SYMBOL(native_load_gs_index) jmp 2b .previous @@ -118,17 +118,17 @@ Signed-off-by: Thomas Gleixner /* Call softirq on interrupt stack. Interrupts are off. 
*/ ENTRY(do_softirq_own_stack) pushq %rbp -@@ -901,6 +902,7 @@ ENTRY(do_softirq_own_stack) - decl PER_CPU_VAR(irq_count) +@@ -960,6 +961,7 @@ ENTRY(do_softirq_own_stack) + leaveq ret - END(do_softirq_own_stack) + ENDPROC(do_softirq_own_stack) +#endif #ifdef CONFIG_XEN - idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 + idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c -@@ -127,6 +127,7 @@ void irq_ctx_init(int cpu) +@@ -128,6 +128,7 @@ void irq_ctx_init(int cpu) cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); } @@ -136,7 +136,7 @@ Signed-off-by: Thomas Gleixner void do_softirq_own_stack(void) { struct irq_stack *irqstk; -@@ -143,6 +144,7 @@ void do_softirq_own_stack(void) +@@ -144,6 +145,7 @@ void do_softirq_own_stack(void) call_on_stack(__do_softirq, isp); } @@ -146,7 +146,7 @@ Signed-off-by: Thomas Gleixner { --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -484,7 +484,7 @@ struct softirq_action +@@ -495,7 +495,7 @@ struct softirq_action asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); diff --git a/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch b/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch index b8ff3b1ce..20ebf3c26 100644 --- a/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch +++ b/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch @@ -1,7 +1,7 @@ Subject: softirq: Check preemption after reenabling interrupts From: Thomas Gleixner Date: Sun, 13 Nov 2011 17:17:09 +0100 (CET) -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz raise_softirq_irqoff() disables interrupts and wakes the softirq daemon, but after reenabling interrupts there is no preemption check, @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner --- a/block/blk-softirq.c +++ b/block/blk-softirq.c -@@ -52,6 +52,7 @@ static void trigger_softirq(void *data) +@@ -53,6 +53,7 @@ static void trigger_softirq(void *data) raise_softirq_irqoff(BLOCK_SOFTIRQ); local_irq_restore(flags); @@ -31,7 +31,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -90,6 +91,7 @@ static int blk_softirq_cpu_dead(unsigned +@@ -91,6 +92,7 @@ static int blk_softirq_cpu_dead(unsigned this_cpu_ptr(&blk_cpu_done)); raise_softirq_irqoff(BLOCK_SOFTIRQ); local_irq_enable(); @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner return 0; } -@@ -142,6 +144,7 @@ void __blk_complete_request(struct reque +@@ -143,6 +145,7 @@ void __blk_complete_request(struct reque goto do_local; local_irq_restore(flags); @@ -49,7 +49,7 @@ Signed-off-by: Thomas Gleixner /** --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -186,8 +186,10 @@ do { \ +@@ -187,8 +187,10 @@ do { \ #ifdef CONFIG_PREEMPT_RT_BASE # define preempt_enable_no_resched() sched_preempt_enable_no_resched() @@ -60,7 +60,7 @@ Signed-off-by: Thomas Gleixner #endif #define preemptible() (preempt_count() == 0 && !irqs_disabled()) -@@ -274,6 +276,7 @@ do { \ +@@ -275,6 +277,7 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() @@ -70,7 +70,7 @@ Signed-off-by: Thomas Gleixner #define migrate_disable() barrier() --- a/lib/irq_poll.c +++ b/lib/irq_poll.c -@@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop +@@ -37,6 +37,7 @@ void irq_poll_sched(struct 
irq_poll *iop list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_restore(flags); @@ -78,7 +78,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(irq_poll_sched); -@@ -71,6 +72,7 @@ void irq_poll_complete(struct irq_poll * +@@ -72,6 +73,7 @@ void irq_poll_complete(struct irq_poll * local_irq_save(flags); __irq_poll_complete(iop); local_irq_restore(flags); @@ -86,7 +86,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(irq_poll_complete); -@@ -95,6 +97,7 @@ static void __latent_entropy irq_poll_so +@@ -96,6 +98,7 @@ static void __latent_entropy irq_poll_so } local_irq_enable(); @@ -94,7 +94,7 @@ Signed-off-by: Thomas Gleixner /* Even though interrupts have been re-enabled, this * access is safe because interrupts can only add new -@@ -132,6 +135,7 @@ static void __latent_entropy irq_poll_so +@@ -133,6 +136,7 @@ static void __latent_entropy irq_poll_so __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); @@ -102,7 +102,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -195,6 +199,7 @@ static int irq_poll_cpu_dead(unsigned in +@@ -196,6 +200,7 @@ static int irq_poll_cpu_dead(unsigned in this_cpu_ptr(&blk_cpu_iopoll)); __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); @@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner } --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -2404,6 +2404,7 @@ static void __netif_reschedule(struct Qd +@@ -2431,6 +2431,7 @@ static void __netif_reschedule(struct Qd sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -120,7 +120,7 @@ Signed-off-by: Thomas Gleixner } void __netif_schedule(struct Qdisc *q) -@@ -2466,6 +2467,7 @@ void __dev_kfree_skb_irq(struct sk_buff +@@ -2493,6 +2494,7 @@ void __dev_kfree_skb_irq(struct sk_buff __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -128,7 +128,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -3773,6 +3775,7 @@ static int enqueue_to_backlog(struct sk_ +@@ -3855,6 +3857,7 @@ static int enqueue_to_backlog(struct sk_ rps_unlock(sd); local_irq_restore(flags); @@ -136,15 +136,14 @@ Signed-off-by: Thomas Gleixner atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -@@ -4822,6 +4825,7 @@ static void net_rps_action_and_irq_enabl +@@ -5104,12 +5107,14 @@ static void net_rps_action_and_irq_enabl sd->rps_ipi_list = NULL; local_irq_enable(); + preempt_check_resched_rt(); /* Send pending IPI's to kick RPS processing on remote cpus. 
*/ - while (remsd) { -@@ -4835,6 +4839,7 @@ static void net_rps_action_and_irq_enabl + net_rps_send_ipi(remsd); } else #endif local_irq_enable(); @@ -152,7 +151,7 @@ Signed-off-by: Thomas Gleixner } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -4912,6 +4917,7 @@ void __napi_schedule(struct napi_struct +@@ -5187,6 +5192,7 @@ void __napi_schedule(struct napi_struct local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -160,11 +159,11 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__napi_schedule); -@@ -8074,6 +8080,7 @@ static int dev_cpu_dead(unsigned int old +@@ -8391,6 +8397,7 @@ static int dev_cpu_dead(unsigned int old raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); + preempt_check_resched_rt(); - /* Process offline CPU's input_pkt_queue */ - while ((skb = __skb_dequeue(&oldsd->process_queue))) { + #ifdef CONFIG_RPS + remsd = oldsd->rps_ipi_list; diff --git a/debian/patches/features/all/rt/softirq-split-locks.patch b/debian/patches/features/all/rt/softirq-split-locks.patch index cd91b3976..a4c3afaa1 100644 --- a/debian/patches/features/all/rt/softirq-split-locks.patch +++ b/debian/patches/features/all/rt/softirq-split-locks.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 04 Oct 2012 14:20:47 +0100 Subject: softirq: Split softirq locks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The 3.x RT series removed the split softirq implementation in favour of pushing softirq processing into the context of the thread which @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h -@@ -3,6 +3,39 @@ +@@ -4,6 +4,39 @@ #include @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_TRACE_IRQFLAGS extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else -@@ -30,5 +63,6 @@ static inline void local_bh_enable(void) +@@ -31,5 +64,6 @@ static inline void local_bh_enable(void) { __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } @@ -86,7 +86,7 @@ Signed-off-by: Thomas Gleixner #endif /* _LINUX_BH_H */ --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -481,10 +481,11 @@ struct softirq_action +@@ -492,10 +492,11 @@ struct softirq_action void (*action)(struct softirq_action *); }; @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner void do_softirq_own_stack(void); #else static inline void do_softirq_own_stack(void) -@@ -492,6 +493,9 @@ static inline void do_softirq_own_stack( +@@ -503,6 +504,9 @@ static inline void do_softirq_own_stack( __do_softirq(); } #endif @@ -110,7 +110,7 @@ Signed-off-by: Thomas Gleixner extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -@@ -499,6 +503,7 @@ extern void __raise_softirq_irqoff(unsig +@@ -510,6 +514,7 @@ extern void __raise_softirq_irqoff(unsig extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); @@ -118,9 +118,9 @@ Signed-off-by: Thomas Gleixner DECLARE_PER_CPU(struct task_struct *, ksoftirqd); -@@ -656,6 +661,12 @@ void tasklet_hrtimer_cancel(struct taskl - tasklet_kill(&ttimer->tasklet); - } +@@ -642,6 +647,12 @@ extern void tasklet_kill_immediate(struc + extern void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data); +#ifdef CONFIG_PREEMPT_RT_FULL +extern void softirq_early_init(void); @@ 
-133,7 +133,7 @@ Signed-off-by: Thomas Gleixner * --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -50,7 +50,11 @@ +@@ -51,7 +51,11 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) @@ -146,7 +146,7 @@ Signed-off-by: Thomas Gleixner /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 -@@ -80,9 +84,15 @@ +@@ -81,9 +85,15 @@ #include #define hardirq_count() (preempt_count() & HARDIRQ_MASK) @@ -163,7 +163,7 @@ Signed-off-by: Thomas Gleixner /* * Are we doing bottom half or hardware interrupt processing? -@@ -100,7 +110,6 @@ +@@ -101,7 +111,6 @@ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) @@ -173,7 +173,7 @@ Signed-off-by: Thomas Gleixner (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1050,6 +1050,8 @@ struct task_struct { +@@ -1096,6 +1096,8 @@ struct task_struct { #endif #ifdef CONFIG_PREEMPT_RT_BASE struct rcu_head put_rcu; @@ -182,7 +182,7 @@ Signed-off-by: Thomas Gleixner #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; -@@ -1222,6 +1224,7 @@ extern struct pid *cad_pid; +@@ -1313,6 +1315,7 @@ extern struct pid *cad_pid; /* * Per process flags */ @@ -192,7 +192,7 @@ Signed-off-by: Thomas Gleixner #define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ --- a/init/main.c +++ b/init/main.c -@@ -537,6 +537,7 @@ asmlinkage __visible void __init start_k +@@ -539,6 +539,7 @@ asmlinkage __visible void __init start_k setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); @@ -507,7 +507,7 @@ Signed-off-by: Thomas Gleixner + lockdep_softirq_exit(); + current->flags &= ~PF_IN_SOFTIRQ; + vtime_account_irq_enter(current); -+ tsk_restore_flags(current, old_flags, PF_MEMALLOC); ++ current_restore_flags(old_flags, PF_MEMALLOC); +} + +/* @@ -763,7 +763,7 @@ Signed-off-by: Thomas Gleixner void open_softirq(int nr, void (*action)(struct softirq_action *)) { softirq_vec[nr].action = action; -@@ -747,23 +1097,7 @@ EXPORT_SYMBOL(tasklet_unlock_wait); +@@ -696,23 +1046,7 @@ EXPORT_SYMBOL(tasklet_unlock_wait); static int ksoftirqd_should_run(unsigned int cpu) { @@ -788,7 +788,7 @@ Signed-off-by: Thomas Gleixner } #ifdef CONFIG_HOTPLUG_CPU -@@ -830,6 +1164,8 @@ static int takeover_tasklets(unsigned in +@@ -779,6 +1113,8 @@ static int takeover_tasklets(unsigned in static struct smp_hotplug_thread softirq_threads = { .store = &ksoftirqd, @@ -799,7 +799,7 @@ Signed-off-by: Thomas Gleixner .thread_comm = "ksoftirqd/%u", --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c -@@ -881,14 +881,7 @@ static bool can_stop_idle_tick(int cpu, +@@ -895,14 +895,7 @@ static bool can_stop_idle_tick(int cpu, return false; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { @@ -817,7 +817,7 @@ Signed-off-by: Thomas Gleixner --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3845,11 +3845,9 @@ int netif_rx_ni(struct sk_buff *skb) +@@ -4061,11 +4061,9 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); diff --git a/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch b/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch index fccec8fc9..ea5c4257c 100644 --- a/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch +++ b/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: 
Wed, 20 Jan 2016 16:34:17 +0100 Subject: softirq: split timer softirqs out of ksoftirqd -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The softirqd runs in -RT with SCHED_FIFO (prio 1) and deals mostly with timer wakeup which can not happen in hardirq context. The prio has been @@ -173,7 +173,7 @@ Signed-off-by: Sebastian Andrzej Siewior local_irq_restore(flags); #endif } -@@ -1176,18 +1225,30 @@ static int takeover_tasklets(unsigned in +@@ -1125,18 +1174,30 @@ static int takeover_tasklets(unsigned in static struct smp_hotplug_thread softirq_threads = { .store = &ksoftirqd, .setup = ksoftirqd_set_sched_params, diff --git a/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch b/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch index 8c74dee6e..0a6b53cf1 100644 --- a/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch +++ b/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Fri, 20 Jan 2017 18:10:20 +0100 Subject: [PATCH] softirq: wake the timer softirq if needed -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The irq-exit path only checks the "normal"-softirq thread if it is running and ignores the state of the "timer"-softirq thread. It is possible diff --git a/debian/patches/features/all/rt/sparc64-use-generic-rwsem-spinlocks-rt.patch b/debian/patches/features/all/rt/sparc64-use-generic-rwsem-spinlocks-rt.patch index bcb917bdc..499756b37 100644 --- a/debian/patches/features/all/rt/sparc64-use-generic-rwsem-spinlocks-rt.patch +++ b/debian/patches/features/all/rt/sparc64-use-generic-rwsem-spinlocks-rt.patch @@ -1,7 +1,7 @@ From: Allen Pais Date: Fri, 13 Dec 2013 09:44:41 +0530 Subject: sparc64: use generic rwsem spinlocks rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Signed-off-by: Allen Pais Signed-off-by: Sebastian Andrzej Siewior @@ -11,7 +11,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig -@@ -199,12 +199,10 @@ config NR_CPUS +@@ -206,12 +206,10 @@ config NR_CPUS source kernel/Kconfig.hz config RWSEM_GENERIC_SPINLOCK diff --git a/debian/patches/features/all/rt/spinlock-types-separate-raw.patch b/debian/patches/features/all/rt/spinlock-types-separate-raw.patch index e864f20c0..b8312d601 100644 --- a/debian/patches/features/all/rt/spinlock-types-separate-raw.patch +++ b/debian/patches/features/all/rt/spinlock-types-separate-raw.patch @@ -1,7 +1,7 @@ Subject: spinlock: Split the lock types header From: Thomas Gleixner Date: Wed, 29 Jun 2011 19:34:01 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Split raw_spinlock into its own file and the remaining spinlock_t into its own non-RT header. 
The non-RT header will be replaced later by sleeping diff --git a/debian/patches/features/all/rt/srcu-Prohibit-call_srcu-use-under-raw-spinlocks.patch b/debian/patches/features/all/rt/srcu-Prohibit-call_srcu-use-under-raw-spinlocks.patch new file mode 100644 index 000000000..795b53e94 --- /dev/null +++ b/debian/patches/features/all/rt/srcu-Prohibit-call_srcu-use-under-raw-spinlocks.patch @@ -0,0 +1,403 @@ +From: "Paul E. McKenney" +Date: Tue, 10 Oct 2017 13:52:30 -0700 +Subject: srcu: Prohibit call_srcu() use under raw spinlocks +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +Upstream commit 08265b8f1a139c1cff052b35ab7cf929528f88bb + +Invoking queue_delayed_work() while holding a raw spinlock is forbidden +in -rt kernels, which is exactly what __call_srcu() does, indirectly via +srcu_funnel_gp_start(). This commit therefore downgrades Tree SRCU's +locking from raw to non-raw spinlocks, which works because call_srcu() +is not ever called while holding a raw spinlock. + +Reported-by: Sebastian Andrzej Siewior +Signed-off-by: Paul E. McKenney +--- + include/linux/srcutree.h | 8 +-- + kernel/rcu/srcutree.c | 109 +++++++++++++++++++++++++++++------------------ + 2 files changed, 72 insertions(+), 45 deletions(-) + +--- a/include/linux/srcutree.h ++++ b/include/linux/srcutree.h +@@ -40,7 +40,7 @@ struct srcu_data { + unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ + + /* Update-side state. */ +- raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; ++ spinlock_t __private lock ____cacheline_internodealigned_in_smp; + struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ + unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ +@@ -58,7 +58,7 @@ struct srcu_data { + * Node in SRCU combining tree, similar in function to rcu_data. + */ + struct srcu_node { +- raw_spinlock_t __private lock; ++ spinlock_t __private lock; + unsigned long srcu_have_cbs[4]; /* GP seq for children */ + /* having CBs, but only */ + /* is > ->srcu_gq_seq. */ +@@ -78,7 +78,7 @@ struct srcu_struct { + struct srcu_node *level[RCU_NUM_LVLS + 1]; + /* First node at each level. */ + struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ +- raw_spinlock_t __private lock; /* Protect counters */ ++ spinlock_t __private lock; /* Protect counters */ + struct mutex srcu_gp_mutex; /* Serialize GP work. */ + unsigned int srcu_idx; /* Current rdr array element. */ + unsigned long srcu_gp_seq; /* Grace-period seq #. */ +@@ -107,7 +107,7 @@ struct srcu_struct { + #define __SRCU_STRUCT_INIT(name, pcpu_name) \ + { \ + .sda = &pcpu_name, \ +- .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ ++ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .srcu_gp_seq_needed = 0 - 1, \ + __SRCU_DEP_MAP_INIT(name) \ + } +--- a/kernel/rcu/srcutree.c ++++ b/kernel/rcu/srcutree.c +@@ -54,6 +54,33 @@ static void srcu_invoke_callbacks(struct + static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); + static void process_srcu(struct work_struct *work); + ++/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). 
*/ ++#define spin_lock_rcu_node(p) \ ++do { \ ++ spin_lock(&ACCESS_PRIVATE(p, lock)); \ ++ smp_mb__after_unlock_lock(); \ ++} while (0) ++ ++#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) ++ ++#define spin_lock_irq_rcu_node(p) \ ++do { \ ++ spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ ++ smp_mb__after_unlock_lock(); \ ++} while (0) ++ ++#define spin_unlock_irq_rcu_node(p) \ ++ spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) ++ ++#define spin_lock_irqsave_rcu_node(p, flags) \ ++do { \ ++ spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ ++ smp_mb__after_unlock_lock(); \ ++} while (0) ++ ++#define spin_unlock_irqrestore_rcu_node(p, flags) \ ++ spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ ++ + /* + * Initialize SRCU combining tree. Note that statically allocated + * srcu_struct structures might already have srcu_read_lock() and +@@ -78,7 +105,7 @@ static void init_srcu_struct_nodes(struc + + /* Each pass through this loop initializes one srcu_node structure. */ + rcu_for_each_node_breadth_first(sp, snp) { +- raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); ++ spin_lock_init(&ACCESS_PRIVATE(snp, lock)); + WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != + ARRAY_SIZE(snp->srcu_data_have_cbs)); + for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { +@@ -112,7 +139,7 @@ static void init_srcu_struct_nodes(struc + snp_first = sp->level[level]; + for_each_possible_cpu(cpu) { + sdp = per_cpu_ptr(sp->sda, cpu); +- raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); ++ spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); + rcu_segcblist_init(&sdp->srcu_cblist); + sdp->srcu_cblist_invoking = false; + sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; +@@ -171,7 +198,7 @@ int __init_srcu_struct(struct srcu_struc + /* Don't re-initialize a lock while it is held. */ + debug_check_no_locks_freed((void *)sp, sizeof(*sp)); + lockdep_init_map(&sp->dep_map, name, key, 0); +- raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); ++ spin_lock_init(&ACCESS_PRIVATE(sp, lock)); + return init_srcu_struct_fields(sp, false); + } + EXPORT_SYMBOL_GPL(__init_srcu_struct); +@@ -188,7 +215,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); + */ + int init_srcu_struct(struct srcu_struct *sp) + { +- raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); ++ spin_lock_init(&ACCESS_PRIVATE(sp, lock)); + return init_srcu_struct_fields(sp, false); + } + EXPORT_SYMBOL_GPL(init_srcu_struct); +@@ -211,13 +238,13 @@ static void check_init_srcu_struct(struc + /* The smp_load_acquire() pairs with the smp_store_release(). */ + if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ + return; /* Already initialized. */ +- raw_spin_lock_irqsave_rcu_node(sp, flags); ++ spin_lock_irqsave_rcu_node(sp, flags); + if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { +- raw_spin_unlock_irqrestore_rcu_node(sp, flags); ++ spin_unlock_irqrestore_rcu_node(sp, flags); + return; + } + init_srcu_struct_fields(sp, true); +- raw_spin_unlock_irqrestore_rcu_node(sp, flags); ++ spin_unlock_irqrestore_rcu_node(sp, flags); + } + + /* +@@ -499,7 +526,7 @@ static void srcu_gp_end(struct srcu_stru + mutex_lock(&sp->srcu_cb_mutex); + + /* End the current grace period. 
*/ +- raw_spin_lock_irq_rcu_node(sp); ++ spin_lock_irq_rcu_node(sp); + idx = rcu_seq_state(sp->srcu_gp_seq); + WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); + cbdelay = srcu_get_delay(sp); +@@ -508,7 +535,7 @@ static void srcu_gp_end(struct srcu_stru + gpseq = rcu_seq_current(&sp->srcu_gp_seq); + if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) + sp->srcu_gp_seq_needed_exp = gpseq; +- raw_spin_unlock_irq_rcu_node(sp); ++ spin_unlock_irq_rcu_node(sp); + mutex_unlock(&sp->srcu_gp_mutex); + /* A new grace period can start at this point. But only one. */ + +@@ -516,7 +543,7 @@ static void srcu_gp_end(struct srcu_stru + idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); + idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); + rcu_for_each_node_breadth_first(sp, snp) { +- raw_spin_lock_irq_rcu_node(snp); ++ spin_lock_irq_rcu_node(snp); + cbs = false; + if (snp >= sp->level[rcu_num_lvls - 1]) + cbs = snp->srcu_have_cbs[idx] == gpseq; +@@ -526,7 +553,7 @@ static void srcu_gp_end(struct srcu_stru + snp->srcu_gp_seq_needed_exp = gpseq; + mask = snp->srcu_data_have_cbs[idx]; + snp->srcu_data_have_cbs[idx] = 0; +- raw_spin_unlock_irq_rcu_node(snp); ++ spin_unlock_irq_rcu_node(snp); + if (cbs) + srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); + +@@ -534,11 +561,11 @@ static void srcu_gp_end(struct srcu_stru + if (!(gpseq & counter_wrap_check)) + for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { + sdp = per_cpu_ptr(sp->sda, cpu); +- raw_spin_lock_irqsave_rcu_node(sdp, flags); ++ spin_lock_irqsave_rcu_node(sdp, flags); + if (ULONG_CMP_GE(gpseq, + sdp->srcu_gp_seq_needed + 100)) + sdp->srcu_gp_seq_needed = gpseq; +- raw_spin_unlock_irqrestore_rcu_node(sdp, flags); ++ spin_unlock_irqrestore_rcu_node(sdp, flags); + } + } + +@@ -546,17 +573,17 @@ static void srcu_gp_end(struct srcu_stru + mutex_unlock(&sp->srcu_cb_mutex); + + /* Start a new grace period if needed. */ +- raw_spin_lock_irq_rcu_node(sp); ++ spin_lock_irq_rcu_node(sp); + gpseq = rcu_seq_current(&sp->srcu_gp_seq); + if (!rcu_seq_state(gpseq) && + ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { + srcu_gp_start(sp); +- raw_spin_unlock_irq_rcu_node(sp); ++ spin_unlock_irq_rcu_node(sp); + /* Throttle expedited grace periods: Should be rare! */ + srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff + ? 0 : SRCU_INTERVAL); + } else { +- raw_spin_unlock_irq_rcu_node(sp); ++ spin_unlock_irq_rcu_node(sp); + } + } + +@@ -576,18 +603,18 @@ static void srcu_funnel_exp_start(struct + if (rcu_seq_done(&sp->srcu_gp_seq, s) || + ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) + return; +- raw_spin_lock_irqsave_rcu_node(snp, flags); ++ spin_lock_irqsave_rcu_node(snp, flags); + if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { +- raw_spin_unlock_irqrestore_rcu_node(snp, flags); ++ spin_unlock_irqrestore_rcu_node(snp, flags); + return; + } + WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); +- raw_spin_unlock_irqrestore_rcu_node(snp, flags); ++ spin_unlock_irqrestore_rcu_node(snp, flags); + } +- raw_spin_lock_irqsave_rcu_node(sp, flags); ++ spin_lock_irqsave_rcu_node(sp, flags); + if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) + sp->srcu_gp_seq_needed_exp = s; +- raw_spin_unlock_irqrestore_rcu_node(sp, flags); ++ spin_unlock_irqrestore_rcu_node(sp, flags); + } + + /* +@@ -609,12 +636,12 @@ static void srcu_funnel_gp_start(struct + for (; snp != NULL; snp = snp->srcu_parent) { + if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) + return; /* GP already done and CBs recorded. 
*/ +- raw_spin_lock_irqsave_rcu_node(snp, flags); ++ spin_lock_irqsave_rcu_node(snp, flags); + if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { + snp_seq = snp->srcu_have_cbs[idx]; + if (snp == sdp->mynode && snp_seq == s) + snp->srcu_data_have_cbs[idx] |= sdp->grpmask; +- raw_spin_unlock_irqrestore_rcu_node(snp, flags); ++ spin_unlock_irqrestore_rcu_node(snp, flags); + if (snp == sdp->mynode && snp_seq != s) { + srcu_schedule_cbs_sdp(sdp, do_norm + ? SRCU_INTERVAL +@@ -630,11 +657,11 @@ static void srcu_funnel_gp_start(struct + snp->srcu_data_have_cbs[idx] |= sdp->grpmask; + if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) + snp->srcu_gp_seq_needed_exp = s; +- raw_spin_unlock_irqrestore_rcu_node(snp, flags); ++ spin_unlock_irqrestore_rcu_node(snp, flags); + } + + /* Top of tree, must ensure the grace period will be started. */ +- raw_spin_lock_irqsave_rcu_node(sp, flags); ++ spin_lock_irqsave_rcu_node(sp, flags); + if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { + /* + * Record need for grace period s. Pair with load +@@ -653,7 +680,7 @@ static void srcu_funnel_gp_start(struct + queue_delayed_work(system_power_efficient_wq, &sp->work, + srcu_get_delay(sp)); + } +- raw_spin_unlock_irqrestore_rcu_node(sp, flags); ++ spin_unlock_irqrestore_rcu_node(sp, flags); + } + + /* +@@ -816,7 +843,7 @@ void __call_srcu(struct srcu_struct *sp, + rhp->func = func; + local_irq_save(flags); + sdp = this_cpu_ptr(sp->sda); +- raw_spin_lock_rcu_node(sdp); ++ spin_lock_rcu_node(sdp); + rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); +@@ -830,7 +857,7 @@ void __call_srcu(struct srcu_struct *sp, + sdp->srcu_gp_seq_needed_exp = s; + needexp = true; + } +- raw_spin_unlock_irqrestore_rcu_node(sdp, flags); ++ spin_unlock_irqrestore_rcu_node(sdp, flags); + if (needgp) + srcu_funnel_gp_start(sp, sdp, s, do_norm); + else if (needexp) +@@ -886,7 +913,7 @@ static void __synchronize_srcu(struct sr + + /* + * Make sure that later code is ordered after the SRCU grace +- * period. This pairs with the raw_spin_lock_irq_rcu_node() ++ * period. This pairs with the spin_lock_irq_rcu_node() + * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed + * because the current CPU might have been totally uninvolved with + * (and thus unordered against) that grace period. +@@ -1010,7 +1037,7 @@ void srcu_barrier(struct srcu_struct *sp + */ + for_each_possible_cpu(cpu) { + sdp = per_cpu_ptr(sp->sda, cpu); +- raw_spin_lock_irq_rcu_node(sdp); ++ spin_lock_irq_rcu_node(sdp); + atomic_inc(&sp->srcu_barrier_cpu_cnt); + sdp->srcu_barrier_head.func = srcu_barrier_cb; + debug_rcu_head_queue(&sdp->srcu_barrier_head); +@@ -1019,7 +1046,7 @@ void srcu_barrier(struct srcu_struct *sp + debug_rcu_head_unqueue(&sdp->srcu_barrier_head); + atomic_dec(&sp->srcu_barrier_cpu_cnt); + } +- raw_spin_unlock_irq_rcu_node(sdp); ++ spin_unlock_irq_rcu_node(sdp); + } + + /* Remove the initial count, at which point reaching zero can happen. 
*/ +@@ -1068,17 +1095,17 @@ static void srcu_advance_state(struct sr + */ + idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ + if (idx == SRCU_STATE_IDLE) { +- raw_spin_lock_irq_rcu_node(sp); ++ spin_lock_irq_rcu_node(sp); + if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { + WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); +- raw_spin_unlock_irq_rcu_node(sp); ++ spin_unlock_irq_rcu_node(sp); + mutex_unlock(&sp->srcu_gp_mutex); + return; + } + idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + if (idx == SRCU_STATE_IDLE) + srcu_gp_start(sp); +- raw_spin_unlock_irq_rcu_node(sp); ++ spin_unlock_irq_rcu_node(sp); + if (idx != SRCU_STATE_IDLE) { + mutex_unlock(&sp->srcu_gp_mutex); + return; /* Someone else started the grace period. */ +@@ -1127,19 +1154,19 @@ static void srcu_invoke_callbacks(struct + sdp = container_of(work, struct srcu_data, work.work); + sp = sdp->sp; + rcu_cblist_init(&ready_cbs); +- raw_spin_lock_irq_rcu_node(sdp); ++ spin_lock_irq_rcu_node(sdp); + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); + if (sdp->srcu_cblist_invoking || + !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { +- raw_spin_unlock_irq_rcu_node(sdp); ++ spin_unlock_irq_rcu_node(sdp); + return; /* Someone else on the job or nothing to do. */ + } + + /* We are on the job! Extract and invoke ready callbacks. */ + sdp->srcu_cblist_invoking = true; + rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); +- raw_spin_unlock_irq_rcu_node(sdp); ++ spin_unlock_irq_rcu_node(sdp); + rhp = rcu_cblist_dequeue(&ready_cbs); + for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { + debug_rcu_head_unqueue(rhp); +@@ -1152,13 +1179,13 @@ static void srcu_invoke_callbacks(struct + * Update counts, accelerate new callbacks, and if needed, + * schedule another round of callback invocation. + */ +- raw_spin_lock_irq_rcu_node(sdp); ++ spin_lock_irq_rcu_node(sdp); + rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); + (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, + rcu_seq_snap(&sp->srcu_gp_seq)); + sdp->srcu_cblist_invoking = false; + more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); +- raw_spin_unlock_irq_rcu_node(sdp); ++ spin_unlock_irq_rcu_node(sdp); + if (more) + srcu_schedule_cbs_sdp(sdp, 0); + } +@@ -1171,7 +1198,7 @@ static void srcu_reschedule(struct srcu_ + { + bool pushgp = true; + +- raw_spin_lock_irq_rcu_node(sp); ++ spin_lock_irq_rcu_node(sp); + if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { + if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { + /* All requests fulfilled, time to go idle. */ +@@ -1181,7 +1208,7 @@ static void srcu_reschedule(struct srcu_ + /* Outstanding request and no GP. Start one. 
*/ + srcu_gp_start(sp); + } +- raw_spin_unlock_irq_rcu_node(sp); ++ spin_unlock_irq_rcu_node(sp); + + if (pushgp) + queue_delayed_work(system_power_efficient_wq, &sp->work, delay); diff --git a/debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch b/debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch new file mode 100644 index 000000000..2eacce312 --- /dev/null +++ b/debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch @@ -0,0 +1,71 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 12 Oct 2017 18:37:12 +0200 +Subject: [PATCH] srcu: replace local_irqsave() with a locallock +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +There are two instances which disable interrupts in order to become a +stable this_cpu_ptr() pointer. The restore part is coupled with +spin_unlock_irqrestore() which does not work on RT. +Replace the local_irq_save() call with the appropriate local_lock() +version of it. + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/rcu/srcutree.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/kernel/rcu/srcutree.c ++++ b/kernel/rcu/srcutree.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + #include "rcu.h" + #include "rcu_segcblist.h" +@@ -749,6 +750,8 @@ static void srcu_flip(struct srcu_struct + * negligible when amoritized over that time period, and the extra latency + * of a needlessly non-expedited grace period is similarly negligible. + */ ++static DEFINE_LOCAL_IRQ_LOCK(sp_llock); ++ + static bool srcu_might_be_idle(struct srcu_struct *sp) + { + unsigned long curseq; +@@ -757,13 +760,13 @@ static bool srcu_might_be_idle(struct sr + unsigned long t; + + /* If the local srcu_data structure has callbacks, not idle. */ +- local_irq_save(flags); ++ local_lock_irqsave(sp_llock, flags); + sdp = this_cpu_ptr(sp->sda); + if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(sp_llock, flags); + return false; /* Callbacks already present, so not idle. */ + } +- local_irq_restore(flags); ++ local_unlock_irqrestore(sp_llock, flags); + + /* + * No local callbacks, so probabalistically probe global state. 
+@@ -841,7 +844,7 @@ void __call_srcu(struct srcu_struct *sp, + return; + } + rhp->func = func; +- local_irq_save(flags); ++ local_lock_irqsave(sp_llock, flags); + sdp = this_cpu_ptr(sp->sda); + spin_lock_rcu_node(sdp); + rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); +@@ -857,7 +860,8 @@ void __call_srcu(struct srcu_struct *sp, + sdp->srcu_gp_seq_needed_exp = s; + needexp = true; + } +- spin_unlock_irqrestore_rcu_node(sdp, flags); ++ spin_unlock_rcu_node(sdp); ++ local_unlock_irqrestore(sp_llock, flags); + if (needgp) + srcu_funnel_gp_start(sp, sdp, s, do_norm); + else if (needexp) diff --git a/debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch b/debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch new file mode 100644 index 000000000..ccab258be --- /dev/null +++ b/debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch @@ -0,0 +1,100 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 13 Sep 2017 14:43:41 +0200 +Subject: [PATCH] srcu: use cpu_online() instead custom check +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The current check via srcu_online is slightly racy because after looking +at srcu_online there could be an interrupt that interrupted us long +enough until the CPU we checked against went offline. +An alternative would be to hold the hotplug rwsem (so the CPUs don't +change their state) and then check based on cpu_online() if we queue it +on a specific CPU or not. queue_work_on() itself can handle if something +is enqueued on an offline CPU but a timer which is enqueued on an offline +CPU won't fire until the CPU is back online. + +I am not sure if the removal in rcu_init() is okay or not. I assume that +SRCU won't enqueue a work item before SRCU is up and ready. + +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/rcu/srcutree.c | 22 ++++------------------ + kernel/rcu/tree.c | 6 ------ + 2 files changed, 4 insertions(+), 24 deletions(-) + +--- a/kernel/rcu/srcutree.c ++++ b/kernel/rcu/srcutree.c +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + #include "rcu.h" + #include "rcu_segcblist.h" +@@ -425,21 +426,6 @@ static void srcu_gp_start(struct srcu_st + } + + /* +- * Track online CPUs to guide callback workqueue placement. +- */ +-DEFINE_PER_CPU(bool, srcu_online); +- +-void srcu_online_cpu(unsigned int cpu) +-{ +- WRITE_ONCE(per_cpu(srcu_online, cpu), true); +-} +- +-void srcu_offline_cpu(unsigned int cpu) +-{ +- WRITE_ONCE(per_cpu(srcu_online, cpu), false); +-} +- +-/* + * Place the workqueue handler on the specified CPU if online, otherwise + * just run it whereever. This is useful for placing workqueue handlers + * that are to invoke the specified CPU's callbacks. 
+@@ -450,12 +436,12 @@ static bool srcu_queue_delayed_work_on(i + { + bool ret; + +- preempt_disable(); +- if (READ_ONCE(per_cpu(srcu_online, cpu))) ++ cpus_read_lock(); ++ if (cpu_online(cpu)) + ret = queue_delayed_work_on(cpu, wq, dwork, delay); + else + ret = queue_delayed_work(wq, dwork, delay); +- preempt_enable(); ++ cpus_read_unlock(); + return ret; + } + +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -3775,8 +3775,6 @@ int rcutree_online_cpu(unsigned int cpu) + { + sync_sched_exp_online_cleanup(cpu); + rcutree_affinity_setting(cpu, -1); +- if (IS_ENABLED(CONFIG_TREE_SRCU)) +- srcu_online_cpu(cpu); + return 0; + } + +@@ -3787,8 +3785,6 @@ int rcutree_online_cpu(unsigned int cpu) + int rcutree_offline_cpu(unsigned int cpu) + { + rcutree_affinity_setting(cpu, cpu); +- if (IS_ENABLED(CONFIG_TREE_SRCU)) +- srcu_offline_cpu(cpu); + return 0; + } + +@@ -4236,8 +4232,6 @@ void __init rcu_init(void) + for_each_online_cpu(cpu) { + rcutree_prepare_cpu(cpu); + rcu_cpu_starting(cpu); +- if (IS_ENABLED(CONFIG_TREE_SRCU)) +- srcu_online_cpu(cpu); + } + } + diff --git a/debian/patches/features/all/rt/stop-machine-raw-lock.patch b/debian/patches/features/all/rt/stop-machine-raw-lock.patch index 6972572df..5a23f5784 100644 --- a/debian/patches/features/all/rt/stop-machine-raw-lock.patch +++ b/debian/patches/features/all/rt/stop-machine-raw-lock.patch @@ -1,7 +1,7 @@ Subject: stop_machine: Use raw spinlocks From: Thomas Gleixner Date: Wed, 29 Jun 2011 11:01:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use raw-locks in stomp_machine() to allow locking in irq-off regions. diff --git a/debian/patches/features/all/rt/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch b/debian/patches/features/all/rt/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch index 89d5d5150..4fbb55351 100644 --- a/debian/patches/features/all/rt/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch +++ b/debian/patches/features/all/rt/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:30:27 -0500 Subject: stop_machine: convert stop_machine_run() to PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Instead of playing with non-preemption, introduce explicit startup serialization. 
This is more robust and cleaner as diff --git a/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch b/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch index 46200c02b..2dd90cc71 100644 --- a/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch +++ b/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Wed, 18 Feb 2015 16:05:28 +0100 Subject: sunrpc: Make svc_xprt_do_enqueue() use get_cpu_light() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915 |in_atomic(): 1, irqs_disabled(): 0, pid: 3194, name: rpc.nfsd diff --git a/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch b/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch index 44cee1ed1..52590bcf7 100644 --- a/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch +++ b/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 15 Jul 2010 10:29:00 +0200 Subject: suspend: Prevent might sleep splats -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz timekeeping suspend/resume calls read_persistant_clock() which takes rtc_lock. That results in might sleep warnings because at that point @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/kernel.h +++ b/include/linux/kernel.h -@@ -499,6 +499,7 @@ extern enum system_states { +@@ -531,6 +531,7 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, @@ -87,7 +87,7 @@ Signed-off-by: Thomas Gleixner Enable_cpus: --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c -@@ -384,6 +384,8 @@ static int suspend_enter(suspend_state_t +@@ -428,6 +428,8 @@ static int suspend_enter(suspend_state_t arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner error = syscore_suspend(); if (!error) { *wakeup = pm_wakeup_pending(); -@@ -400,6 +402,8 @@ static int suspend_enter(suspend_state_t +@@ -444,6 +446,8 @@ static int suspend_enter(suspend_state_t syscore_resume(); } diff --git a/debian/patches/features/all/rt/sysfs-realtime-entry.patch b/debian/patches/features/all/rt/sysfs-realtime-entry.patch index 691d4f74f..4a5a18109 100644 --- a/debian/patches/features/all/rt/sysfs-realtime-entry.patch +++ b/debian/patches/features/all/rt/sysfs-realtime-entry.patch @@ -1,7 +1,7 @@ Subject: sysfs: Add /sys/kernel/realtime entry From: Clark Williams Date: Sat Jul 30 21:55:53 2011 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Add a /sys/kernel entry to indicate that the kernel is a realtime kernel. 
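A minimal userspace sketch, for illustration: it assumes the entry behaves as described here (present and reading 1 only on a PREEMPT_RT_FULL kernel); the program itself is hypothetical and not taken from this series.

    #include <stdio.h>

    int main(void)
    {
        int rt = 0;
        FILE *f = fopen("/sys/kernel/realtime", "r");

        if (f) {
            /* The entry contains "1" when the RT featureset is enabled. */
            if (fscanf(f, "%d", &rt) != 1)
                rt = 0;
            fclose(f);
        }
        printf("realtime kernel: %s\n", rt ? "yes" : "no");
        return 0;
    }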
@@ -20,13 +20,13 @@ Signed-off-by: Peter Zijlstra --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c -@@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo); +@@ -140,6 +140,15 @@ KERNEL_ATTR_RO(vmcoreinfo); - #endif /* CONFIG_KEXEC_CORE */ + #endif /* CONFIG_CRASH_CORE */ +#if defined(CONFIG_PREEMPT_RT_FULL) -+static ssize_t realtime_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) ++static ssize_t realtime_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 1); +} @@ -36,7 +36,7 @@ Signed-off-by: Peter Zijlstra /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -@@ -225,6 +234,9 @@ static struct attribute * kernel_attrs[] +@@ -231,6 +240,9 @@ static struct attribute * kernel_attrs[] &rcu_expedited_attr.attr, &rcu_normal_attr.attr, #endif diff --git a/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch b/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch index b0eccb6e4..5909e69f7 100644 --- a/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch +++ b/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch @@ -1,7 +1,7 @@ Subject: tasklet: Prevent tasklets from going into infinite spin in RT From: Ingo Molnar Date: Tue Nov 29 20:18:22 2011 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz When CONFIG_PREEMPT_RT_FULL is enabled, tasklets run as threads, and spinlocks turn are mutexes. But this can cause issues with @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -520,8 +520,9 @@ static inline struct task_struct *this_c +@@ -531,8 +531,9 @@ static inline struct task_struct *this_c to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its execution is still not started, it will be executed only once. @@ -56,7 +56,7 @@ Signed-off-by: Thomas Gleixner * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. 
-@@ -546,27 +547,36 @@ struct tasklet_struct name = { NULL, 0, +@@ -557,27 +558,36 @@ struct tasklet_struct name = { NULL, 0, enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ @@ -99,7 +99,7 @@ Signed-off-by: Thomas Gleixner #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif -@@ -615,12 +625,7 @@ static inline void tasklet_disable(struc +@@ -626,12 +636,7 @@ static inline void tasklet_disable(struc smp_mb(); } @@ -366,7 +366,7 @@ Signed-off-by: Thomas Gleixner } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } tasklet_unlock_wait(t); -@@ -660,6 +728,23 @@ void __init softirq_init(void) +@@ -609,6 +677,23 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action); } diff --git a/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch b/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch index a90d1c21a..b7ff95c47 100644 --- a/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch +++ b/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch @@ -1,7 +1,7 @@ From: Daniel Wagner Date: Tue, 17 Feb 2015 09:37:44 +0100 Subject: thermal: Defer thermal wakups to threads -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz On RT the spin lock in pkg_temp_thermal_platfrom_thermal_notify will call schedule while we run in irq context. diff --git a/debian/patches/features/all/rt/tick-broadcast--Make-hrtimer-irqsafe.patch b/debian/patches/features/all/rt/tick-broadcast--Make-hrtimer-irqsafe.patch deleted file mode 100644 index d22f62e44..000000000 --- a/debian/patches/features/all/rt/tick-broadcast--Make-hrtimer-irqsafe.patch +++ /dev/null @@ -1,58 +0,0 @@ -Subject: tick/broadcast: Make broadcast hrtimer irqsafe -From: Thomas Gleixner -Date: Sat, 27 Feb 2016 10:47:10 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -Otherwise we end up with the following: - -|================================= -|[ INFO: inconsistent lock state ] -|4.4.2-rt7+ #5 Not tainted -|--------------------------------- -|inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. 
-|ktimersoftd/0/4 [HC0[0]:SC0[0]:HE1:SE1] takes: -| (tick_broadcast_lock){?.....}, at: [] tick_handle_oneshot_broadcast+0x58/0x27c -|{IN-HARDIRQ-W} state was registered at: -| [] mark_lock+0x19c/0x6a0 -| [] __lock_acquire+0xb1c/0x2100 -| [] lock_acquire+0xf8/0x230 -| [] _raw_spin_lock_irqsave+0x50/0x68 -| [] tick_broadcast_switch_to_oneshot+0x20/0x60 -| [] tick_switch_to_oneshot+0x64/0xd8 -| [] tick_init_highres+0x1c/0x24 -| [] hrtimer_run_queues+0x78/0x100 -| [] update_process_times+0x38/0x74 -| [] tick_periodic+0x60/0x140 -| [] tick_handle_periodic+0x2c/0x94 -| [] arch_timer_handler_phys+0x3c/0x48 -| [] handle_percpu_devid_irq+0x100/0x390 -| [] generic_handle_irq+0x34/0x4c -| [] __handle_domain_irq+0x90/0xf8 -| [] gic_handle_irq+0x5c/0xa4 -| [] el1_irq+0x6c/0xec -| [] default_idle_call+0x2c/0x44 -| [] cpu_startup_entry+0x3cc/0x410 -| [] rest_init+0x158/0x168 -| [] start_kernel+0x3a0/0x3b4 -| [<0000000080621000>] 0x80621000 -|irq event stamp: 18723 -|hardirqs last enabled at (18723): [] _raw_spin_unlock_irq+0x38/0x80 -|hardirqs last disabled at (18722): [] run_hrtimer_softirq+0x2c/0x2f4 -|softirqs last enabled at (0): [] copy_process.isra.50+0x300/0x16d4 -|softirqs last disabled at (0): [< (null)>] (null) - -Reported-by: Sebastian Andrzej Siewior -Signed-off-by: Thomas Gleixner ---- - kernel/time/tick-broadcast-hrtimer.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/kernel/time/tick-broadcast-hrtimer.c -+++ b/kernel/time/tick-broadcast-hrtimer.c -@@ -107,5 +107,6 @@ void tick_setup_hrtimer_broadcast(void) - { - hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - bctimer.function = bc_handler; -+ bctimer.irqsafe = true; - clockevents_register_device(&ce_broadcast_hrtimer); - } diff --git a/debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch b/debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch new file mode 100644 index 000000000..a879a2a1b --- /dev/null +++ b/debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch @@ -0,0 +1,53 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 15 Nov 2017 17:29:51 +0100 +Subject: [PATCH] time/hrtimer: avoid schedule_work() with interrupts disabled +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +The NOHZ code tries to schedule a workqueue with interrupts disabled. +Since this does not work -RT I am switching it to swork instead. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/time/timer.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -217,8 +217,7 @@ static DEFINE_PER_CPU(struct timer_base, + static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); + static DEFINE_MUTEX(timer_keys_mutex); + +-static void timer_update_keys(struct work_struct *work); +-static DECLARE_WORK(timer_update_work, timer_update_keys); ++static struct swork_event timer_update_swork; + + #ifdef CONFIG_SMP + unsigned int sysctl_timer_migration = 1; +@@ -238,7 +237,7 @@ static void timers_update_migration(void + static inline void timers_update_migration(void) { } + #endif /* !CONFIG_SMP */ + +-static void timer_update_keys(struct work_struct *work) ++static void timer_update_keys(struct swork_event *event) + { + mutex_lock(&timer_keys_mutex); + timers_update_migration(); +@@ -248,9 +247,17 @@ static void timer_update_keys(struct wor + + void timers_update_nohz(void) + { +- schedule_work(&timer_update_work); ++ swork_queue(&timer_update_swork); + } + ++static __init int hrtimer_init_thread(void) ++{ ++ WARN_ON(swork_get()); ++ INIT_SWORK(&timer_update_swork, timer_update_keys); ++ return 0; ++} ++early_initcall(hrtimer_init_thread); ++ + int timer_migration_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) diff --git a/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch b/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch index 52c9c753d..dcd29d5ff 100644 --- a/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch +++ b/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch @@ -1,7 +1,7 @@ Subject: timekeeping: Split jiffies seqlock From: Thomas Gleixner Date: Thu, 14 Feb 2013 22:36:59 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Replace jiffies_lock seqlock with a simple seqcounter and a rawlock so it can be taken in atomic context on RT. 
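A minimal sketch of the split described above, assuming the names this patch uses (a raw_spinlock_t jiffies_lock beside a seqcount_t jiffies_seq): updaters take the raw lock and bracket the update with the seqcount, readers only spin on the seqcount, so the read side stays lockless while the write side may run with interrupts disabled on RT.

    #include <linux/jiffies.h>
    #include <linux/seqlock.h>
    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(jiffies_lock);
    static seqcount_t jiffies_seq = SEQCNT_ZERO(jiffies_seq);

    /* Writer: serialized by the raw lock, published via the seqcount. */
    static void jiffies_update(unsigned long ticks)
    {
        raw_spin_lock(&jiffies_lock);
        write_seqcount_begin(&jiffies_seq);
        jiffies_64 += ticks;
        write_seqcount_end(&jiffies_seq);
        raw_spin_unlock(&jiffies_lock);
    }

    /* Reader: lockless retry loop, usable from atomic context. */
    static u64 jiffies_snapshot(void)
    {
        unsigned int seq;
        u64 val;

        do {
            seq = read_seqcount_begin(&jiffies_seq);
            val = jiffies_64;
        } while (read_seqcount_retry(&jiffies_seq, seq));

        return val;
    }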
@@ -115,7 +115,7 @@ Signed-off-by: Thomas Gleixner return period; } -@@ -672,10 +677,10 @@ static ktime_t tick_nohz_stop_sched_tick +@@ -684,10 +689,10 @@ static ktime_t tick_nohz_stop_sched_tick /* Read jiffies and the time when jiffies were updated last */ do { @@ -130,7 +130,7 @@ Signed-off-by: Thomas Gleixner if (rcu_needs_cpu(basemono, &next_rcu) || --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c -@@ -2323,8 +2323,10 @@ EXPORT_SYMBOL(hardpps); +@@ -2326,8 +2326,10 @@ EXPORT_SYMBOL(hardpps); */ void xtime_update(unsigned long ticks) { @@ -145,7 +145,7 @@ Signed-off-by: Thomas Gleixner } --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h -@@ -17,7 +17,8 @@ extern void timekeeping_resume(void); +@@ -18,7 +18,8 @@ extern void timekeeping_resume(void); extern void do_timer(unsigned long ticks); extern void update_wall_time(void); diff --git a/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch b/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch index f4676a366..7b1815def 100644 --- a/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch +++ b/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch @@ -1,7 +1,7 @@ From: Peter Zijlstra Date: Fri, 21 Aug 2009 11:56:45 +0200 Subject: timer: delay waking softirqs from the jiffy tick -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz People were complaining about broken balancing with the recent -rt series. @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1601,13 +1601,13 @@ void update_process_times(int user_tick) +@@ -1636,13 +1636,13 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); diff --git a/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch b/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch index 5d117bec8..48a58a63a 100644 --- a/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch +++ b/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch @@ -1,7 +1,7 @@ Subject: timer-fd: Prevent live lock From: Thomas Gleixner Date: Wed, 25 Jan 2012 11:08:40 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz If hrtimer_try_to_cancel() requires a retry, then depending on the priority setting te retry loop might prevent timer callback completion diff --git a/debian/patches/features/all/rt/timer-hrtimer-check-properly-for-a-running-timer.patch b/debian/patches/features/all/rt/timer-hrtimer-check-properly-for-a-running-timer.patch deleted file mode 100644 index ebbcb2f9b..000000000 --- a/debian/patches/features/all/rt/timer-hrtimer-check-properly-for-a-running-timer.patch +++ /dev/null @@ -1,34 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 1 Mar 2017 16:30:49 +0100 -Subject: [PATCH] timer/hrtimer: check properly for a running timer -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -hrtimer_callback_running() checks only whether a timmer is running on a -CPU in hardirq-context. This is okay for !RT. 
For RT environment we move -most timers to the timer-softirq and therefore we therefore need to -check if the timer is running in the softirq context. - -Cc: stable-rt@vger.kernel.org -Reported-by: Alexander Gerasiov -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/hrtimer.h | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - ---- a/include/linux/hrtimer.h -+++ b/include/linux/hrtimer.h -@@ -440,7 +440,13 @@ static inline int hrtimer_is_queued(stru - */ - static inline int hrtimer_callback_running(const struct hrtimer *timer) - { -- return timer->base->cpu_base->running == timer; -+ if (timer->base->cpu_base->running == timer) -+ return 1; -+#ifdef CONFIG_PREEMPT_RT_BASE -+ if (timer->base->cpu_base->running_soft == timer) -+ return 1; -+#endif -+ return 0; - } - - /* Forward a hrtimer so it expires after now: */ diff --git a/debian/patches/features/all/rt/timer-make-the-base-lock-raw.patch b/debian/patches/features/all/rt/timer-make-the-base-lock-raw.patch deleted file mode 100644 index 3f87fdda1..000000000 --- a/debian/patches/features/all/rt/timer-make-the-base-lock-raw.patch +++ /dev/null @@ -1,181 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 13 Jul 2016 18:22:23 +0200 -Subject: [PATCH] timer: make the base lock raw -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz - -The part where the base lock is held got more predictable / shorter after the -timer rework. One reason is the lack of re-cascading. -That means the lock can be made raw and held in IRQ context. - -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/time/timer.c | 48 ++++++++++++++++++++++++------------------------ - 1 file changed, 24 insertions(+), 24 deletions(-) - ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -195,7 +195,7 @@ EXPORT_SYMBOL(jiffies_64); - #endif - - struct timer_base { -- spinlock_t lock; -+ raw_spinlock_t lock; - struct timer_list *running_timer; - unsigned long clk; - unsigned long next_expiry; -@@ -913,10 +913,10 @@ static struct timer_base *lock_timer_bas - - if (!(tf & TIMER_MIGRATING)) { - base = get_timer_base(tf); -- spin_lock_irqsave(&base->lock, *flags); -+ raw_spin_lock_irqsave(&base->lock, *flags); - if (timer->flags == tf) - return base; -- spin_unlock_irqrestore(&base->lock, *flags); -+ raw_spin_unlock_irqrestore(&base->lock, *flags); - } - cpu_relax(); - } -@@ -986,9 +986,9 @@ static inline int - /* See the comment in lock_timer_base() */ - timer->flags |= TIMER_MIGRATING; - -- spin_unlock(&base->lock); -+ raw_spin_unlock(&base->lock); - base = new_base; -- spin_lock(&base->lock); -+ raw_spin_lock(&base->lock); - WRITE_ONCE(timer->flags, - (timer->flags & ~TIMER_BASEMASK) | base->cpu); - } -@@ -1013,7 +1013,7 @@ static inline int - } - - out_unlock: -- spin_unlock_irqrestore(&base->lock, flags); -+ raw_spin_unlock_irqrestore(&base->lock, flags); - - return ret; - } -@@ -1106,16 +1106,16 @@ void add_timer_on(struct timer_list *tim - if (base != new_base) { - timer->flags |= TIMER_MIGRATING; - -- spin_unlock(&base->lock); -+ raw_spin_unlock(&base->lock); - base = new_base; -- spin_lock(&base->lock); -+ raw_spin_lock(&base->lock); - WRITE_ONCE(timer->flags, - (timer->flags & ~TIMER_BASEMASK) | cpu); - } - - debug_activate(timer, timer->expires); - internal_add_timer(base, timer); -- spin_unlock_irqrestore(&base->lock, flags); -+ raw_spin_unlock_irqrestore(&base->lock, flags); - } - EXPORT_SYMBOL_GPL(add_timer_on); - -@@ -1141,7 +1141,7 @@ int del_timer(struct timer_list *timer) - if 
(timer_pending(timer)) { - base = lock_timer_base(timer, &flags); - ret = detach_if_pending(timer, base, true); -- spin_unlock_irqrestore(&base->lock, flags); -+ raw_spin_unlock_irqrestore(&base->lock, flags); - } - - return ret; -@@ -1168,7 +1168,7 @@ int try_to_del_timer_sync(struct timer_l - if (base->running_timer != timer) - ret = detach_if_pending(timer, base, true); - -- spin_unlock_irqrestore(&base->lock, flags); -+ raw_spin_unlock_irqrestore(&base->lock, flags); - - return ret; - } -@@ -1299,13 +1299,13 @@ static void expire_timers(struct timer_b - data = timer->data; - - if (timer->flags & TIMER_IRQSAFE) { -- spin_unlock(&base->lock); -+ raw_spin_unlock(&base->lock); - call_timer_fn(timer, fn, data); -- spin_lock(&base->lock); -+ raw_spin_lock(&base->lock); - } else { -- spin_unlock_irq(&base->lock); -+ raw_spin_unlock_irq(&base->lock); - call_timer_fn(timer, fn, data); -- spin_lock_irq(&base->lock); -+ raw_spin_lock_irq(&base->lock); - } - } - } -@@ -1474,7 +1474,7 @@ u64 get_next_timer_interrupt(unsigned lo - if (cpu_is_offline(smp_processor_id())) - return expires; - -- spin_lock(&base->lock); -+ raw_spin_lock(&base->lock); - nextevt = __next_timer_interrupt(base); - is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); - base->next_expiry = nextevt; -@@ -1502,7 +1502,7 @@ u64 get_next_timer_interrupt(unsigned lo - if ((expires - basem) > TICK_NSEC) - base->is_idle = true; - } -- spin_unlock(&base->lock); -+ raw_spin_unlock(&base->lock); - - return cmp_next_hrtimer_event(basem, expires); - } -@@ -1590,7 +1590,7 @@ static inline void __run_timers(struct t - if (!time_after_eq(jiffies, base->clk)) - return; - -- spin_lock_irq(&base->lock); -+ raw_spin_lock_irq(&base->lock); - - while (time_after_eq(jiffies, base->clk)) { - -@@ -1601,7 +1601,7 @@ static inline void __run_timers(struct t - expire_timers(base, heads + levels); - } - base->running_timer = NULL; -- spin_unlock_irq(&base->lock); -+ raw_spin_unlock_irq(&base->lock); - } - - /* -@@ -1786,16 +1786,16 @@ int timers_dead_cpu(unsigned int cpu) - * The caller is globally serialized and nobody else - * takes two locks at once, deadlock is not possible. 
- */ -- spin_lock_irq(&new_base->lock); -- spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); -+ raw_spin_lock_irq(&new_base->lock); -+ raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); - - BUG_ON(old_base->running_timer); - - for (i = 0; i < WHEEL_SIZE; i++) - migrate_timer_list(new_base, old_base->vectors + i); - -- spin_unlock(&old_base->lock); -- spin_unlock_irq(&new_base->lock); -+ raw_spin_unlock(&old_base->lock); -+ raw_spin_unlock_irq(&new_base->lock); - put_cpu_ptr(&timer_bases); - } - return 0; -@@ -1811,7 +1811,7 @@ static void __init init_timer_cpu(int cp - for (i = 0; i < NR_BASES; i++) { - base = per_cpu_ptr(&timer_bases[i], cpu); - base->cpu = cpu; -- spin_lock_init(&base->lock); -+ raw_spin_lock_init(&base->lock); - base->clk = jiffies; - } - } diff --git a/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch b/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch index fab492709..abca04795 100644 --- a/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch +++ b/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:34 -0500 Subject: timers: Prepare for full preemption -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz When softirqs can be preempted we need to make sure that cancelling the timer from the active thread can not deadlock vs. a running timer @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/timer.h +++ b/include/linux/timer.h -@@ -198,7 +198,7 @@ extern void add_timer(struct timer_list +@@ -213,7 +213,7 @@ extern void add_timer(struct timer_list extern int try_to_del_timer_sync(struct timer_list *timer); @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner # define del_timer_sync(t) del_timer(t) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -532,11 +532,14 @@ void resched_cpu(int cpu) +@@ -523,11 +523,14 @@ void resched_cpu(int cpu) */ int get_nohz_timer_target(void) { @@ -46,7 +46,7 @@ Signed-off-by: Thomas Gleixner rcu_read_lock(); for_each_domain(cpu, sd) { -@@ -555,6 +558,8 @@ int get_nohz_timer_target(void) +@@ -546,6 +549,8 @@ int get_nohz_timer_target(void) cpu = housekeeping_any_cpu(); unlock: rcu_read_unlock(); @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner unsigned long clk; unsigned long next_expiry; unsigned int cpu; -@@ -1119,6 +1123,33 @@ void add_timer_on(struct timer_list *tim +@@ -1148,6 +1152,33 @@ void add_timer_on(struct timer_list *tim } EXPORT_SYMBOL_GPL(add_timer_on); @@ -93,7 +93,7 @@ Signed-off-by: Thomas Gleixner + + base = get_timer_base(tf); + swait_event(base->wait_for_running_timer, -+ base->running_timer != timer); ++ base->running_timer != timer); +} + +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) @@ -107,9 +107,9 @@ Signed-off-by: Thomas Gleixner +#endif + /** - * del_timer - deactive a timer. + * del_timer - deactivate a timer. * @timer: the timer to be deactivated -@@ -1174,7 +1205,7 @@ int try_to_del_timer_sync(struct timer_l +@@ -1203,7 +1234,7 @@ int try_to_del_timer_sync(struct timer_l } EXPORT_SYMBOL(try_to_del_timer_sync); @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner /** * del_timer_sync - deactivate a timer and wait for the handler to finish. 
* @timer: the timer to be deactivated -@@ -1234,7 +1265,7 @@ int del_timer_sync(struct timer_list *ti +@@ -1263,7 +1294,7 @@ int del_timer_sync(struct timer_list *ti int ret = try_to_del_timer_sync(timer); if (ret >= 0) return ret; @@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner } } EXPORT_SYMBOL(del_timer_sync); -@@ -1298,13 +1329,16 @@ static void expire_timers(struct timer_b +@@ -1327,13 +1358,16 @@ static void expire_timers(struct timer_b fn = timer->function; data = timer->data; @@ -145,7 +145,7 @@ Signed-off-by: Thomas Gleixner raw_spin_lock_irq(&base->lock); } } -@@ -1600,8 +1634,8 @@ static inline void __run_timers(struct t +@@ -1635,8 +1669,8 @@ static inline void __run_timers(struct t while (levels--) expire_timers(base, heads + levels); } @@ -155,7 +155,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -1813,6 +1847,9 @@ static void __init init_timer_cpu(int cp +@@ -1861,6 +1895,9 @@ static void __init init_timer_cpu(int cp base->cpu = cpu; raw_spin_lock_init(&base->lock); base->clk = jiffies; diff --git a/debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch b/debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch new file mode 100644 index 000000000..19897b44c --- /dev/null +++ b/debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch @@ -0,0 +1,78 @@ +From: Haris Okanovic +Date: Tue, 15 Aug 2017 15:13:08 -0500 +Subject: [PATCH] tpm_tis: fix stall after iowrite*()s +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +ioread8() operations to TPM MMIO addresses can stall the cpu when +immediately following a sequence of iowrite*()'s to the same region. + +For example, cyclitest measures ~400us latency spikes when a non-RT +usermode application communicates with an SPI-based TPM chip (Intel Atom +E3940 system, PREEMPT_RT_FULL kernel). The spikes are caused by a +stalling ioread8() operation following a sequence of 30+ iowrite8()s to +the same address. I believe this happens because the write sequence is +buffered (in cpu or somewhere along the bus), and gets flushed on the +first LOAD instruction (ioread*()) that follows. + +The enclosed change appears to fix this issue: read the TPM chip's +access register (status code) after every iowrite*() operation to +amortize the cost of flushing data to chip across multiple instructions. + +Signed-off-by: Haris Okanovic +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/char/tpm/tpm_tis.c | 29 +++++++++++++++++++++++++++-- + 1 file changed, 27 insertions(+), 2 deletions(-) + +--- a/drivers/char/tpm/tpm_tis.c ++++ b/drivers/char/tpm/tpm_tis.c +@@ -52,6 +52,31 @@ static inline struct tpm_tis_tcg_phy *to + return container_of(data, struct tpm_tis_tcg_phy, priv); + } + ++#ifdef CONFIG_PREEMPT_RT_FULL ++/* ++ * Flushes previous write operations to chip so that a subsequent ++ * ioread*()s won't stall a cpu. 
++ */ ++static inline void tpm_tis_flush(void __iomem *iobase) ++{ ++ ioread8(iobase + TPM_ACCESS(0)); ++} ++#else ++#define tpm_tis_flush(iobase) do { } while (0) ++#endif ++ ++static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite8(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ ++static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite32(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ + static bool interrupts = true; + module_param(interrupts, bool, 0444); + MODULE_PARM_DESC(interrupts, "Enable interrupts"); +@@ -230,7 +255,7 @@ static int tpm_tcg_write_bytes(struct tp + tpm_platform_begin_xfer(); + + while (len--) +- iowrite8(*value++, phy->iobase + addr); ++ tpm_tis_iowrite8(*value++, phy->iobase, addr); + + tpm_platform_end_xfer(); + +@@ -269,7 +294,7 @@ static int tpm_tcg_write32(struct tpm_ti + + tpm_platform_begin_xfer(); + +- iowrite32(value, phy->iobase + addr); ++ tpm_tis_iowrite32(value, phy->iobase, addr); + + tpm_platform_end_xfer(); + diff --git a/debian/patches/features/all/rt/tracing-account-for-preempt-off-in-preempt_schedule.patch b/debian/patches/features/all/rt/tracing-account-for-preempt-off-in-preempt_schedule.patch index 23842d381..0095fa801 100644 --- a/debian/patches/features/all/rt/tracing-account-for-preempt-off-in-preempt_schedule.patch +++ b/debian/patches/features/all/rt/tracing-account-for-preempt-off-in-preempt_schedule.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Thu, 29 Sep 2011 12:24:30 -0500 Subject: tracing: Account for preempt off in preempt_schedule() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz The preempt_schedule() uses the preempt_disable_notrace() version because it can cause infinite recursion by the function tracer as @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3654,7 +3654,16 @@ asmlinkage __visible void __sched notrac +@@ -3605,7 +3605,16 @@ asmlinkage __visible void __sched notrac * an infinite recursion. 
*/ prev_ctx = exception_enter(); diff --git a/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch b/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch index ec6527dea..876ef09bf 100644 --- a/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch +++ b/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 11 Apr 2016 16:55:02 +0200 Subject: [PATCH] tty: serial: 8250: don't take the trylock during oops -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz An oops with irqs off (panic() from irqsafe hrtimer like the watchdog timer) will lead to a lockdep warning on each invocation and as such @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c -@@ -3179,10 +3179,8 @@ void serial8250_console_write(struct uar +@@ -3214,10 +3214,8 @@ void serial8250_console_write(struct uar serial8250_rpm_get(up); diff --git a/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch b/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch index 85c3aa174..72b3ad085 100644 --- a/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch +++ b/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch @@ -1,7 +1,7 @@ Subject: net: Remove preemption disabling in netif_rx() From: Priyanka Jain Date: Thu, 17 May 2012 09:35:11 +0530 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz 1)enqueue_to_backlog() (called from netif_rx) should be bind to a particluar CPU. 
This can be achieved by @@ -38,7 +38,7 @@ Signed-off-by: Thomas Gleixner --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3799,7 +3799,7 @@ static int netif_rx_internal(struct sk_b +@@ -4015,7 +4015,7 @@ static int netif_rx_internal(struct sk_b struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -3809,14 +3809,14 @@ static int netif_rx_internal(struct sk_b +@@ -4025,14 +4025,14 @@ static int netif_rx_internal(struct sk_b ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); diff --git a/debian/patches/features/all/rt/usb-use-_nort-in-giveback.patch b/debian/patches/features/all/rt/usb-use-_nort-in-giveback.patch index b98065758..00a16a887 100644 --- a/debian/patches/features/all/rt/usb-use-_nort-in-giveback.patch +++ b/debian/patches/features/all/rt/usb-use-_nort-in-giveback.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 8 Nov 2013 17:34:54 +0100 Subject: usb: Use _nort in giveback function -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Since commit 94dfd7ed ("USB: HCD: support giveback of URB in tasklet context") I see @@ -44,7 +44,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c -@@ -1764,9 +1764,9 @@ static void __usb_hcd_giveback_urb(struc +@@ -1775,9 +1775,9 @@ static void __usb_hcd_giveback_urb(struc * and no one may trigger the above deadlock situation when * running complete() in tasklet. */ diff --git a/debian/patches/features/all/rt/user-use-local-irq-nort.patch b/debian/patches/features/all/rt/user-use-local-irq-nort.patch index bb995507f..e74f69453 100644 --- a/debian/patches/features/all/rt/user-use-local-irq-nort.patch +++ b/debian/patches/features/all/rt/user-use-local-irq-nort.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 21 Jul 2009 23:06:05 +0200 Subject: core: Do not disable interrupts on RT in kernel/users.c -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use the local_irq_*_nort variants to reduce latencies in RT. The code is serialized by the locks. No need to disable interrupts. 
diff --git a/debian/patches/features/all/rt/wait.h-include-atomic.h.patch b/debian/patches/features/all/rt/wait.h-include-atomic.h.patch index 0f681d359..cf2226095 100644 --- a/debian/patches/features/all/rt/wait.h-include-atomic.h.patch +++ b/debian/patches/features/all/rt/wait.h-include-atomic.h.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 28 Oct 2013 12:19:57 +0100 Subject: wait.h: include atomic.h -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz | CC init/main.o |In file included from include/linux/mmzone.h:9:0, @@ -23,11 +23,11 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/wait.h +++ b/include/linux/wait.h -@@ -9,6 +9,7 @@ +@@ -10,6 +10,7 @@ #include #include +#include - typedef struct __wait_queue wait_queue_t; - typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); + typedef struct wait_queue_entry wait_queue_entry_t; + diff --git a/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch b/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch index bdb39c09a..4a9d9dbbd 100644 --- a/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch +++ b/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Mon, 01 Jul 2013 11:02:42 +0200 Subject: workqueue: Prevent workqueue versus ata-piix livelock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz An Intel i7 system regularly detected rcu_preempt stalls after the kernel was upgraded from 3.6-rt to 3.8-rt. When the stall happened, disk I/O was no @@ -122,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "workqueue_internal.h" -@@ -1281,7 +1282,7 @@ static int try_to_grab_pending(struct wo +@@ -1282,7 +1283,7 @@ static int try_to_grab_pending(struct wo local_unlock_irqrestore(pendingb_lock, *flags); if (work_is_canceling(work)) return -ENOENT; diff --git a/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch b/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch index e6a8e7fd6..879225fd5 100644 --- a/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch +++ b/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch @@ -1,7 +1,7 @@ From: Daniel Wagner Date: Fri, 11 Jul 2014 15:26:11 +0200 Subject: work-simple: Simple work queue implemenation -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Provides a framework for enqueuing callbacks from irq context PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. 
@@ -45,13 +45,13 @@ Signed-off-by: Daniel Wagner +#endif /* _LINUX_SWORK_H */ --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile -@@ -17,7 +17,7 @@ endif +@@ -18,7 +18,7 @@ endif obj-y += core.o loadavg.o clock.o cputime.o - obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o --obj-y += wait.o swait.o completion.o idle.o -+obj-y += wait.o swait.o swork.o completion.o idle.o - obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o + obj-y += idle_task.o fair.o rt.o deadline.o +-obj-y += wait.o wait_bit.o swait.o completion.o idle.o ++obj-y += wait.o wait_bit.o swait.o swork.o completion.o idle.o + obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o --- /dev/null diff --git a/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch b/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch index a528653fc..959cfc547 100644 --- a/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch +++ b/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch @@ -22,17 +22,17 @@ Cc: Jens Axboe Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20110622174919.135236139@linutronix.de Signed-off-by: Thomas Gleixner -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz --- - kernel/sched/core.c | 86 +++++++------------------------------------- - kernel/workqueue.c | 52 +++++++++++--------------- + kernel/sched/core.c | 84 +++++++------------------------------------- + kernel/workqueue.c | 52 ++++++++++++--------------- kernel/workqueue_internal.h | 5 +- - 3 files changed, 41 insertions(+), 102 deletions(-) + 3 files changed, 41 insertions(+), 100 deletions(-) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1701,10 +1701,6 @@ static inline void ttwu_activate(struct +@@ -1716,10 +1716,6 @@ static inline void ttwu_activate(struct { activate_task(rq, p, en_flags); p->on_rq = TASK_ON_RQ_QUEUED; @@ -43,13 +43,13 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 } /* -@@ -2157,58 +2153,6 @@ try_to_wake_up(struct task_struct *p, un +@@ -2160,56 +2156,6 @@ try_to_wake_up(struct task_struct *p, un } /** - * try_to_wake_up_local - try to wake up a local task with rq lock held - * @p: the thread to be awakened -- * @cookie: context's cookie for pinning +- * @rf: request-queue flags for pinning - * - * Put @p on the run-queue if it's not already there. The caller must - * ensure that this_rq() is locked, @p is bound to this_rq() and not @@ -72,11 +72,9 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 - * disabled avoiding further scheduler activity on it and we've - * not yet picked a replacement task. 
- */ -- rq_unpin_lock(rq, rf); -- raw_spin_unlock(&rq->lock); +- rq_unlock(rq, rf); - raw_spin_lock(&p->pi_lock); -- raw_spin_lock(&rq->lock); -- rq_repin_lock(rq, rf); +- rq_relock(rq, rf); - } - - if (!(p->state & TASK_NORMAL)) @@ -89,7 +87,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 - delayacct_blkio_end(); - atomic_dec(&rq->nr_iowait); - } -- ttwu_activate(rq, p, ENQUEUE_WAKEUP); +- ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); - } - - ttwu_do_wakeup(rq, p, 0, rf); @@ -102,7 +100,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 * wake_up_process - Wake up a specific process * @p: The process to be woken up. * -@@ -3496,21 +3440,6 @@ static void __sched notrace __schedule(b +@@ -3410,21 +3356,6 @@ static void __sched notrace __schedule(b atomic_inc(&rq->nr_iowait); delayacct_blkio_start(); } @@ -124,7 +122,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 } switch_count = &prev->nvcsw; } -@@ -3575,6 +3504,14 @@ static inline void sched_submit_work(str +@@ -3500,6 +3431,14 @@ static inline void sched_submit_work(str { if (!tsk->state || tsk_is_pi_blocked(tsk)) return; @@ -139,7 +137,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. -@@ -3583,6 +3520,12 @@ static inline void sched_submit_work(str +@@ -3508,6 +3447,12 @@ static inline void sched_submit_work(str blk_schedule_flush_plug(tsk); } @@ -152,7 +150,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; -@@ -3593,6 +3536,7 @@ asmlinkage __visible void __sched schedu +@@ -3518,6 +3463,7 @@ asmlinkage __visible void __sched schedu __schedule(false); sched_preempt_enable_no_resched(); } while (need_resched()); @@ -162,7 +160,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c -@@ -843,43 +843,32 @@ static void wake_up_worker(struct worker +@@ -844,43 +844,32 @@ static void wake_up_worker(struct worker } /** @@ -217,7 +215,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 struct worker_pool *pool; /* -@@ -888,13 +877,15 @@ struct task_struct *wq_worker_sleeping(s +@@ -889,13 +878,15 @@ struct task_struct *wq_worker_sleeping(s * checking NOT_RUNNING. */ if (worker->flags & WORKER_NOT_RUNNING) @@ -237,7 +235,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 /* * The counterpart of the following dec_and_test, implied mb, -@@ -908,9 +899,12 @@ struct task_struct *wq_worker_sleeping(s +@@ -909,9 +900,12 @@ struct task_struct *wq_worker_sleeping(s * lock is safe. */ if (atomic_dec_and_test(&pool->nr_running) && @@ -255,7 +253,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 /** --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h -@@ -43,6 +43,7 @@ struct worker { +@@ -45,6 +45,7 @@ struct worker { unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ int id; /* I: worker id */ @@ -263,7 +261,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4 /* * Opaque string set with work_set_desc(). 
Printed out with task -@@ -68,7 +69,7 @@ static inline struct worker *current_wq_ +@@ -70,7 +71,7 @@ static inline struct worker *current_wq_ * Scheduler hooks for concurrency managed workqueue. Only to be used from * sched/core.c and workqueue.c. */ diff --git a/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch b/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch index 6f1b61ca9..cf4f42349 100644 --- a/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch +++ b/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch @@ -1,7 +1,7 @@ Subject: workqueue: Prevent deadlock/stall on RT From: Thomas Gleixner Date: Fri, 27 Jun 2014 16:24:52 +0200 (CEST) -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Austin reported a XFS deadlock/stall on RT where scheduled work gets never exececuted and tasks are waiting for each other for ever. @@ -44,7 +44,7 @@ Cc: Steven Rostedt --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3626,9 +3626,8 @@ void __noreturn do_task_dead(void) +@@ -3475,9 +3475,8 @@ void __noreturn do_task_dead(void) static inline void sched_submit_work(struct task_struct *tsk) { @@ -55,7 +55,7 @@ Cc: Steven Rostedt /* * If a worker went to sleep, notify and ask workqueue whether * it wants to wake up a task to maintain concurrency. -@@ -3636,6 +3635,10 @@ static inline void sched_submit_work(str +@@ -3485,6 +3484,10 @@ static inline void sched_submit_work(str if (tsk->flags & PF_WQ_WORKER) wq_worker_sleeping(tsk); @@ -68,7 +68,7 @@ Cc: Steven Rostedt * make sure to submit it to avoid deadlocks. --- a/kernel/workqueue.c +++ b/kernel/workqueue.c -@@ -123,6 +123,11 @@ enum { +@@ -124,6 +124,11 @@ enum { * cpu or grabbing pool->lock is enough for read access. If * POOL_DISASSOCIATED is set, it's identical to L. * @@ -80,7 +80,7 @@ Cc: Steven Rostedt * A: pool->attach_mutex protected. * * PL: wq_pool_mutex protected. 
-@@ -430,6 +435,31 @@ static void workqueue_sysfs_unregister(s +@@ -431,6 +436,31 @@ static void workqueue_sysfs_unregister(s if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ else @@ -112,7 +112,7 @@ Cc: Steven Rostedt #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; -@@ -836,10 +866,16 @@ static struct worker *first_idle_worker( +@@ -837,10 +867,16 @@ static struct worker *first_idle_worker( */ static void wake_up_worker(struct worker_pool *pool) { @@ -130,7 +130,7 @@ Cc: Steven Rostedt } /** -@@ -868,7 +904,7 @@ void wq_worker_running(struct task_struc +@@ -869,7 +905,7 @@ void wq_worker_running(struct task_struc */ void wq_worker_sleeping(struct task_struct *task) { @@ -139,7 +139,7 @@ Cc: Steven Rostedt struct worker_pool *pool; /* -@@ -885,26 +921,18 @@ void wq_worker_sleeping(struct task_stru +@@ -886,26 +922,18 @@ void wq_worker_sleeping(struct task_stru return; worker->sleeping = 1; @@ -169,7 +169,7 @@ Cc: Steven Rostedt } /** -@@ -1635,7 +1663,9 @@ static void worker_enter_idle(struct wor +@@ -1636,7 +1664,9 @@ static void worker_enter_idle(struct wor worker->last_active = jiffies; /* idle_list is LIFO */ @@ -179,7 +179,7 @@ Cc: Steven Rostedt if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); -@@ -1668,7 +1698,9 @@ static void worker_leave_idle(struct wor +@@ -1669,7 +1699,9 @@ static void worker_leave_idle(struct wor return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; @@ -189,7 +189,7 @@ Cc: Steven Rostedt } static struct worker *alloc_worker(int node) -@@ -1834,7 +1866,9 @@ static void destroy_worker(struct worker +@@ -1835,7 +1867,9 @@ static void destroy_worker(struct worker pool->nr_workers--; pool->nr_idle--; diff --git a/debian/patches/features/all/rt/workqueue-use-locallock.patch b/debian/patches/features/all/rt/workqueue-use-locallock.patch index e133e272c..f3deef263 100644 --- a/debian/patches/features/all/rt/workqueue-use-locallock.patch +++ b/debian/patches/features/all/rt/workqueue-use-locallock.patch @@ -1,7 +1,7 @@ Subject: workqueue: Use local irq lock instead of irq disable regions From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:42:26 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Use a local_irq_lock as a replacement for irq off regions. We keep the semantic of irq-off in regard to the pool->lock and remain preemptible. @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner #include "workqueue_internal.h" -@@ -350,6 +351,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient +@@ -351,6 +352,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); @@ -30,7 +30,7 @@ Signed-off-by: Thomas Gleixner static int worker_thread(void *__worker); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); -@@ -1103,9 +1106,11 @@ static void put_pwq_unlocked(struct pool +@@ -1104,9 +1107,11 @@ static void put_pwq_unlocked(struct pool * As both pwqs and pools are RCU protected, the * following lock operations are safe. 
*/ @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -1209,7 +1214,7 @@ static int try_to_grab_pending(struct wo +@@ -1210,7 +1215,7 @@ static int try_to_grab_pending(struct wo struct worker_pool *pool; struct pool_workqueue *pwq; @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner /* try to steal the timer if it exists */ if (is_dwork) { -@@ -1273,7 +1278,7 @@ static int try_to_grab_pending(struct wo +@@ -1274,7 +1279,7 @@ static int try_to_grab_pending(struct wo spin_unlock(&pool->lock); fail: rcu_read_unlock(); @@ -62,7 +62,7 @@ Signed-off-by: Thomas Gleixner if (work_is_canceling(work)) return -ENOENT; cpu_relax(); -@@ -1378,7 +1383,7 @@ static void __queue_work(int cpu, struct +@@ -1379,7 +1384,7 @@ static void __queue_work(int cpu, struct * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. */ @@ -71,7 +71,7 @@ Signed-off-by: Thomas Gleixner debug_work_activate(work); -@@ -1484,14 +1489,14 @@ bool queue_work_on(int cpu, struct workq +@@ -1485,14 +1490,14 @@ bool queue_work_on(int cpu, struct workq bool ret = false; unsigned long flags; @@ -88,7 +88,7 @@ Signed-off-by: Thomas Gleixner return ret; } EXPORT_SYMBOL(queue_work_on); -@@ -1500,8 +1505,11 @@ void delayed_work_timer_fn(unsigned long +@@ -1501,8 +1506,11 @@ void delayed_work_timer_fn(unsigned long { struct delayed_work *dwork = (struct delayed_work *)__data; @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(delayed_work_timer_fn); -@@ -1557,14 +1565,14 @@ bool queue_delayed_work_on(int cpu, stru +@@ -1558,14 +1566,14 @@ bool queue_delayed_work_on(int cpu, stru unsigned long flags; /* read the comment in __queue_work() */ @@ -117,7 +117,7 @@ Signed-off-by: Thomas Gleixner return ret; } EXPORT_SYMBOL(queue_delayed_work_on); -@@ -1599,7 +1607,7 @@ bool mod_delayed_work_on(int cpu, struct +@@ -1600,7 +1608,7 @@ bool mod_delayed_work_on(int cpu, struct if (likely(ret >= 0)) { __queue_delayed_work(cpu, wq, dwork, delay); @@ -126,7 +126,7 @@ Signed-off-by: Thomas Gleixner } /* -ENOENT from try_to_grab_pending() becomes %true */ -@@ -2923,7 +2931,7 @@ static bool __cancel_work_timer(struct w +@@ -2950,7 +2958,7 @@ static bool __cancel_work_timer(struct w /* tell other tasks trying to grab @work to back off */ mark_work_canceling(work); @@ -135,7 +135,7 @@ Signed-off-by: Thomas Gleixner /* * This allows canceling during early boot. We know that @work -@@ -2984,10 +2992,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); +@@ -3011,10 +3019,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); */ bool flush_delayed_work(struct delayed_work *dwork) { @@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); -@@ -3005,7 +3013,7 @@ static bool __cancel_work(struct work_st +@@ -3032,7 +3040,7 @@ static bool __cancel_work(struct work_st return false; set_work_pool_and_clear_pending(work, get_work_pool_id(work)); diff --git a/debian/patches/features/all/rt/workqueue-use-rcu.patch b/debian/patches/features/all/rt/workqueue-use-rcu.patch index 6e1f2f489..666ae49e1 100644 --- a/debian/patches/features/all/rt/workqueue-use-rcu.patch +++ b/debian/patches/features/all/rt/workqueue-use-rcu.patch @@ -1,7 +1,7 @@ Subject: workqueue: Use normal rcu From: Thomas Gleixner Date: Wed, 24 Jul 2013 15:26:54 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz There is no need for sched_rcu. 
The undocumented reason why sched_rcu is used is to avoid a few explicit rcu_read_lock()/unlock() pairs by @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/workqueue.c +++ b/kernel/workqueue.c -@@ -125,7 +125,7 @@ enum { +@@ -126,7 +126,7 @@ enum { * * PL: wq_pool_mutex protected. * @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner * * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. * -@@ -134,7 +134,7 @@ enum { +@@ -135,7 +135,7 @@ enum { * * WQ: wq->mutex protected. * @@ -51,7 +51,7 @@ Signed-off-by: Thomas Gleixner * determined without grabbing wq->mutex. */ struct work_struct unbound_release_work; -@@ -357,20 +357,20 @@ static void workqueue_sysfs_unregister(s +@@ -358,20 +358,20 @@ static void workqueue_sysfs_unregister(s #include #define assert_rcu_or_pool_mutex() \ @@ -78,7 +78,7 @@ Signed-off-by: Thomas Gleixner #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ -@@ -382,7 +382,7 @@ static void workqueue_sysfs_unregister(s +@@ -383,7 +383,7 @@ static void workqueue_sysfs_unregister(s * @pool: iteration cursor * @pi: integer used for iteration * @@ -87,7 +87,7 @@ Signed-off-by: Thomas Gleixner * locked. If the pool needs to be used beyond the locking in effect, the * caller is responsible for guaranteeing that the pool stays online. * -@@ -414,7 +414,7 @@ static void workqueue_sysfs_unregister(s +@@ -415,7 +415,7 @@ static void workqueue_sysfs_unregister(s * @pwq: iteration cursor * @wq: the target workqueue * @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * -@@ -550,7 +550,7 @@ static int worker_pool_assign_id(struct +@@ -551,7 +551,7 @@ static int worker_pool_assign_id(struct * @wq: the target workqueue * @node: the node ID * @@ -105,7 +105,7 @@ Signed-off-by: Thomas Gleixner * read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. -@@ -694,8 +694,8 @@ static struct pool_workqueue *get_work_p +@@ -695,8 +695,8 @@ static struct pool_workqueue *get_work_p * @work: the work item of interest * * Pools are created and destroyed under wq_pool_mutex, and allows read @@ -116,7 +116,7 @@ Signed-off-by: Thomas Gleixner * * All fields of the returned pool are accessible as long as the above * mentioned locking is in effect. If the returned pool needs to be used -@@ -1100,7 +1100,7 @@ static void put_pwq_unlocked(struct pool +@@ -1101,7 +1101,7 @@ static void put_pwq_unlocked(struct pool { if (pwq) { /* @@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner * following lock operations are safe. */ spin_lock_irq(&pwq->pool->lock); -@@ -1228,6 +1228,7 @@ static int try_to_grab_pending(struct wo +@@ -1229,6 +1229,7 @@ static int try_to_grab_pending(struct wo if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; @@ -133,7 +133,7 @@ Signed-off-by: Thomas Gleixner /* * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. 
-@@ -1266,10 +1267,12 @@ static int try_to_grab_pending(struct wo +@@ -1267,10 +1268,12 @@ static int try_to_grab_pending(struct wo set_work_pool_and_keep_pending(work, pool->id); spin_unlock(&pool->lock); @@ -146,7 +146,7 @@ Signed-off-by: Thomas Gleixner local_irq_restore(*flags); if (work_is_canceling(work)) return -ENOENT; -@@ -1383,6 +1386,7 @@ static void __queue_work(int cpu, struct +@@ -1384,6 +1387,7 @@ static void __queue_work(int cpu, struct if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; @@ -154,7 +154,7 @@ Signed-off-by: Thomas Gleixner retry: if (req_cpu == WORK_CPU_UNBOUND) cpu = wq_select_unbound_cpu(raw_smp_processor_id()); -@@ -1439,10 +1443,8 @@ static void __queue_work(int cpu, struct +@@ -1440,10 +1444,8 @@ static void __queue_work(int cpu, struct /* pwq determined, queue */ trace_workqueue_queue_work(req_cpu, pwq, work); @@ -167,7 +167,7 @@ Signed-off-by: Thomas Gleixner pwq->nr_in_flight[pwq->work_color]++; work_flags = work_color_to_flags(pwq->work_color); -@@ -1460,7 +1462,9 @@ static void __queue_work(int cpu, struct +@@ -1461,7 +1463,9 @@ static void __queue_work(int cpu, struct insert_work(pwq, work, worklist, work_flags); @@ -177,7 +177,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -2789,14 +2793,14 @@ static bool start_flush_work(struct work +@@ -2814,14 +2818,14 @@ static bool start_flush_work(struct work might_sleep(); @@ -195,10 +195,10 @@ Signed-off-by: Thomas Gleixner /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { -@@ -2825,10 +2829,11 @@ static bool start_flush_work(struct work - else - lock_map_acquire_read(&pwq->wq->lockdep_map); - lock_map_release(&pwq->wq->lockdep_map); +@@ -2852,10 +2856,11 @@ static bool start_flush_work(struct work + lock_map_acquire(&pwq->wq->lockdep_map); + lock_map_release(&pwq->wq->lockdep_map); + } - + rcu_read_unlock(); return true; @@ -208,7 +208,7 @@ Signed-off-by: Thomas Gleixner return false; } -@@ -3258,7 +3263,7 @@ static void rcu_free_pool(struct rcu_hea +@@ -3283,7 +3288,7 @@ static void rcu_free_pool(struct rcu_hea * put_unbound_pool - put a worker_pool * @pool: worker_pool to put * @@ -217,7 +217,7 @@ Signed-off-by: Thomas Gleixner * safe manner. get_unbound_pool() calls this function on its failure path * and this function should be able to release pools which went through, * successfully or not, init_worker_pool(). -@@ -3312,8 +3317,8 @@ static void put_unbound_pool(struct work +@@ -3337,8 +3342,8 @@ static void put_unbound_pool(struct work del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->mayday_timer); @@ -228,7 +228,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -3420,14 +3425,14 @@ static void pwq_unbound_release_workfn(s +@@ -3445,14 +3450,14 @@ static void pwq_unbound_release_workfn(s put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); @@ -245,7 +245,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -4081,7 +4086,7 @@ void destroy_workqueue(struct workqueue_ +@@ -4127,7 +4132,7 @@ void destroy_workqueue(struct workqueue_ * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ @@ -254,7 +254,7 @@ Signed-off-by: Thomas Gleixner } else { /* * We're the sole accessor of @wq at this point. 
Directly -@@ -4174,7 +4179,8 @@ bool workqueue_congested(int cpu, struct +@@ -4221,7 +4226,8 @@ bool workqueue_congested(int cpu, struct struct pool_workqueue *pwq; bool ret; @@ -264,7 +264,7 @@ Signed-off-by: Thomas Gleixner if (cpu == WORK_CPU_UNBOUND) cpu = smp_processor_id(); -@@ -4185,7 +4191,8 @@ bool workqueue_congested(int cpu, struct +@@ -4232,7 +4238,8 @@ bool workqueue_congested(int cpu, struct pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); ret = !list_empty(&pwq->delayed_works); @@ -274,7 +274,7 @@ Signed-off-by: Thomas Gleixner return ret; } -@@ -4211,15 +4218,15 @@ unsigned int work_busy(struct work_struc +@@ -4258,15 +4265,15 @@ unsigned int work_busy(struct work_struc if (work_pending(work)) ret |= WORK_BUSY_PENDING; @@ -294,7 +294,7 @@ Signed-off-by: Thomas Gleixner return ret; } -@@ -4408,7 +4415,7 @@ void show_workqueue_state(void) +@@ -4455,7 +4462,7 @@ void show_workqueue_state(void) unsigned long flags; int pi; @@ -303,7 +303,7 @@ Signed-off-by: Thomas Gleixner pr_info("Showing busy workqueues and worker pools:\n"); -@@ -4461,7 +4468,7 @@ void show_workqueue_state(void) +@@ -4508,7 +4515,7 @@ void show_workqueue_state(void) spin_unlock_irqrestore(&pool->lock, flags); } @@ -312,7 +312,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -4822,16 +4829,16 @@ bool freeze_workqueues_busy(void) +@@ -4869,16 +4876,16 @@ bool freeze_workqueues_busy(void) * nr_active is monotonically decreasing. It's safe * to peek without lock. */ @@ -332,7 +332,7 @@ Signed-off-by: Thomas Gleixner } out_unlock: mutex_unlock(&wq_pool_mutex); -@@ -5021,7 +5028,8 @@ static ssize_t wq_pool_ids_show(struct d +@@ -5068,7 +5075,8 @@ static ssize_t wq_pool_ids_show(struct d const char *delim = ""; int node, written = 0; @@ -342,7 +342,7 @@ Signed-off-by: Thomas Gleixner for_each_node(node) { written += scnprintf(buf + written, PAGE_SIZE - written, "%s%d:%d", delim, node, -@@ -5029,7 +5037,8 @@ static ssize_t wq_pool_ids_show(struct d +@@ -5076,7 +5084,8 @@ static ssize_t wq_pool_ids_show(struct d delim = " "; } written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); diff --git a/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch b/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch index a29f990ad..94cb6917a 100644 --- a/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch +++ b/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Sun, 2 Nov 2014 08:31:37 +0100 Subject: x86: UV: raw_spinlock conversion -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Shrug. Lots of hobbyists have a beast in their basement, right? @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h -@@ -624,9 +624,9 @@ struct bau_control { +@@ -643,9 +643,9 @@ struct bau_control { cycles_t send_message; cycles_t period_end; cycles_t period_time; @@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* tunables */ int max_concurr; int max_concurr_const; -@@ -815,15 +815,15 @@ static inline int atom_asr(short i, stru +@@ -847,15 +847,15 @@ static inline int atom_asr(short i, stru * to be lowered below the current 'v'. atomic_add_unless can only stop * on equal. 
*/ @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c -@@ -747,9 +747,9 @@ static void destination_plugged(struct b +@@ -740,9 +740,9 @@ static void destination_plugged(struct b quiesce_local_uvhub(hmaster); @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior end_uvhub_quiesce(hmaster); -@@ -769,9 +769,9 @@ static void destination_timeout(struct b +@@ -762,9 +762,9 @@ static void destination_timeout(struct b quiesce_local_uvhub(hmaster); @@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior end_uvhub_quiesce(hmaster); -@@ -792,7 +792,7 @@ static void disable_for_period(struct ba +@@ -785,7 +785,7 @@ static void disable_for_period(struct ba cycles_t tm1; hmaster = bcp->uvhub_master; @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!bcp->baudisabled) { stat->s_bau_disabled++; tm1 = get_cycles(); -@@ -805,7 +805,7 @@ static void disable_for_period(struct ba +@@ -798,7 +798,7 @@ static void disable_for_period(struct ba } } } @@ -93,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void count_max_concurr(int stat, struct bau_control *bcp, -@@ -868,7 +868,7 @@ static void record_send_stats(cycles_t t +@@ -861,7 +861,7 @@ static void record_send_stats(cycles_t t */ static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) { @@ -102,7 +102,7 @@ Signed-off-by: Sebastian Andrzej Siewior atomic_t *v; v = &hmaster->active_descriptor_count; -@@ -1001,7 +1001,7 @@ static int check_enable(struct bau_contr +@@ -995,7 +995,7 @@ static int check_enable(struct bau_contr struct bau_control *hmaster; hmaster = bcp->uvhub_master; @@ -111,7 +111,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { stat->s_bau_reenabled++; for_each_present_cpu(tcpu) { -@@ -1013,10 +1013,10 @@ static int check_enable(struct bau_contr +@@ -1007,10 +1007,10 @@ static int check_enable(struct bau_contr tbcp->period_giveups = 0; } } @@ -124,7 +124,7 @@ Signed-off-by: Sebastian Andrzej Siewior return -1; } -@@ -1938,9 +1938,9 @@ static void __init init_per_cpu_tunables +@@ -1941,9 +1941,9 @@ static void __init init_per_cpu_tunables bcp->cong_reps = congested_reps; bcp->disabled_period = sec_2_cycles(disabled_period); bcp->giveup_limit = giveup_limit; diff --git a/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch b/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch index 3b60e6658..439bae962 100644 --- a/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch +++ b/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch @@ -1,7 +1,7 @@ Subject: x86: crypto: Reduce preempt disabled regions From: Peter Zijlstra Date: Mon, 14 Nov 2011 18:19:27 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Restrict the preempt disabled regions to the actual floating point operations and enable preemption for the administrative actions. 
@@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c -@@ -374,14 +374,14 @@ static int ecb_encrypt(struct skcipher_r +@@ -386,14 +386,14 @@ static int ecb_encrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -396,14 +396,14 @@ static int ecb_decrypt(struct skcipher_r +@@ -408,14 +408,14 @@ static int ecb_decrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -418,14 +418,14 @@ static int cbc_encrypt(struct skcipher_r +@@ -430,14 +430,14 @@ static int cbc_encrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -70,7 +70,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -440,14 +440,14 @@ static int cbc_decrypt(struct skcipher_r +@@ -452,14 +452,14 @@ static int cbc_decrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -87,7 +87,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -497,18 +497,20 @@ static int ctr_crypt(struct skcipher_req +@@ -509,18 +509,20 @@ static int ctr_crypt(struct skcipher_req err = skcipher_walk_virt(&walk, req, true); diff --git a/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch b/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch index dd19ee842..55837d9fd 100644 --- a/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch +++ b/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 11 Mar 2013 17:09:55 +0100 Subject: x86/highmem: Add a "already used pte" check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz This is a copy from kmap_atomic_prot(). diff --git a/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch b/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch index bbac5f9b0..5fb22e515 100644 --- a/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch +++ b/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:27 -0500 Subject: x86/ioapic: Do not unmask io_apic when interrupt is in progress -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz With threaded interrupts we might see an interrupt in progress on migration. Do not unmask it when this is the case. 
@@ -16,7 +16,7 @@ xXx --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c -@@ -1711,7 +1711,8 @@ static bool io_apic_level_ack_pending(st +@@ -1690,7 +1690,8 @@ static bool io_apic_level_ack_pending(st static inline bool ioapic_irqd_mask(struct irq_data *data) { /* If we are moving the irq we need to mask it */ diff --git a/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch b/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch index db05e7a99..a9e94099f 100644 --- a/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch +++ b/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch @@ -1,7 +1,7 @@ Subject: x86: kvm Require const tsc for RT From: Thomas Gleixner Date: Sun, 06 Nov 2011 12:26:18 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Non constant TSC is a nightmare on bare metal already, but with virtualization it becomes a complete disaster because the workarounds @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -6118,6 +6118,13 @@ int kvm_arch_init(void *opaque) +@@ -6133,6 +6133,13 @@ int kvm_arch_init(void *opaque) goto out; } diff --git a/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch b/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch index 8c2b3be91..7a7634e1c 100644 --- a/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch +++ b/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Mon, 13 Dec 2010 16:33:39 +0100 Subject: x86: Convert mce timer to hrtimer -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz mce_timer is started in atomic contexts of cpu bringup. This results in might_sleep() warnings on RT. 
Convert mce_timer to a hrtimer to @@ -27,7 +27,7 @@ fold in: --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -41,6 +41,7 @@ +@@ -42,6 +42,7 @@ #include #include #include @@ -35,7 +35,7 @@ fold in: #include #include -@@ -1315,7 +1316,7 @@ int memory_failure(unsigned long pfn, in +@@ -1345,7 +1346,7 @@ int memory_failure(unsigned long pfn, in static unsigned long check_interval = INITIAL_CHECK_INTERVAL; static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ @@ -44,7 +44,7 @@ fold in: static unsigned long mce_adjust_timer_default(unsigned long interval) { -@@ -1324,27 +1325,19 @@ static unsigned long mce_adjust_timer_de +@@ -1354,27 +1355,19 @@ static unsigned long mce_adjust_timer_de static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; @@ -78,7 +78,7 @@ fold in: iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { -@@ -1367,7 +1360,11 @@ static void mce_timer_fn(unsigned long d +@@ -1397,7 +1390,11 @@ static void mce_timer_fn(unsigned long d done: __this_cpu_write(mce_next_interval, iv); @@ -91,7 +91,7 @@ fold in: } /* -@@ -1375,7 +1372,7 @@ static void mce_timer_fn(unsigned long d +@@ -1405,7 +1402,7 @@ static void mce_timer_fn(unsigned long d */ void mce_timer_kick(unsigned long interval) { @@ -100,7 +100,7 @@ fold in: unsigned long iv = __this_cpu_read(mce_next_interval); __start_timer(t, interval); -@@ -1390,7 +1387,7 @@ static void mce_timer_delete_all(void) +@@ -1420,7 +1417,7 @@ static void mce_timer_delete_all(void) int cpu; for_each_online_cpu(cpu) @@ -108,8 +108,8 @@ fold in: + hrtimer_cancel(&per_cpu(mce_timer, cpu)); } - static void mce_do_trigger(struct work_struct *work) -@@ -1725,7 +1722,7 @@ static void __mcheck_cpu_clear_vendor(st + /* +@@ -1749,7 +1746,7 @@ static void __mcheck_cpu_clear_vendor(st } } @@ -118,7 +118,7 @@ fold in: { unsigned long iv = check_interval * HZ; -@@ -1738,18 +1735,19 @@ static void mce_start_timer(struct timer +@@ -1762,18 +1759,19 @@ static void mce_start_timer(struct timer static void __mcheck_cpu_setup_timer(void) { @@ -144,7 +144,7 @@ fold in: mce_start_timer(t); } -@@ -2509,7 +2507,7 @@ static int mce_cpu_dead(unsigned int cpu +@@ -2270,7 +2268,7 @@ static int mce_cpu_dead(unsigned int cpu static int mce_cpu_online(unsigned int cpu) { @@ -153,7 +153,7 @@ fold in: int ret; mce_device_create(cpu); -@@ -2526,10 +2524,10 @@ static int mce_cpu_online(unsigned int c +@@ -2287,10 +2285,10 @@ static int mce_cpu_online(unsigned int c static int mce_cpu_pre_down(unsigned int cpu) { diff --git a/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch b/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch index 129d53275..543a4729a 100644 --- a/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch +++ b/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch @@ -1,7 +1,7 @@ Subject: x86/mce: use swait queue for mce wakeups From: Steven Rostedt Date: Fri, 27 Feb 2015 15:20:37 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz We had a customer report a lockup on a 3.0-rt kernel that had the following backtrace: @@ -56,43 +56,31 @@ Signed-off-by: Sebastian Andrzej Siewior [wagi: use work-simple framework to defer work to a kthread] Signed-off-by: Daniel Wagner --- - arch/x86/kernel/cpu/mcheck/mce.c 
| 68 ++++++++++++++++++++++++++++++++------- - 1 file changed, 56 insertions(+), 12 deletions(-) + arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 37 +++++++++++++++++++++++++++++--- + 1 file changed, 34 insertions(+), 3 deletions(-) ---- a/arch/x86/kernel/cpu/mcheck/mce.c -+++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -42,6 +42,7 @@ - #include - #include - #include +--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c ++++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c +@@ -14,6 +14,7 @@ + #include + #include + #include +#include - #include - #include -@@ -1397,6 +1398,56 @@ static void mce_do_trigger(struct work_s + #include "mce-internal.h" + +@@ -86,13 +87,43 @@ static void mce_do_trigger(struct work_s static DECLARE_WORK(mce_trigger_work, mce_do_trigger); -+static void __mce_notify_work(struct swork_event *event) -+{ -+ /* Not more than two messages every minute */ -+ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); -+ -+ /* wake processes polling /dev/mcelog */ -+ wake_up_interruptible(&mce_chrdev_wait); -+ -+ /* -+ * There is no risk of missing notifications because -+ * work_pending is always cleared before the function is -+ * executed. -+ */ -+ if (mce_helper[0] && !work_pending(&mce_trigger_work)) -+ schedule_work(&mce_trigger_work); -+ -+ if (__ratelimit(&ratelimit)) -+ pr_info(HW_ERR "Machine check events logged\n"); -+} -+ +- +-void mce_work_trigger(void) ++static void __mce_work_trigger(struct swork_event *event) + { + if (mce_helper[0]) + schedule_work(&mce_trigger_work); + } + +#ifdef CONFIG_PREEMPT_RT_FULL +static bool notify_work_ready __read_mostly; +static struct swork_event notify_work; @@ -105,56 +93,34 @@ Signed-off-by: Daniel Wagner + if (err) + return err; + -+ INIT_SWORK(¬ify_work, __mce_notify_work); ++ INIT_SWORK(¬ify_work, __mce_work_trigger); + notify_work_ready = true; + return 0; +} + -+static void mce_notify_work(void) ++void mce_work_trigger(void) +{ + if (notify_work_ready) + swork_queue(¬ify_work); +} ++ +#else -+static void mce_notify_work(void) ++void mce_work_trigger(void) +{ -+ __mce_notify_work(NULL); ++ __mce_work_trigger(NULL); +} +static inline int mce_notify_work_init(void) { return 0; } +#endif + - /* - * Notify the user(s) about new machine check events. 
- * Can be called from interrupt context, but not from machine check/NMI -@@ -1404,19 +1455,8 @@ static DECLARE_WORK(mce_trigger_work, mc - */ - int mce_notify_irq(void) + static ssize_t + show_trigger(struct device *s, struct device_attribute *attr, char *buf) { -- /* Not more than two messages every minute */ -- static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); -- - if (test_and_clear_bit(0, &mce_need_notify)) { -- /* wake processes polling /dev/mcelog */ -- wake_up_interruptible(&mce_chrdev_wait); -- -- if (mce_helper[0]) -- schedule_work(&mce_trigger_work); -- -- if (__ratelimit(&ratelimit)) -- pr_info(HW_ERR "Machine check events logged\n"); -- -+ mce_notify_work(); - return 1; - } - return 0; -@@ -2561,6 +2601,10 @@ static __init int mcheck_init_device(voi - goto err_out; - } +@@ -356,7 +387,7 @@ static __init int dev_mcelog_init_device -+ err = mce_notify_work_init(); -+ if (err) -+ goto err_out; -+ - if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { - err = -ENOMEM; - goto err_out; + return err; + } +- ++ mce_notify_work_init(); + mce_register_decode_chain(&dev_mcelog_nb); + return 0; + } diff --git a/debian/patches/features/all/rt/x86-preempt-lazy.patch b/debian/patches/features/all/rt/x86-preempt-lazy.patch index beb5e9fb3..a53f34b2a 100644 --- a/debian/patches/features/all/rt/x86-preempt-lazy.patch +++ b/debian/patches/features/all/rt/x86-preempt-lazy.patch @@ -1,7 +1,7 @@ Subject: x86: Support for lazy preemption From: Thomas Gleixner Date: Thu, 01 Nov 2012 11:03:47 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Implement the x86 pieces for lazy preempt. @@ -18,26 +18,26 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -160,6 +160,7 @@ config X86 +@@ -169,6 +169,7 @@ config X86 + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PREEMPT_LAZY - select HAVE_STACK_VALIDATION if X86_64 - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_RCU_TABLE_FREE + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c -@@ -130,7 +130,7 @@ static long syscall_trace_enter(struct p +@@ -132,7 +132,7 @@ static long syscall_trace_enter(struct p #define EXIT_TO_USERMODE_LOOP_FLAGS \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ -- _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) -+ _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) +- _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING) ++ _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING) static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { -@@ -146,7 +146,7 @@ static void exit_to_usermode_loop(struct +@@ -147,7 +147,7 @@ static void exit_to_usermode_loop(struct /* We have work to do. 
*/ local_irq_enable(); @@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner #ifdef ARCH_RT_DELAYS_SIGNAL_SEND --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S -@@ -340,8 +340,25 @@ END(ret_from_exception) +@@ -338,8 +338,25 @@ END(ret_from_exception) ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) .Lneed_resched: @@ -76,7 +76,7 @@ Signed-off-by: Thomas Gleixner call preempt_schedule_irq --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S -@@ -541,7 +541,23 @@ GLOBAL(retint_user) +@@ -623,7 +623,23 @@ GLOBAL(retint_user) bt $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) @@ -102,7 +102,7 @@ Signed-off-by: Thomas Gleixner 1: --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h -@@ -85,17 +85,46 @@ static __always_inline void __preempt_co +@@ -86,17 +86,46 @@ static __always_inline void __preempt_co * a decrement which hits zero means we have no preempt_count and should * reschedule. */ @@ -152,7 +152,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PREEMPT --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h -@@ -54,11 +54,14 @@ struct task_struct; +@@ -55,11 +55,14 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ @@ -167,7 +167,7 @@ Signed-off-by: Thomas Gleixner } #define init_stack (init_thread_union.stack) -@@ -67,6 +70,10 @@ struct thread_info { +@@ -68,6 +71,10 @@ struct thread_info { #include @@ -178,23 +178,23 @@ Signed-off-by: Thomas Gleixner #endif /* -@@ -85,6 +92,7 @@ struct thread_info { +@@ -83,6 +90,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ - #define TIF_NOTSC 16 /* TSC is not accessible in userland */ -@@ -108,6 +116,7 @@ struct thread_info { + #define TIF_PATCH_PENDING 13 /* pending live patching update */ +@@ -109,6 +117,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) - #define _TIF_NOTSC (1 << TIF_NOTSC) -@@ -143,6 +152,8 @@ struct thread_info { + #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) +@@ -150,6 +159,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -205,7 +205,7 @@ Signed-off-by: Thomas Gleixner /* --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c -@@ -36,6 +36,7 @@ void common(void) { +@@ -37,6 +37,7 @@ void common(void) { BLANK(); OFFSET(TASK_TI_flags, task_struct, thread_info.flags); @@ -213,7 +213,7 @@ Signed-off-by: Thomas Gleixner OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); -@@ -92,4 +93,5 @@ void common(void) { +@@ -93,4 +94,5 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); diff --git a/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch b/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch index 9a53942ac..e95b06489 100644 --- 
a/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch +++ b/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch @@ -1,7 +1,7 @@ From: Yang Shi Date: Thu, 10 Dec 2015 10:58:51 -0800 Subject: x86/signal: delay calling signals on 32bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz When running some ptrace single step tests on x86-32 machine, the below problem is triggered: @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h -@@ -36,7 +36,7 @@ typedef struct { +@@ -37,7 +37,7 @@ typedef struct { * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the * trap. */ diff --git a/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch b/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch index 1c889a75b..076eb3806 100644 --- a/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch +++ b/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 16 Dec 2010 14:25:18 +0100 Subject: x86: stackprotector: Avoid random pool on rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz CPU bringup calls into the random pool to initialize the stack canary. During boot that works nicely even on RT as the might sleep @@ -15,12 +15,12 @@ Reported-by: Carsten Emde Signed-off-by: Thomas Gleixner --- - arch/x86/include/asm/stackprotector.h | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) + arch/x86/include/asm/stackprotector.h | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h -@@ -59,7 +59,7 @@ +@@ -60,7 +60,7 @@ */ static __always_inline void boot_init_stack_canary(void) { @@ -29,11 +29,10 @@ Signed-off-by: Thomas Gleixner u64 tsc; #ifdef CONFIG_X86_64 -@@ -70,8 +70,15 @@ static __always_inline void boot_init_st +@@ -71,8 +71,14 @@ static __always_inline void boot_init_st * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. -+ * + * For preempt-rt we need to weaken the randomness a bit, as + * we can't call into the random generator from atomic context + * due to locking constraints. We just leave canary @@ -44,4 +43,4 @@ Signed-off-by: Thomas Gleixner +#endif tsc = rdtsc(); canary += tsc + (tsc << 32UL); - + canary &= CANARY_MASK; diff --git a/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch b/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch index a200b1e9b..bd0988363 100644 --- a/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch +++ b/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sun, 26 Jul 2009 02:21:32 +0200 Subject: x86: Use generic rwsem_spinlocks on -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.9-rt7.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz Simplifies the separation of anon_rw_semaphores and rw_semaphores for -rt. 
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -242,8 +242,11 @@ config ARCH_MAY_HAVE_PC_FDC +@@ -255,8 +255,11 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API diff --git a/debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch b/debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch new file mode 100644 index 000000000..0d36dcd0d --- /dev/null +++ b/debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 5 Oct 2017 14:38:52 +0200 +Subject: [PATCH] xen/9pfs: don't inclide rwlock.h directly. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14-rt1.tar.xz + +rwlock.h should not be included directly. Instead linux/splinlock.h +should be included. One thing it does is to break the RT build. + +Cc: Eric Van Hensbergen +Cc: Ron Minnich +Cc: Latchesar Ionkov +Cc: "David S. Miller" +Cc: v9fs-developer@lists.sourceforge.net +Cc: netdev@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + net/9p/trans_xen.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/9p/trans_xen.c ++++ b/net/9p/trans_xen.c +@@ -38,7 +38,6 @@ + + #include + #include +-#include + #include + #include + #include diff --git a/debian/patches/series-rt b/debian/patches/series-rt index f20286333..11b0da843 100644 --- a/debian/patches/series-rt +++ b/debian/patches/series-rt @@ -5,120 +5,53 @@ ############################################################ # UPSTREAM changes queued ############################################################ +features/all/rt/rcu-Suppress-lockdep-false-positive-boost_mtx-compla.patch ############################################################ # UPSTREAM FIXES, patches pending ############################################################ -features/all/rt/timer-make-the-base-lock-raw.patch ############################################################ # Stuff broken upstream, patches submitted ############################################################ -features/all/rt/lockdep-Handle-statically-initialized-PER_CPU-locks-.patch -features/all/rt/lockdep-Fix-compilation-error-for-CONFIG_MODULES-and.patch -features/all/rt/lockdep-Fix-per-cpu-static-objects.patch -features/all/rt/0001-futex-Cleanup-variable-names-for-futex_top_waiter.patch -features/all/rt/0002-futex-Use-smp_store_release-in-mark_wake_futex.patch -features/all/rt/0003-futex-Remove-rt_mutex_deadlock_account_.patch -features/all/rt/0004-futex-rt_mutex-Provide-futex-specific-rt_mutex-API.patch -features/all/rt/0005-futex-Change-locking-rules.patch -features/all/rt/0006-futex-Cleanup-refcounting.patch -features/all/rt/0007-futex-Rework-inconsistent-rt_mutex-futex_q-state.patch -features/all/rt/0008-futex-Pull-rt_mutex_futex_unlock-out-from-under-hb-l.patch -features/all/rt/0009-futex-rt_mutex-Introduce-rt_mutex_init_waiter.patch -features/all/rt/0010-futex-rt_mutex-Restructure-rt_mutex_finish_proxy_loc.patch -features/all/rt/0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch -features/all/rt/0012-futex-Futex_unlock_pi-determinism.patch -features/all/rt/0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch -features/all/rt/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch -features/all/rt/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch -features/all/rt/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch -features/all/rt/0004-rtmutex-Clean-up.patch 
-features/all/rt/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch -features/all/rt/0006-sched-tracing-Update-trace_sched_pi_setprio.patch -features/all/rt/0007-rtmutex-Fix-PI-chain-order-integrity.patch -features/all/rt/0008-rtmutex-Fix-more-prio-comparisons.patch -features/all/rt/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch -features/all/rt/0001-futex-Avoid-freeing-an-active-timer.patch -features/all/rt/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch -features/all/rt/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch -features/all/rt/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch -features/all/rt/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch - -features/all/rt/arm64-cpufeature-don-t-use-mutex-in-bringup-path.patch -features/all/rt/smp-hotplug-Move-unparking-of-percpu-threads-to-the-.patch - -### -# get_online_cpus() rework. -# cpus_allowed queue from sched/core -features/all/rt/0001-ia64-topology-Remove-cpus_allowed-manipulation.patch -features/all/rt/0002-workqueue-Provide-work_on_cpu_safe.patch -features/all/rt/0003-ia64-salinfo-Replace-racy-task-affinity-logic.patch -features/all/rt/0004-ia64-sn-hwperf-Replace-racy-task-affinity-logic.patch -features/all/rt/0005-powerpc-smp-Replace-open-coded-task-affinity-logic.patch -features/all/rt/0006-sparc-sysfs-Replace-racy-task-affinity-logic.patch -features/all/rt/0007-ACPI-processor-Fix-error-handling-in-__acpi_processo.patch -features/all/rt/0008-ACPI-processor-Replace-racy-task-affinity-logic.patch -features/all/rt/0009-cpufreq-ia64-Replace-racy-task-affinity-logic.patch -features/all/rt/0010-cpufreq-sh-Replace-racy-task-affinity-logic.patch -features/all/rt/0011-cpufreq-sparc-us3-Replace-racy-task-affinity-logic.patch -features/all/rt/0012-cpufreq-sparc-us2e-Replace-racy-task-affinity-logic.patch -features/all/rt/0013-crypto-N2-Replace-racy-task-affinity-logic.patch - -# a few patches from tip's sched/core -features/all/rt/0001-sched-clock-Fix-early-boot-preempt-assumption-in-__s.patch -features/all/rt/0001-init-Pin-init-task-to-the-boot-CPU-initially.patch -features/all/rt/0002-arm-Adjust-system_state-check.patch -features/all/rt/0003-arm64-Adjust-system_state-check.patch -features/all/rt/0004-x86-smp-Adjust-system_state-check.patch -features/all/rt/0005-metag-Adjust-system_state-check.patch -features/all/rt/0006-powerpc-Adjust-system_state-check.patch -features/all/rt/0007-ACPI-Adjust-system_state-check.patch -features/all/rt/0008-mm-Adjust-system_state-check.patch -features/all/rt/0009-cpufreq-pasemi-Adjust-system_state-check.patch -features/all/rt/0010-iommu-vt-d-Adjust-system_state-checks.patch -features/all/rt/0012-async-Adjust-system_state-checks.patch -features/all/rt/0013-extable-Adjust-system_state-checks.patch -features/all/rt/0014-printk-Adjust-system_state-checks.patch -features/all/rt/0015-mm-vmscan-Adjust-system_state-checks.patch -features/all/rt/0016-init-Introduce-SYSTEM_SCHEDULING-state.patch -features/all/rt/0017-sched-core-Enable-might_sleep-and-smp_processor_id-c.patch - -# recursive get_online_cpus() invocations from smp/hotplug -#0001-cpu-hotplug-Provide-cpus_read-write_-un-lock.patch -#0002-cpu-hotplug-Provide-lockdep_assert_cpus_held.patch -#0003-cpu-hotplug-Provide-cpuhp_setup-remove_state-_nocall.patch -#0004-cpu-hotplug-Add-__cpuhp_state_add_instance_cpuslocke.patch -#0005-stop_machine-Provide-stop_machine_cpuslocked.patch -#0006-padata-Make-padata_alloc-static.patch -#0007-padata-Avoid-nested-calls-to-cpus_read_lock-in-pcryp.patch 
-#0008-x86-mtrr-Remove-get_online_cpus-from-mtrr_save_state.patch -#0009-cpufreq-Use-cpuhp_setup_state_nocalls_cpuslocked.patch -#0010-KVM-PPC-Book3S-HV-Use-cpuhp_setup_state_nocalls_cpus.patch -#0011-hwtracing-coresight-etm3x-Use-cpuhp_setup_state_noca.patch -#0012-hwtracing-coresight-etm4x-Use-cpuhp_setup_state_noca.patch -#0013-perf-x86-intel-cqm-Use-cpuhp_setup_state_cpuslocked.patch -#0014-ARM-hw_breakpoint-Use-cpuhp_setup_state_cpuslocked.patch -#0015-s390-kernel-Use-stop_machine_cpuslocked.patch -#0016-powerpc-powernv-Use-stop_machine_cpuslocked.patch -#0017-cpu-hotplug-Use-stop_machine_cpuslocked-in-takedown_.patch -#0018-x86-perf-Drop-EXPORT-of-perf_check_microcode.patch -#0019-perf-x86-intel-Drop-get_online_cpus-in-intel_snb_che.patch -#0020-PCI-Use-cpu_hotplug_disable-instead-of-get_online_cp.patch -#0021-PCI-Replace-the-racy-recursion-prevention.patch -#0022-ACPI-processor-Use-cpu_hotplug_disable-instead-of-ge.patch -#0023-perf-tracing-cpuhotplug-Fix-locking-order.patch -#0024-jump_label-Reorder-hotplug-lock-and-jump_label_lock.patch -#0025-kprobes-Cure-hotplug-lock-ordering-issues.patch -#0026-arm64-Prevent-cpu-hotplug-rwsem-recursion.patch -#0027-arm-Prevent-hotplug-rwsem-recursion.patch -#0028-s390-Prevent-hotplug-rwsem-recursion.patch -#0029-cpu-hotplug-Convert-hotplug-locking-to-percpu-rwsem.patch -#0030-sched-Provide-is_percpu_thread-helper.patch -#0031-acpi-processor-Prevent-cpu-hotplug-deadlock.patch -#0032-cpuhotplug-Link-lock-stacks-for-hotplug-callbacks.patch -### +# soft hrtimer patches (v3) +features/all/rt/0001-timers-Use-static-keys-for-migrate_enable-and-nohz_a.patch +features/all/rt/0002-hrtimer-Correct-blantanly-wrong-comment.patch +features/all/rt/0003-hrtimer-Fix-kerneldoc-for-struct-hrtimer_cpu_base.patch +features/all/rt/0004-hrtimer-Cleanup-clock-argument-in-schedule_hrtimeout.patch +features/all/rt/0005-hrtimer-Fix-hrtimer-function-description.patch +features/all/rt/0006-hrtimer-Ensure-POSIX-compliance-relative-CLOCK_REALT.patch +features/all/rt/0007-hrtimer-Cleanup-hrtimer_mode-enum.patch +features/all/rt/0008-tracing-hrtimer-Take-all-clock-bases-and-modes-into-.patch +features/all/rt/0009-tracing-hrtimer-Print-hrtimer-mode-in-hrtimer_start-.patch +features/all/rt/0010-hrtimer-Switch-for-loop-to-_ffs-evaluation.patch +features/all/rt/0011-hrtimer-Store-running-timer-in-hrtimer_clock_base.patch +features/all/rt/0012-hrtimer-Make-room-in-struct-hrtimer_cpu_base.patch +features/all/rt/0013-hrtimer-Reduce-conditional-code-hres_active.patch +features/all/rt/0014-hrtimer-Use-accesor-functions-instead-of-direct-acce.patch +features/all/rt/0015-hrtimer-Make-the-remote-enqueue-check-unconditional.patch +features/all/rt/0016-hrtimer-Make-hrtimer_cpu_base.next_timer-handling-un.patch +features/all/rt/0017-hrtimer-Make-hrtimer_reprogramm-unconditional.patch +features/all/rt/0018-hrtimer-Reduce-conditional-code-and-make-hrtimer_for.patch +features/all/rt/0019-hrtimer-Unify-handling-of-hrtimer-remove.patch +features/all/rt/0020-hrtimer-Unify-handling-of-remote-enqueue.patch +features/all/rt/0021-hrtimer-Make-remote-enqueue-decision-less-restrictiv.patch +features/all/rt/0022-hrtimer-Remove-base-argument-from-hrtimer_reprogram.patch +features/all/rt/0023-hrtimer-Split-hrtimer_start_range_ns.patch +features/all/rt/0024-hrtimer-Split-__hrtimer_get_next_event.patch +features/all/rt/0025-hrtimer-Use-irqsave-irqrestore-around-__run_hrtimer.patch +features/all/rt/0026-hrtimer-Add-clock-bases-and-hrtimer-mode-for-soft-ir.patch 
+features/all/rt/0027-hrtimer-Prepare-handling-of-hard-and-softirq-based-h.patch +features/all/rt/0028-hrtimer-Implement-support-for-softirq-based-hrtimers.patch +features/all/rt/0029-hrtimer-Implement-SOFT-HARD-clock-base-selection.patch +features/all/rt/0030-can-bcm-Replace-hrtimer_tasklet-with-softirq-based-h.patch +features/all/rt/0031-mac80211_hwsim-Replace-hrtimer-tasklet-with-softirq-.patch +features/all/rt/0032-xfrm-Replace-hrtimer-tasklet-with-softirq-hrtimer.patch +features/all/rt/0033-softirq-Remove-tasklet_hrtimer.patch +features/all/rt/0034-ALSA-dummy-Replace-tasklet-with-softirq-hrtimer.patch +features/all/rt/0035-usb-gadget-NCM-Replace-tasklet-with-softirq-hrtimer.patch +features/all/rt/0036-net-mvpp2-Replace-tasklet-with-softirq-hrtimer.patch # Those two should vanish soon (not use PIT during bootup) features/all/rt/at91_dont_enable_disable_clock.patch @@ -134,12 +67,8 @@ features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-inva ############################################################ features/all/rt/rtmutex--Handle-non-enqueued-waiters-gracefully.patch features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch -features/all/rt/fs-dcache-init-in_lookup_hashtable.patch -features/all/rt/iommu-iova-don-t-disable-preempt-around-this_cpu_ptr.patch -features/all/rt/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch features/all/rt/rxrpc-remove-unused-static-variables.patch -features/all/rt/mm-swap-don-t-disable-preemption-while-taking-the-pe.patch -features/all/rt/cpu_pm-replace-raw_notifier-to-atomic_notifier.patch +features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch # Wants a different fix for upstream features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch @@ -147,11 +76,16 @@ features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch ############################################################ # Submitted on LKML ############################################################ +features/all/rt/Bluetooth-avoid-recursive-locking-in-hci_send_to_cha.patch +features/all/rt/iommu-iova-Use-raw_cpu_ptr-instead-of-get_cpu_ptr-fo.patch +features/all/rt/greybus-audio-don-t-inclide-rwlock.h-directly.patch +features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch # SPARC part of erly printk consolidation features/all/rt/sparc64-use-generic-rwsem-spinlocks-rt.patch # SRCU +# XXX features/all/rt/kernel-SRCU-provide-a-static-initializer.patch ############################################################ @@ -181,43 +115,53 @@ features/all/rt/kernel-SRCU-provide-a-static-initializer.patch ############################################################ # Stuff which should go upstream ASAP ############################################################ -features/all/rt/CPUFREQ-Loongson2-drop-set_cpus_allowed_ptr.patch features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch features/all/rt/add_migrate_disable.patch -# tracing: Inter-event (e.g. 
latency) support | 2017-06-27 -features/all/rt/0001-tracing-Add-hist_field_name-accessor.patch -features/all/rt/0002-tracing-Reimplement-log2.patch -features/all/rt/0003-ring-buffer-Add-interface-for-setting-absolute-time-.patch -features/all/rt/0004-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch -features/all/rt/0005-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch -features/all/rt/0006-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch -features/all/rt/0007-tracing-Increase-tracing-map-KEYS_MAX-size.patch -features/all/rt/0008-tracing-Break-out-hist-trigger-assignment-parsing.patch -features/all/rt/0009-tracing-Make-traceprobe-parsing-code-reusable.patch -features/all/rt/0010-tracing-Add-NO_DISCARD-event-file-flag.patch -features/all/rt/0011-tracing-Add-post-trigger-flag-to-hist-trigger-comman.patch -features/all/rt/0012-tracing-Add-hist-trigger-timestamp-support.patch -features/all/rt/0013-tracing-Add-per-element-variable-support-to-tracing_.patch -features/all/rt/0014-tracing-Add-hist_data-member-to-hist_field.patch -features/all/rt/0015-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch -features/all/rt/0016-tracing-Add-variable-support-to-hist-triggers.patch -features/all/rt/0017-tracing-Account-for-variables-in-named-trigger-compa.patch -features/all/rt/0018-tracing-Add-simple-expression-support-to-hist-trigge.patch -features/all/rt/0019-tracing-Add-variable-reference-handling-to-hist-trig.patch -features/all/rt/0020-tracing-Add-support-for-dynamic-tracepoints.patch -features/all/rt/0021-tracing-Add-hist-trigger-action-hook.patch -features/all/rt/0022-tracing-Add-support-for-synthetic-events.patch -features/all/rt/0023-tracing-Add-onmatch-hist-trigger-action-support.patch -features/all/rt/0024-tracing-Add-onmax-hist-trigger-action-support.patch -features/all/rt/0025-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch -features/all/rt/0026-tracing-Make-duplicate-count-from-tracing_map-availa.patch -features/all/rt/0027-tracing-Add-cpu-field-for-hist-triggers.patch -features/all/rt/0028-tracing-Add-hist-trigger-support-for-variable-refere.patch -features/all/rt/0029-tracing-Add-last-error-error-facility-for-hist-trigg.patch -features/all/rt/0030-tracing-Add-inter-event-hist-trigger-Documentation.patch -features/all/rt/0031-tracing-Make-tracing_set_clock-non-static.patch -features/all/rt/0032-tracing-Add-a-clock-attribute-for-hist-triggers.patch +# tracing: Bug fixes and minor cleanup | 2017-09-22 +features/all/rt/0001-tracing-Steve-s-unofficial-trace_recursive_lock-patc.patch +features/all/rt/0002-tracing-Reverse-the-order-of-trace_types_lock-and-ev.patch +features/all/rt/0003-tracing-Exclude-generic-fields-from-histograms.patch +features/all/rt/0004-tracing-Remove-lookups-from-tracing_map-hitcount.patch +features/all/rt/0005-tracing-Increase-tracing-map-KEYS_MAX-size.patch +features/all/rt/0006-tracing-Make-traceprobe-parsing-code-reusable.patch +features/all/rt/0007-tracing-Clean-up-hist_field_flags-enum.patch +features/all/rt/0008-tracing-Add-hist_field_name-accessor.patch +features/all/rt/0009-tracing-Reimplement-log2.patch +# v3 tracing: Inter-event (e.g. 
latency) support | 2017-09-22 +features/all/rt/0010-tracing-Add-support-to-detect-and-avoid-duplicates.patch +features/all/rt/0011-tracing-Remove-code-which-merges-duplicates.patch +features/all/rt/0012-ring-buffer-Add-interface-for-setting-absolute-time-.patch +features/all/rt/0013-ring-buffer-Redefine-the-unimplemented-RINGBUF_TIME_.patch +features/all/rt/0014-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch +features/all/rt/0015-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch +features/all/rt/0016-tracing-Break-out-hist-trigger-assignment-parsing.patch +features/all/rt/0017-tracing-Add-hist-trigger-timestamp-support.patch +features/all/rt/0018-tracing-Add-per-element-variable-support-to-tracing_.patch +features/all/rt/0019-tracing-Add-hist_data-member-to-hist_field.patch +features/all/rt/0020-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch +features/all/rt/0021-tracing-Add-variable-support-to-hist-triggers.patch +features/all/rt/0022-tracing-Account-for-variables-in-named-trigger-compa.patch +features/all/rt/0023-tracing-Move-get_hist_field_flags.patch +features/all/rt/0024-tracing-Add-simple-expression-support-to-hist-trigge.patch +features/all/rt/0025-tracing-Generalize-per-element-hist-trigger-data.patch +features/all/rt/0026-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch +features/all/rt/0027-tracing-Add-hist_field-type-field.patch +features/all/rt/0028-tracing-Add-variable-reference-handling-to-hist-trig.patch +features/all/rt/0029-tracing-Add-hist-trigger-action-hook.patch +features/all/rt/0030-tracing-Add-support-for-synthetic-events.patch +features/all/rt/0031-tracing-Add-support-for-field-variables.patch +features/all/rt/0032-tracing-Add-onmatch-hist-trigger-action-support.patch +features/all/rt/0033-tracing-Add-onmax-hist-trigger-action-support.patch +features/all/rt/0034-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch +features/all/rt/0035-tracing-Add-cpu-field-for-hist-triggers.patch +features/all/rt/0036-tracing-Add-hist-trigger-support-for-variable-refere.patch +features/all/rt/0037-tracing-Add-last-error-error-facility-for-hist-trigg.patch +features/all/rt/0038-tracing-Add-inter-event-hist-trigger-Documentation.patch +features/all/rt/0039-tracing-Make-tracing_set_clock-non-static.patch +features/all/rt/0040-tracing-Add-a-clock-attribute-for-hist-triggers.patch +features/all/rt/0041-tracing-Increase-trace_recursive_lock-limit-for-synt.patch +features/all/rt/0042-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch # SCHED BLOCK/WQ features/all/rt/block-shorten-interrupt-disabled-regions.patch @@ -264,7 +208,6 @@ features/all/rt/suspend-prevernt-might-sleep-splats.patch # NETWORKING features/all/rt/net-prevent-abba-deadlock.patch features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch -features/all/rt/net-core-remove-explicit-do_softirq-from-busy_poll_s.patch features/all/rt/net_disable_NET_RX_BUSY_POLL.patch # X86 @@ -308,10 +251,8 @@ features/all/rt/local-irq-rt-depending-variants.patch features/all/rt/preempt-nort-rt-variants.patch # local locks & migrate disable -#introduce_migrate_disable_cpu_light.patch features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch features/all/rt/rt-local-irq-lock.patch -features/all/rt/locallock-add-local_lock_on.patch # ANNOTATE local_irq_disable sites features/all/rt/ata-disable-interrupts-if-non-rt.patch @@ -374,7 +315,6 @@ features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch 
features/all/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch features/all/rt/mm-memcontrol-do_not_disable_irq.patch -features/all/rt/mm-memcontrol-mem_cgroup_migrate-replace-another-loc.patch features/all/rt/mm-backing-dev-don-t-disable-IRQs-in-wb_congested_pu.patch features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch @@ -388,14 +328,15 @@ features/all/rt/panic-disable-random-on-rt.patch features/all/rt/timers-prepare-for-full-preemption.patch features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch +# KVM require constant freq TSC (smp function call -> cpufreq) +features/all/rt/x86-kvm-require-const-tsc-for-rt.patch + # HRTIMERS +features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch +features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch features/all/rt/hrtimers-prepare-full-preemption.patch -features/all/rt/hrtimer-enfore-64byte-alignment.patch -features/all/rt/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch -features/all/rt/sched-deadline-dl_task_timer-has-to-be-irqsafe.patch +features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch features/all/rt/timer-fd-avoid-live-lock.patch -features/all/rt/tick-broadcast--Make-hrtimer-irqsafe.patch -features/all/rt/timer-hrtimer-check-properly-for-a-running-timer.patch # POSIX-CPU-TIMERS features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch @@ -415,23 +356,21 @@ features/all/rt/sched-disable-ttwu-queue.patch features/all/rt/sched-disable-rt-group-sched-on-rt.patch features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch +features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch # STOP MACHINE features/all/rt/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch features/all/rt/stop-machine-raw-lock.patch # MIGRATE DISABLE AND PER CPU -# XXX redo features/all/rt/hotplug-light-get-online-cpus.patch -features/all/rt/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch -features/all/rt/re-migrate_disable-race-with-cpu-hotplug-3f.patch features/all/rt/ftrace-migrate-disable-tracing.patch -features/all/rt/hotplug-use-migrate-disable.patch # NOHZ # LOCKDEP features/all/rt/lockdep-no-softirq-accounting-on-rt.patch +features/all/rt/lockdep-disable-self-test.patch # SOFTIRQ features/all/rt/mutex-no-spin-on-rt.patch @@ -451,6 +390,7 @@ features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch # FUTEX/RTMUTEX features/all/rt/rtmutex-futex-prepare-rt.patch features/all/rt/futex-requeue-pi-fix.patch +features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch # RTMUTEX @@ -462,15 +402,19 @@ features/all/rt/rtmutex-Make-lock_killable-work.patch features/all/rt/spinlock-types-separate-raw.patch features/all/rt/rtmutex-avoid-include-hell.patch features/all/rt/rtmutex_dont_include_rcu.patch -features/all/rt/rt-add-rt-locks.patch -features/all/rt/rtmutex-Fix-lock-stealing-logic.patch -features/all/rt/kernel-locking-use-an-exclusive-wait_q-for-sleeper.patch -features/all/rt/rtmutex-add-a-first-shot-of-ww_mutex.patch -features/all/rt/rtmutex-Provide-rt_mutex_lock_state.patch -features/all/rt/rtmutex-Provide-locked-slowpath.patch -features/all/rt/futex-rtmutex-Cure-RT-double-blocking-issue.patch -features/all/rt/rwsem-rt-Lift-single-reader-restriction.patch +features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch 
+features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch +features/all/rt/rtmutex-add-sleeping-lock-implementation.patch +features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch +features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch +features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch +features/all/rt/rtmutex-wire-up-RT-s-locking.patch +features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch +features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch +features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch +features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch +features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch # RCU features/all/rt/peter_zijlstra-frob-rcu.patch @@ -499,6 +443,8 @@ features/all/rt/mm-protect-activate-switch-mm.patch features/all/rt/fs-block-rt-support.patch features/all/rt/fs-ntfs-disable-interrupt-non-rt.patch features/all/rt/fs-jbd2-pull-your-plug-when-waiting-for-space.patch +features/all/rt/fs-dcache-bringt-back-explicit-INIT_HLIST_BL_HEAD-in.patch +features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch # X86 features/all/rt/x86-mce-timer-hrtimer.patch @@ -515,6 +461,7 @@ features/all/rt/block-mq-use-cpu_light.patch features/all/rt/block-mq-drop-preempt-disable.patch features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch features/all/rt/md-raid5-percpu-handling-rt-aware.patch +features/all/rt/md-raid5-do-not-disable-interrupts.patch # CPU CHILL features/all/rt/rt-introduce-cpu-chill.patch @@ -558,7 +505,9 @@ features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch +features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch features/all/rt/net-add-a-lock-around-icmp_sk.patch +features/all/rt/net-use-trylock-in-icmp_sk.patch features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch # NETWORK DEBUGGING AID @@ -608,10 +557,6 @@ features/all/rt/arm-enable-highmem-for-rt.patch # SYSRQ -# KVM require constant freq TSC (smp function call -> cpufreq) -features/all/rt/x86-kvm-require-const-tsc-for-rt.patch -features/all/rt/KVM-lapic-mark-LAPIC-timer-handler-as-irqsafe.patch - # SCSI/FCOE features/all/rt/scsi-fcoe-rt-aware.patch features/all/rt/sas-ata-isci-dont-t-disable-interrupts-in-qc_issue-h.patch @@ -635,16 +580,8 @@ features/all/rt/random-avoid-preempt_disable-ed-section.patch features/all/rt/char-random-don-t-print-that-the-init-is-done.patch # HOTPLUG -# XXX -features/all/rt/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch -features/all/rt/cpu-rt-rework-cpu-down.patch -features/all/rt/cpu-hotplug-Document-why-PREEMPT_RT-uses-a-spinlock.patch -features/all/rt/kernel-cpu-fix-cpu-down-problem-if-kthread-s-cpu-is-.patch -features/all/rt/kernel-hotplug-restore-original-cpu-mask-oncpu-down.patch -features/all/rt/cpu_down_move_migrate_enable_back.patch -features/all/rt/hotplug-Use-set_cpus_allowed_ptr-in-sync_unplug_thre.patch -# -features/all/rt/rt-locking-Reenable-migration-accross-schedule.patch +features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch +features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch # SCSCI QLA2xxx 
features/all/rt/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch @@ -657,6 +594,7 @@ features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch features/all/rt/net-make-devnet_rename_seq-a-mutex.patch # CRYPTO +# XXX features/all/rt/peterz-srcu-crypto-chain.patch # LOCKDEP @@ -664,10 +602,12 @@ features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.p features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch # PERF -features/all/rt/perf-make-swevent-hrtimer-irqsafe.patch -features/all/rt/kernel-perf-mark-perf_cpu_context-s-timer-as-irqsafe.patch # RCU +features/all/rt/srcu-use-cpu_online-instead-custom-check.patch +features/all/rt/srcu-Prohibit-call_srcu-use-under-raw-spinlocks.patch +features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch +features/all/rt/rcu-segcblist-include-rcupdate.h.patch features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch @@ -690,10 +630,11 @@ features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch +features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch +features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch +features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch # I915 -features/all/rt/drm-i915-drop-trace_i915_gem_ring_dispatch-onrt.patch -features/all/rt/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch features/all/rt/drm-i915-init-spinlock-properly-on-RT.patch @@ -710,6 +651,9 @@ features/all/rt/move_sched_delayed_work_to_helper.patch # MD features/all/rt/md-disable-bcache.patch +# Security +features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch + # WORKQUEUE SIGH features/all/rt/workqueue-prevent-deadlock-stall.patch