diff --git a/debian/changelog b/debian/changelog index cfaf0b38b..8963e23f2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -254,6 +254,9 @@ linux (4.16.8-1) UNRELEASED; urgency=medium [ Ben Hutchings ] * kbuild: use -fmacro-prefix-map to make __FILE__ a relative path + [ Salvatore Bonaccorso ] + * [rt] Update to 4.16.7-rt1 and reenable + -- Vagrant Cascadian Mon, 30 Apr 2018 11:23:15 -0700 linux (4.16.5-1) unstable; urgency=medium diff --git a/debian/config/defines b/debian/config/defines index efb68390e..5effcb897 100644 --- a/debian/config/defines +++ b/debian/config/defines @@ -113,7 +113,7 @@ debug-info: true signed-code: false [featureset-rt_base] -enabled: false +enabled: true [description] part-long-up: This kernel is not suitable for SMP (multi-processor, diff --git a/debian/patches/features/all/rt/0001-ARM-at91-add-TCB-registers-definitions.patch b/debian/patches/features/all/rt/0001-ARM-at91-add-TCB-registers-definitions.patch new file mode 100644 index 000000000..d25bfbe50 --- /dev/null +++ b/debian/patches/features/all/rt/0001-ARM-at91-add-TCB-registers-definitions.patch @@ -0,0 +1,236 @@ +From: Alexandre Belloni +Date: Wed, 18 Apr 2018 12:51:38 +0200 +Subject: [PATCH 1/6] ARM: at91: add TCB registers definitions +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add registers and bits definitions for the timer counter blocks found on +Atmel ARM SoCs. + +Tested-by: Alexander Dahl +Tested-by: Andras Szemzo +Signed-off-by: Alexandre Belloni +Signed-off-by: Sebastian Andrzej Siewior +--- + include/soc/at91/atmel_tcb.h | 216 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 216 insertions(+) + create mode 100644 include/soc/at91/atmel_tcb.h + +--- /dev/null ++++ b/include/soc/at91/atmel_tcb.h +@@ -0,0 +1,216 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (C) 2018 Microchip */ ++ ++#ifndef __SOC_ATMEL_TCB_H ++#define __SOC_ATMEL_TCB_H ++ ++/* Channel registers */ ++#define ATMEL_TC_COFFS(c) ((c) * 0x40) ++#define ATMEL_TC_CCR(c) ATMEL_TC_COFFS(c) ++#define ATMEL_TC_CMR(c) (ATMEL_TC_COFFS(c) + 0x4) ++#define ATMEL_TC_SMMR(c) (ATMEL_TC_COFFS(c) + 0x8) ++#define ATMEL_TC_RAB(c) (ATMEL_TC_COFFS(c) + 0xc) ++#define ATMEL_TC_CV(c) (ATMEL_TC_COFFS(c) + 0x10) ++#define ATMEL_TC_RA(c) (ATMEL_TC_COFFS(c) + 0x14) ++#define ATMEL_TC_RB(c) (ATMEL_TC_COFFS(c) + 0x18) ++#define ATMEL_TC_RC(c) (ATMEL_TC_COFFS(c) + 0x1c) ++#define ATMEL_TC_SR(c) (ATMEL_TC_COFFS(c) + 0x20) ++#define ATMEL_TC_IER(c) (ATMEL_TC_COFFS(c) + 0x24) ++#define ATMEL_TC_IDR(c) (ATMEL_TC_COFFS(c) + 0x28) ++#define ATMEL_TC_IMR(c) (ATMEL_TC_COFFS(c) + 0x2c) ++#define ATMEL_TC_EMR(c) (ATMEL_TC_COFFS(c) + 0x30) ++ ++/* Block registers */ ++#define ATMEL_TC_BCR 0xc0 ++#define ATMEL_TC_BMR 0xc4 ++#define ATMEL_TC_QIER 0xc8 ++#define ATMEL_TC_QIDR 0xcc ++#define ATMEL_TC_QIMR 0xd0 ++#define ATMEL_TC_QISR 0xd4 ++#define ATMEL_TC_FMR 0xd8 ++#define ATMEL_TC_WPMR 0xe4 ++ ++/* CCR fields */ ++#define ATMEL_TC_CCR_CLKEN BIT(0) ++#define ATMEL_TC_CCR_CLKDIS BIT(1) ++#define ATMEL_TC_CCR_SWTRG BIT(2) ++ ++/* Common CMR fields */ ++#define ATMEL_TC_CMR_TCLKS_MSK GENMASK(2, 0) ++#define ATMEL_TC_CMR_TCLK(x) (x) ++#define ATMEL_TC_CMR_XC(x) ((x) + 5) ++#define ATMEL_TC_CMR_CLKI BIT(3) ++#define ATMEL_TC_CMR_BURST_MSK GENMASK(5, 4) ++#define ATMEL_TC_CMR_BURST_XC(x) (((x) + 1) << 4) ++#define ATMEL_TC_CMR_WAVE BIT(15) ++ ++/* Capture mode CMR fields */ ++#define ATMEL_TC_CMR_LDBSTOP BIT(6) ++#define ATMEL_TC_CMR_LDBDIS BIT(7) ++#define ATMEL_TC_CMR_ETRGEDG_MSK 
GENMASK(9, 8) ++#define ATMEL_TC_CMR_ETRGEDG_NONE (0 << 8) ++#define ATMEL_TC_CMR_ETRGEDG_RISING (1 << 8) ++#define ATMEL_TC_CMR_ETRGEDG_FALLING (2 << 8) ++#define ATMEL_TC_CMR_ETRGEDG_BOTH (3 << 8) ++#define ATMEL_TC_CMR_ABETRG BIT(10) ++#define ATMEL_TC_CMR_CPCTRG BIT(14) ++#define ATMEL_TC_CMR_LDRA_MSK GENMASK(17, 16) ++#define ATMEL_TC_CMR_LDRA_NONE (0 << 16) ++#define ATMEL_TC_CMR_LDRA_RISING (1 << 16) ++#define ATMEL_TC_CMR_LDRA_FALLING (2 << 16) ++#define ATMEL_TC_CMR_LDRA_BOTH (3 << 16) ++#define ATMEL_TC_CMR_LDRB_MSK GENMASK(19, 18) ++#define ATMEL_TC_CMR_LDRB_NONE (0 << 18) ++#define ATMEL_TC_CMR_LDRB_RISING (1 << 18) ++#define ATMEL_TC_CMR_LDRB_FALLING (2 << 18) ++#define ATMEL_TC_CMR_LDRB_BOTH (3 << 18) ++#define ATMEL_TC_CMR_SBSMPLR_MSK GENMASK(22, 20) ++#define ATMEL_TC_CMR_SBSMPLR(x) ((x) << 20) ++ ++/* Waveform mode CMR fields */ ++#define ATMEL_TC_CMR_CPCSTOP BIT(6) ++#define ATMEL_TC_CMR_CPCDIS BIT(7) ++#define ATMEL_TC_CMR_EEVTEDG_MSK GENMASK(9, 8) ++#define ATMEL_TC_CMR_EEVTEDG_NONE (0 << 8) ++#define ATMEL_TC_CMR_EEVTEDG_RISING (1 << 8) ++#define ATMEL_TC_CMR_EEVTEDG_FALLING (2 << 8) ++#define ATMEL_TC_CMR_EEVTEDG_BOTH (3 << 8) ++#define ATMEL_TC_CMR_EEVT_MSK GENMASK(11, 10) ++#define ATMEL_TC_CMR_EEVT_XC(x) (((x) + 1) << 10) ++#define ATMEL_TC_CMR_ENETRG BIT(12) ++#define ATMEL_TC_CMR_WAVESEL_MSK GENMASK(14, 13) ++#define ATMEL_TC_CMR_WAVESEL_UP (0 << 13) ++#define ATMEL_TC_CMR_WAVESEL_UPDOWN (1 << 13) ++#define ATMEL_TC_CMR_WAVESEL_UPRC (2 << 13) ++#define ATMEL_TC_CMR_WAVESEL_UPDOWNRC (3 << 13) ++#define ATMEL_TC_CMR_ACPA_MSK GENMASK(17, 16) ++#define ATMEL_TC_CMR_ACPA(a) (ATMEL_TC_CMR_ACTION_##a << 16) ++#define ATMEL_TC_CMR_ACPC_MSK GENMASK(19, 18) ++#define ATMEL_TC_CMR_ACPC(a) (ATMEL_TC_CMR_ACTION_##a << 18) ++#define ATMEL_TC_CMR_AEEVT_MSK GENMASK(21, 20) ++#define ATMEL_TC_CMR_AEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 20) ++#define ATMEL_TC_CMR_ASWTRG_MSK GENMASK(23, 22) ++#define ATMEL_TC_CMR_ASWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 22) ++#define ATMEL_TC_CMR_BCPB_MSK GENMASK(25, 24) ++#define ATMEL_TC_CMR_BCPB(a) (ATMEL_TC_CMR_ACTION_##a << 24) ++#define ATMEL_TC_CMR_BCPC_MSK GENMASK(27, 26) ++#define ATMEL_TC_CMR_BCPC(a) (ATMEL_TC_CMR_ACTION_##a << 26) ++#define ATMEL_TC_CMR_BEEVT_MSK GENMASK(29, 28) ++#define ATMEL_TC_CMR_BEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 28) ++#define ATMEL_TC_CMR_BSWTRG_MSK GENMASK(31, 30) ++#define ATMEL_TC_CMR_BSWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 30) ++#define ATMEL_TC_CMR_ACTION_NONE 0 ++#define ATMEL_TC_CMR_ACTION_SET 1 ++#define ATMEL_TC_CMR_ACTION_CLEAR 2 ++#define ATMEL_TC_CMR_ACTION_TOGGLE 3 ++ ++/* SMMR fields */ ++#define ATMEL_TC_SMMR_GCEN BIT(0) ++#define ATMEL_TC_SMMR_DOWN BIT(1) ++ ++/* SR/IER/IDR/IMR fields */ ++#define ATMEL_TC_COVFS BIT(0) ++#define ATMEL_TC_LOVRS BIT(1) ++#define ATMEL_TC_CPAS BIT(2) ++#define ATMEL_TC_CPBS BIT(3) ++#define ATMEL_TC_CPCS BIT(4) ++#define ATMEL_TC_LDRAS BIT(5) ++#define ATMEL_TC_LDRBS BIT(6) ++#define ATMEL_TC_ETRGS BIT(7) ++#define ATMEL_TC_CLKSTA BIT(16) ++#define ATMEL_TC_MTIOA BIT(17) ++#define ATMEL_TC_MTIOB BIT(18) ++ ++/* EMR fields */ ++#define ATMEL_TC_EMR_TRIGSRCA_MSK GENMASK(1, 0) ++#define ATMEL_TC_EMR_TRIGSRCA_TIOA 0 ++#define ATMEL_TC_EMR_TRIGSRCA_PWMX 1 ++#define ATMEL_TC_EMR_TRIGSRCB_MSK GENMASK(5, 4) ++#define ATMEL_TC_EMR_TRIGSRCB_TIOB (0 << 4) ++#define ATMEL_TC_EMR_TRIGSRCB_PWM (1 << 4) ++#define ATMEL_TC_EMR_NOCLKDIV BIT(8) ++ ++/* BCR fields */ ++#define ATMEL_TC_BCR_SYNC BIT(0) ++ ++/* BMR fields */ ++#define ATMEL_TC_BMR_TCXC_MSK(c) GENMASK(((c) * 2) + 1, (c) * 2) 
++#define ATMEL_TC_BMR_TCXC(x, c) ((x) << (2 * (c))) ++#define ATMEL_TC_BMR_QDEN BIT(8) ++#define ATMEL_TC_BMR_POSEN BIT(9) ++#define ATMEL_TC_BMR_SPEEDEN BIT(10) ++#define ATMEL_TC_BMR_QDTRANS BIT(11) ++#define ATMEL_TC_BMR_EDGPHA BIT(12) ++#define ATMEL_TC_BMR_INVA BIT(13) ++#define ATMEL_TC_BMR_INVB BIT(14) ++#define ATMEL_TC_BMR_INVIDX BIT(15) ++#define ATMEL_TC_BMR_SWAP BIT(16) ++#define ATMEL_TC_BMR_IDXPHB BIT(17) ++#define ATMEL_TC_BMR_AUTOC BIT(18) ++#define ATMEL_TC_MAXFILT_MSK GENMASK(25, 20) ++#define ATMEL_TC_MAXFILT(x) (((x) - 1) << 20) ++#define ATMEL_TC_MAXCMP_MSK GENMASK(29, 26) ++#define ATMEL_TC_MAXCMP(x) ((x) << 26) ++ ++/* QEDC fields */ ++#define ATMEL_TC_QEDC_IDX BIT(0) ++#define ATMEL_TC_QEDC_DIRCHG BIT(1) ++#define ATMEL_TC_QEDC_QERR BIT(2) ++#define ATMEL_TC_QEDC_MPE BIT(3) ++#define ATMEL_TC_QEDC_DIR BIT(8) ++ ++/* FMR fields */ ++#define ATMEL_TC_FMR_ENCF(x) BIT(x) ++ ++/* WPMR fields */ ++#define ATMEL_TC_WPMR_WPKEY (0x54494d << 8) ++#define ATMEL_TC_WPMR_WPEN BIT(0) ++ ++static inline struct clk *tcb_clk_get(struct device_node *node, int channel) ++{ ++ struct clk *clk; ++ char clk_name[] = "t0_clk"; ++ ++ clk_name[1] += channel; ++ clk = of_clk_get_by_name(node->parent, clk_name); ++ if (!IS_ERR(clk)) ++ return clk; ++ ++ return of_clk_get_by_name(node->parent, "t0_clk"); ++} ++ ++static inline int tcb_irq_get(struct device_node *node, int channel) ++{ ++ int irq; ++ ++ irq = of_irq_get(node->parent, channel); ++ if (irq > 0) ++ return irq; ++ ++ return of_irq_get(node->parent, 0); ++} ++ ++static const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, }; ++ ++struct atmel_tcb_info { ++ int bits; ++}; ++ ++static const struct atmel_tcb_info atmel_tcb_infos[] = { ++ { .bits = 16 }, ++ { .bits = 32 }, ++}; ++ ++static const struct of_device_id atmel_tcb_dt_ids[] = { ++ { ++ .compatible = "atmel,at91rm9200-tcb", ++ .data = &atmel_tcb_infos[0], ++ }, { ++ .compatible = "atmel,at91sam9x5-tcb", ++ .data = &atmel_tcb_infos[1], ++ }, { ++ /* sentinel */ ++ } ++}; ++ ++#endif /* __SOC_ATMEL_TCB_H */ diff --git a/debian/patches/features/all/rt/0001-get-rid-of-trylock-loop-in-locking-dentries-on-shrin.patch b/debian/patches/features/all/rt/0001-get-rid-of-trylock-loop-in-locking-dentries-on-shrin.patch new file mode 100644 index 000000000..7a241e0ec --- /dev/null +++ b/debian/patches/features/all/rt/0001-get-rid-of-trylock-loop-in-locking-dentries-on-shrin.patch @@ -0,0 +1,153 @@ +From: Al Viro +Date: Fri, 23 Feb 2018 21:54:18 -0500 +Subject: [PATCH 01/17] get rid of trylock loop in locking dentries on shrink + list +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 3b3f09f48ba78c0634e929849860a6447d057eed + +In case of trylock failure don't re-add to the list - drop the locks +and carefully get them in the right order. For shrink_dentry_list(), +somebody having grabbed a reference to dentry means that we can +kick it off-list, so if we find dentry being modified under us we +don't need to play silly buggers with retries anyway - off the list +it is. + +The locking logics taken out into a helper of its own; lock_parent() +is no longer used for dentries that can be killed under us. 
+ +[fix from Eric Biggers folded] + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 106 ++++++++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 68 insertions(+), 38 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -974,56 +974,86 @@ void d_prune_aliases(struct inode *inode + } + EXPORT_SYMBOL(d_prune_aliases); + +-static void shrink_dentry_list(struct list_head *list) ++/* ++ * Lock a dentry from shrink list. ++ * Note that dentry is *not* protected from concurrent dentry_kill(), ++ * d_delete(), etc. It is protected from freeing (by the fact of ++ * being on a shrink list), but everything else is fair game. ++ * Return false if dentry has been disrupted or grabbed, leaving ++ * the caller to kick it off-list. Otherwise, return true and have ++ * that dentry's inode and parent both locked. ++ */ ++static bool shrink_lock_dentry(struct dentry *dentry) + { +- struct dentry *dentry, *parent; ++ struct inode *inode; ++ struct dentry *parent; ++ ++ if (dentry->d_lockref.count) ++ return false; + ++ inode = dentry->d_inode; ++ if (inode && unlikely(!spin_trylock(&inode->i_lock))) { ++ rcu_read_lock(); /* to protect inode */ ++ spin_unlock(&dentry->d_lock); ++ spin_lock(&inode->i_lock); ++ spin_lock(&dentry->d_lock); ++ if (unlikely(dentry->d_lockref.count)) ++ goto out; ++ /* changed inode means that somebody had grabbed it */ ++ if (unlikely(inode != dentry->d_inode)) ++ goto out; ++ rcu_read_unlock(); ++ } ++ ++ parent = dentry->d_parent; ++ if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock))) ++ return true; ++ ++ rcu_read_lock(); /* to protect parent */ ++ spin_unlock(&dentry->d_lock); ++ parent = READ_ONCE(dentry->d_parent); ++ spin_lock(&parent->d_lock); ++ if (unlikely(parent != dentry->d_parent)) { ++ spin_unlock(&parent->d_lock); ++ spin_lock(&dentry->d_lock); ++ goto out; ++ } ++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ++ if (likely(!dentry->d_lockref.count)) { ++ rcu_read_unlock(); ++ return true; ++ } ++ spin_unlock(&parent->d_lock); ++out: ++ if (inode) ++ spin_unlock(&inode->i_lock); ++ rcu_read_unlock(); ++ return false; ++} ++ ++static void shrink_dentry_list(struct list_head *list) ++{ + while (!list_empty(list)) { ++ struct dentry *dentry, *parent; + struct inode *inode; ++ + dentry = list_entry(list->prev, struct dentry, d_lru); + spin_lock(&dentry->d_lock); +- parent = lock_parent(dentry); +- +- /* +- * The dispose list is isolated and dentries are not accounted +- * to the LRU here, so we can simply remove it from the list +- * here regardless of whether it is referenced or not. +- */ +- d_shrink_del(dentry); +- +- /* +- * We found an inuse dentry which was not removed from +- * the LRU because of laziness during lookup. Do not free it. 
+- */ +- if (dentry->d_lockref.count > 0) { ++ if (!shrink_lock_dentry(dentry)) { ++ bool can_free = false; ++ d_shrink_del(dentry); ++ if (dentry->d_lockref.count < 0) ++ can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); +- if (parent) +- spin_unlock(&parent->d_lock); +- continue; +- } +- +- +- if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { +- bool can_free = dentry->d_flags & DCACHE_MAY_FREE; +- spin_unlock(&dentry->d_lock); +- if (parent) +- spin_unlock(&parent->d_lock); + if (can_free) + dentry_free(dentry); + continue; + } +- +- inode = dentry->d_inode; +- if (inode && unlikely(!spin_trylock(&inode->i_lock))) { +- d_shrink_add(dentry, list); +- spin_unlock(&dentry->d_lock); +- if (parent) +- spin_unlock(&parent->d_lock); +- continue; +- } +- ++ d_shrink_del(dentry); ++ parent = dentry->d_parent; + __dentry_kill(dentry); +- ++ if (parent == dentry) ++ continue; + /* + * We need to prune ancestors too. This is necessary to prevent + * quadratic behavior of shrink_dcache_parent(), but is also diff --git a/debian/patches/features/all/rt/0001-iommu-amd-Take-into-account-that-alloc_dev_data-may-.patch b/debian/patches/features/all/rt/0001-iommu-amd-Take-into-account-that-alloc_dev_data-may-.patch new file mode 100644 index 000000000..3e3e8c931 --- /dev/null +++ b/debian/patches/features/all/rt/0001-iommu-amd-Take-into-account-that-alloc_dev_data-may-.patch @@ -0,0 +1,33 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:33 +0100 +Subject: [PATCH 01/10] iommu/amd: Take into account that alloc_dev_data() may + return NULL +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 39ffe39545cd5cb5b8cee9f0469165cf24dc62c2 + +find_dev_data() does not check whether the return value alloc_dev_data() +is NULL. This was okay once because the pointer was returned once as-is. +Since commit df3f7a6e8e85 ("iommu/amd: Use is_attach_deferred +call-back") the pointer may be used within find_dev_data() so a NULL +check is required. + +Cc: Baoquan He +Fixes: df3f7a6e8e85 ("iommu/amd: Use is_attach_deferred call-back") +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -310,6 +310,8 @@ static struct iommu_dev_data *find_dev_d + + if (dev_data == NULL) { + dev_data = alloc_dev_data(devid); ++ if (!dev_data) ++ return NULL; + + if (translation_pre_enabled(iommu)) + dev_data->defer_attach = true; diff --git a/debian/patches/features/all/rt/0001-iommu-amd-Use-raw-locks-on-atomic-context-paths.patch b/debian/patches/features/all/rt/0001-iommu-amd-Use-raw-locks-on-atomic-context-paths.patch new file mode 100644 index 000000000..479bc3a6c --- /dev/null +++ b/debian/patches/features/all/rt/0001-iommu-amd-Use-raw-locks-on-atomic-context-paths.patch @@ -0,0 +1,171 @@ +From: Scott Wood +Date: Sun, 21 Jan 2018 03:28:54 -0600 +Subject: [PATCH 1/3] iommu/amd: Use raw locks on atomic context paths +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 27790398c2aed917828dc3c6f81240d57f1584c9 + +Several functions in this driver are called from atomic context, +and thus raw locks must be used in order to be safe on PREEMPT_RT. + +This includes paths that must wait for command completion, which is +a potential PREEMPT_RT latency concern but not easily avoidable. 
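As a minimal standalone sketch of the locking pattern this patch applies (illustrative only; the lock name is hypothetical, not taken from the driver): on PREEMPT_RT, a plain spinlock_t is converted into a sleeping lock, so code that runs in atomic context must use raw_spinlock_t, which always remains a true spinning lock.

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(hw_lock);	/* hypothetical, guards hardware access */

static void program_hw_from_atomic_context(void)
{
	unsigned long flags;

	/* raw_spin_lock_irqsave() never sleeps, even on PREEMPT_RT */
	raw_spin_lock_irqsave(&hw_lock, flags);
	/* ... queue a command and poll for completion ... */
	raw_spin_unlock_irqrestore(&hw_lock, flags);
}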
+ +Signed-off-by: Scott Wood +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 30 +++++++++++++++--------------- + drivers/iommu/amd_iommu_init.c | 2 +- + drivers/iommu/amd_iommu_types.h | 4 ++-- + 3 files changed, 18 insertions(+), 18 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -1056,9 +1056,9 @@ static int iommu_queue_command_sync(stru + unsigned long flags; + int ret; + +- spin_lock_irqsave(&iommu->lock, flags); ++ raw_spin_lock_irqsave(&iommu->lock, flags); + ret = __iommu_queue_command_sync(iommu, cmd, sync); +- spin_unlock_irqrestore(&iommu->lock, flags); ++ raw_spin_unlock_irqrestore(&iommu->lock, flags); + + return ret; + } +@@ -1084,7 +1084,7 @@ static int iommu_completion_wait(struct + + build_completion_wait(&cmd, (u64)&iommu->cmd_sem); + +- spin_lock_irqsave(&iommu->lock, flags); ++ raw_spin_lock_irqsave(&iommu->lock, flags); + + iommu->cmd_sem = 0; + +@@ -1095,7 +1095,7 @@ static int iommu_completion_wait(struct + ret = wait_on_sem(&iommu->cmd_sem); + + out_unlock: +- spin_unlock_irqrestore(&iommu->lock, flags); ++ raw_spin_unlock_irqrestore(&iommu->lock, flags); + + return ret; + } +@@ -3627,7 +3627,7 @@ static struct irq_remap_table *get_irq_t + goto out_unlock; + + /* Initialize table spin-lock */ +- spin_lock_init(&table->lock); ++ raw_spin_lock_init(&table->lock); + + if (ioapic) + /* Keep the first 32 indexes free for IOAPIC interrupts */ +@@ -3689,7 +3689,7 @@ static int alloc_irq_index(u16 devid, in + if (align) + alignment = roundup_pow_of_two(count); + +- spin_lock_irqsave(&table->lock, flags); ++ raw_spin_lock_irqsave(&table->lock, flags); + + /* Scan table for free entries */ + for (index = ALIGN(table->min_index, alignment), c = 0; +@@ -3716,7 +3716,7 @@ static int alloc_irq_index(u16 devid, in + index = -ENOSPC; + + out: +- spin_unlock_irqrestore(&table->lock, flags); ++ raw_spin_unlock_irqrestore(&table->lock, flags); + + return index; + } +@@ -3737,7 +3737,7 @@ static int modify_irte_ga(u16 devid, int + if (!table) + return -ENOMEM; + +- spin_lock_irqsave(&table->lock, flags); ++ raw_spin_lock_irqsave(&table->lock, flags); + + entry = (struct irte_ga *)table->table; + entry = &entry[index]; +@@ -3748,7 +3748,7 @@ static int modify_irte_ga(u16 devid, int + if (data) + data->ref = entry; + +- spin_unlock_irqrestore(&table->lock, flags); ++ raw_spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +@@ -3770,9 +3770,9 @@ static int modify_irte(u16 devid, int in + if (!table) + return -ENOMEM; + +- spin_lock_irqsave(&table->lock, flags); ++ raw_spin_lock_irqsave(&table->lock, flags); + table->table[index] = irte->val; +- spin_unlock_irqrestore(&table->lock, flags); ++ raw_spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +@@ -3794,9 +3794,9 @@ static void free_irte(u16 devid, int ind + if (!table) + return; + +- spin_lock_irqsave(&table->lock, flags); ++ raw_spin_lock_irqsave(&table->lock, flags); + iommu->irte_ops->clear_allocated(table, index); +- spin_unlock_irqrestore(&table->lock, flags); ++ raw_spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +@@ -4397,7 +4397,7 @@ int amd_iommu_update_ga(int cpu, bool is + if (!irt) + return -ENODEV; + +- spin_lock_irqsave(&irt->lock, flags); ++ raw_spin_lock_irqsave(&irt->lock, flags); + + if (ref->lo.fields_vapic.guest_mode) { + if (cpu >= 0) +@@ -4406,7 +4406,7 @@ int 
amd_iommu_update_ga(int cpu, bool is + barrier(); + } + +- spin_unlock_irqrestore(&irt->lock, flags); ++ raw_spin_unlock_irqrestore(&irt->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +--- a/drivers/iommu/amd_iommu_init.c ++++ b/drivers/iommu/amd_iommu_init.c +@@ -1474,7 +1474,7 @@ static int __init init_iommu_one(struct + { + int ret; + +- spin_lock_init(&iommu->lock); ++ raw_spin_lock_init(&iommu->lock); + + /* Add IOMMU to internal data structures */ + list_add_tail(&iommu->list, &amd_iommu_list); +--- a/drivers/iommu/amd_iommu_types.h ++++ b/drivers/iommu/amd_iommu_types.h +@@ -408,7 +408,7 @@ extern bool amd_iommu_iotlb_sup; + #define IRQ_TABLE_ALIGNMENT 128 + + struct irq_remap_table { +- spinlock_t lock; ++ raw_spinlock_t lock; + unsigned min_index; + u32 *table; + }; +@@ -490,7 +490,7 @@ struct amd_iommu { + int index; + + /* locks the accesses to the hardware */ +- spinlock_t lock; ++ raw_spinlock_t lock; + + /* Pointer to PCI device of this IOMMU */ + struct pci_dev *dev; diff --git a/debian/patches/features/all/rt/0002-clocksource-drivers-Add-a-new-driver-for-the-Atmel-A.patch b/debian/patches/features/all/rt/0002-clocksource-drivers-Add-a-new-driver-for-the-Atmel-A.patch new file mode 100644 index 000000000..2e1b24cb3 --- /dev/null +++ b/debian/patches/features/all/rt/0002-clocksource-drivers-Add-a-new-driver-for-the-Atmel-A.patch @@ -0,0 +1,672 @@ +From: Alexandre Belloni +Date: Wed, 18 Apr 2018 12:51:39 +0200 +Subject: [PATCH 2/6] clocksource/drivers: Add a new driver for the Atmel ARM + TC blocks +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add a driver for the Atmel Timer Counter Blocks. This driver provides a +clocksource and two clockevent devices. + +One of the clockevent device is linked to the clocksource counter and so it +will run at the same frequency. This will be used when there is only on TCB +channel available for timers. + +The other clockevent device runs on a separate TCB channel when available. + +This driver uses regmap and syscon to be able to probe early in the boot +and avoid having to switch on the TCB clocksource later. Using regmap also +means that unused TCB channels may be used by other drivers (PWM for +example). read/writel are still used to access channel specific registers +to avoid the performance impact of regmap (mainly locking). + +Tested-by: Alexander Dahl +Tested-by: Andras Szemzo +Signed-off-by: Alexandre Belloni +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/clocksource/Kconfig | 8 + drivers/clocksource/Makefile | 3 + drivers/clocksource/timer-atmel-tcb.c | 608 ++++++++++++++++++++++++++++++++++ + 3 files changed, 618 insertions(+), 1 deletion(-) + create mode 100644 drivers/clocksource/timer-atmel-tcb.c + +--- a/drivers/clocksource/Kconfig ++++ b/drivers/clocksource/Kconfig +@@ -392,6 +392,14 @@ config ATMEL_ST + help + Support for the Atmel ST timer. + ++config ATMEL_ARM_TCB_CLKSRC ++ bool "Microchip ARM TC Block" if COMPILE_TEST ++ select REGMAP_MMIO ++ depends on GENERIC_CLOCKEVENTS ++ help ++ This enables build of clocksource and clockevent driver for ++ the integrated Timer Counter Blocks in Microchip ARM SoCs. 
++ + config CLKSRC_METAG_GENERIC + def_bool y if METAG + help +--- a/drivers/clocksource/Makefile ++++ b/drivers/clocksource/Makefile +@@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF) += timer-of.o + obj-$(CONFIG_TIMER_PROBE) += timer-probe.o + obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o + obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o +-obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o ++obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o ++obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC) += timer-atmel-tcb.o + obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o + obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o + obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o +--- /dev/null ++++ b/drivers/clocksource/timer-atmel-tcb.c +@@ -0,0 +1,608 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct atmel_tcb_clksrc { ++ struct clocksource clksrc; ++ struct clock_event_device clkevt; ++ struct regmap *regmap; ++ void __iomem *base; ++ struct clk *clk[2]; ++ char name[20]; ++ int channels[2]; ++ int bits; ++ int irq; ++ struct { ++ u32 cmr; ++ u32 imr; ++ u32 rc; ++ bool clken; ++ } cache[2]; ++ u32 bmr_cache; ++ bool registered; ++} tc = { ++ .clksrc = { ++ .rating = 200, ++ .mask = CLOCKSOURCE_MASK(32), ++ .flags = CLOCK_SOURCE_IS_CONTINUOUS, ++ }, ++ .clkevt = { ++ .features = CLOCK_EVT_FEAT_ONESHOT, ++ /* Should be lower than at91rm9200's system timer */ ++ .rating = 125, ++ }, ++}; ++ ++static struct tc_clkevt_device { ++ struct clock_event_device clkevt; ++ struct regmap *regmap; ++ void __iomem *base; ++ struct clk *slow_clk; ++ struct clk *clk; ++ char name[20]; ++ int channel; ++ int irq; ++ struct { ++ u32 cmr; ++ u32 imr; ++ u32 rc; ++ bool clken; ++ } cache; ++ bool registered; ++} tce = { ++ .clkevt = { ++ .features = CLOCK_EVT_FEAT_PERIODIC | ++ CLOCK_EVT_FEAT_ONESHOT, ++ /* ++ * Should be lower than at91rm9200's system timer ++ * but higher than tc.clkevt.rating ++ */ ++ .rating = 140, ++ }, ++}; ++ ++/* ++ * Clockevent device using its own channel ++ */ ++static int tc_clkevt2_shutdown(struct clock_event_device *d) ++{ ++ writel(0xff, tce.base + ATMEL_TC_IDR(tce.channel)); ++ writel(ATMEL_TC_CCR_CLKDIS, tce.base + ATMEL_TC_CCR(tce.channel)); ++ if (!clockevent_state_detached(d)) ++ clk_disable(tce.clk); ++ ++ return 0; ++} ++ ++/* For now, we always use the 32K clock ... this optimizes for NO_HZ, ++ * because using one of the divided clocks would usually mean the ++ * tick rate can never be less than several dozen Hz (vs 0.5 Hz). ++ * ++ * A divided clock could be good for high resolution timers, since ++ * 30.5 usec resolution can seem "low". ++ */ ++static int tc_clkevt2_set_oneshot(struct clock_event_device *d) ++{ ++ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) ++ tc_clkevt2_shutdown(d); ++ ++ clk_enable(tce.clk); ++ ++ /* slow clock, count up to RC, then irq and stop */ ++ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_CPCSTOP | ++ ATMEL_TC_CMR_WAVE | ATMEL_TC_CMR_WAVESEL_UPRC, ++ tce.base + ATMEL_TC_CMR(tce.channel)); ++ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel)); ++ ++ return 0; ++} ++ ++static int tc_clkevt2_set_periodic(struct clock_event_device *d) ++{ ++ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) ++ tc_clkevt2_shutdown(d); ++ ++ /* By not making the gentime core emulate periodic mode on top ++ * of oneshot, we get lower overhead and improved accuracy. 
++ */ ++ clk_enable(tce.clk); ++ ++ /* slow clock, count up to RC, then irq and restart */ ++ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_WAVE | ++ ATMEL_TC_CMR_WAVESEL_UPRC, ++ tce.base + ATMEL_TC_CMR(tce.channel)); ++ writel((32768 + HZ / 2) / HZ, tce.base + ATMEL_TC_RC(tce.channel)); ++ ++ /* Enable clock and interrupts on RC compare */ ++ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel)); ++ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG, ++ tce.base + ATMEL_TC_CCR(tce.channel)); ++ ++ return 0; ++} ++ ++static int tc_clkevt2_next_event(unsigned long delta, ++ struct clock_event_device *d) ++{ ++ writel(delta, tce.base + ATMEL_TC_RC(tce.channel)); ++ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG, ++ tce.base + ATMEL_TC_CCR(tce.channel)); ++ ++ return 0; ++} ++ ++static irqreturn_t tc_clkevt2_irq(int irq, void *handle) ++{ ++ unsigned int sr; ++ ++ sr = readl(tce.base + ATMEL_TC_SR(tce.channel)); ++ if (sr & ATMEL_TC_CPCS) { ++ tce.clkevt.event_handler(&tce.clkevt); ++ return IRQ_HANDLED; ++ } ++ ++ return IRQ_NONE; ++} ++ ++static void tc_clkevt2_suspend(struct clock_event_device *d) ++{ ++ tce.cache.cmr = readl(tce.base + ATMEL_TC_CMR(tce.channel)); ++ tce.cache.imr = readl(tce.base + ATMEL_TC_IMR(tce.channel)); ++ tce.cache.rc = readl(tce.base + ATMEL_TC_RC(tce.channel)); ++ tce.cache.clken = !!(readl(tce.base + ATMEL_TC_SR(tce.channel)) & ++ ATMEL_TC_CLKSTA); ++} ++ ++static void tc_clkevt2_resume(struct clock_event_device *d) ++{ ++ /* Restore registers for the channel, RA and RB are not used */ ++ writel(tce.cache.cmr, tc.base + ATMEL_TC_CMR(tce.channel)); ++ writel(tce.cache.rc, tc.base + ATMEL_TC_RC(tce.channel)); ++ writel(0, tc.base + ATMEL_TC_RA(tce.channel)); ++ writel(0, tc.base + ATMEL_TC_RB(tce.channel)); ++ /* Disable all the interrupts */ ++ writel(0xff, tc.base + ATMEL_TC_IDR(tce.channel)); ++ /* Reenable interrupts that were enabled before suspending */ ++ writel(tce.cache.imr, tc.base + ATMEL_TC_IER(tce.channel)); ++ ++ /* Start the clock if it was used */ ++ if (tce.cache.clken) ++ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG, ++ tc.base + ATMEL_TC_CCR(tce.channel)); ++} ++ ++static int __init tc_clkevt_register(struct device_node *node, ++ struct regmap *regmap, void __iomem *base, ++ int channel, int irq, int bits) ++{ ++ int ret; ++ ++ tce.regmap = regmap; ++ tce.base = base; ++ tce.channel = channel; ++ tce.irq = irq; ++ ++ tce.slow_clk = of_clk_get_by_name(node->parent, "slow_clk"); ++ if (IS_ERR(tce.slow_clk)) ++ return PTR_ERR(tce.slow_clk); ++ ++ ret = clk_prepare_enable(tce.slow_clk); ++ if (ret) ++ return ret; ++ ++ tce.clk = tcb_clk_get(node, tce.channel); ++ if (IS_ERR(tce.clk)) { ++ ret = PTR_ERR(tce.clk); ++ goto err_slow; ++ } ++ ++ snprintf(tce.name, sizeof(tce.name), "%s:%d", ++ kbasename(node->parent->full_name), channel); ++ tce.clkevt.cpumask = cpumask_of(0); ++ tce.clkevt.name = tce.name; ++ tce.clkevt.set_next_event = tc_clkevt2_next_event, ++ tce.clkevt.set_state_shutdown = tc_clkevt2_shutdown, ++ tce.clkevt.set_state_periodic = tc_clkevt2_set_periodic, ++ tce.clkevt.set_state_oneshot = tc_clkevt2_set_oneshot, ++ tce.clkevt.suspend = tc_clkevt2_suspend, ++ tce.clkevt.resume = tc_clkevt2_resume, ++ ++ /* try to enable clk to avoid future errors in mode change */ ++ ret = clk_prepare_enable(tce.clk); ++ if (ret) ++ goto err_slow; ++ clk_disable(tce.clk); ++ ++ clockevents_config_and_register(&tce.clkevt, 32768, 1, BIT(bits) - 1); ++ ++ ret = request_irq(tce.irq, tc_clkevt2_irq, IRQF_TIMER | IRQF_SHARED, ++ tce.clkevt.name, &tce); ++ 
if (ret) ++ goto err_clk; ++ ++ tce.registered = true; ++ ++ return 0; ++ ++err_clk: ++ clk_unprepare(tce.clk); ++err_slow: ++ clk_disable_unprepare(tce.slow_clk); ++ ++ return ret; ++} ++ ++/* ++ * Clocksource and clockevent using the same channel(s) ++ */ ++static u64 tc_get_cycles(struct clocksource *cs) ++{ ++ u32 lower, upper; ++ ++ do { ++ upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])); ++ lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0])); ++ } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]))); ++ ++ return (upper << 16) | lower; ++} ++ ++static u64 tc_get_cycles32(struct clocksource *cs) ++{ ++ return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0])); ++} ++ ++static u64 notrace tc_sched_clock_read(void) ++{ ++ return tc_get_cycles(&tc.clksrc); ++} ++ ++static u64 notrace tc_sched_clock_read32(void) ++{ ++ return tc_get_cycles32(&tc.clksrc); ++} ++ ++static int tcb_clkevt_next_event(unsigned long delta, ++ struct clock_event_device *d) ++{ ++ u32 old, next, cur; ++ ++ ++ old = readl(tc.base + ATMEL_TC_CV(tc.channels[0])); ++ next = old + delta; ++ writel(next, tc.base + ATMEL_TC_RC(tc.channels[0])); ++ cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0])); ++ ++ /* check whether the delta elapsed while setting the register */ ++ if ((next < old && cur < old && cur > next) || ++ (next > old && (cur < old || cur > next))) { ++ /* ++ * Clear the CPCS bit in the status register to avoid ++ * generating a spurious interrupt next time a valid ++ * timer event is configured. ++ */ ++ old = readl(tc.base + ATMEL_TC_SR(tc.channels[0])); ++ return -ETIME; ++ } ++ ++ writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0])); ++ ++ return 0; ++} ++ ++static irqreturn_t tc_clkevt_irq(int irq, void *handle) ++{ ++ unsigned int sr; ++ ++ sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0])); ++ if (sr & ATMEL_TC_CPCS) { ++ tc.clkevt.event_handler(&tc.clkevt); ++ return IRQ_HANDLED; ++ } ++ ++ return IRQ_NONE; ++} ++ ++static int tcb_clkevt_oneshot(struct clock_event_device *dev) ++{ ++ if (clockevent_state_oneshot(dev)) ++ return 0; ++ ++ /* ++ * Because both clockevent devices may share the same IRQ, we don't want ++ * the less likely one to stay requested ++ */ ++ return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED, ++ tc.name, &tc); ++} ++ ++static int tcb_clkevt_shutdown(struct clock_event_device *dev) ++{ ++ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0])); ++ if (tc.bits == 16) ++ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1])); ++ ++ if (!clockevent_state_detached(dev)) ++ free_irq(tc.irq, &tc); ++ ++ return 0; ++} ++ ++static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc, ++ int mck_divisor_idx) ++{ ++ /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */ ++ writel(mck_divisor_idx /* likely divide-by-8 */ ++ | ATMEL_TC_CMR_WAVE ++ | ATMEL_TC_CMR_WAVESEL_UP /* free-run */ ++ | ATMEL_TC_CMR_ACPA(SET) /* TIOA rises at 0 */ ++ | ATMEL_TC_CMR_ACPC(CLEAR), /* (duty cycle 50%) */ ++ tc->base + ATMEL_TC_CMR(tc->channels[0])); ++ writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0])); ++ writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0])); ++ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */ ++ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0])); ++ ++ /* second channel: waveform mode, input TIOA */ ++ writel(ATMEL_TC_CMR_XC(tc->channels[1]) /* input: TIOA */ ++ | ATMEL_TC_CMR_WAVE ++ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */ ++ tc->base + 
ATMEL_TC_CMR(tc->channels[1])); ++ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */ ++ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1])); ++ ++ /* chain both channel, we assume the previous channel */ ++ regmap_write(tc->regmap, ATMEL_TC_BMR, ++ ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1])); ++ /* then reset all the timers */ ++ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC); ++} ++ ++static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc, ++ int mck_divisor_idx) ++{ ++ /* channel 0: waveform mode, input mclk/8 */ ++ writel(mck_divisor_idx /* likely divide-by-8 */ ++ | ATMEL_TC_CMR_WAVE ++ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */ ++ tc->base + ATMEL_TC_CMR(tc->channels[0])); ++ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */ ++ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0])); ++ ++ /* then reset all the timers */ ++ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC); ++} ++ ++static void tc_clksrc_suspend(struct clocksource *cs) ++{ ++ int i; ++ ++ for (i = 0; i < 1 + (tc.bits == 16); i++) { ++ tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i])); ++ tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i])); ++ tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i])); ++ tc.cache[i].clken = !!(readl(tc.base + ++ ATMEL_TC_SR(tc.channels[i])) & ++ ATMEL_TC_CLKSTA); ++ } ++ ++ if (tc.bits == 16) ++ regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache); ++} ++ ++static void tc_clksrc_resume(struct clocksource *cs) ++{ ++ int i; ++ ++ for (i = 0; i < 1 + (tc.bits == 16); i++) { ++ /* Restore registers for the channel, RA and RB are not used */ ++ writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i])); ++ writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i])); ++ writel(0, tc.base + ATMEL_TC_RA(tc.channels[i])); ++ writel(0, tc.base + ATMEL_TC_RB(tc.channels[i])); ++ /* Disable all the interrupts */ ++ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i])); ++ /* Reenable interrupts that were enabled before suspending */ ++ writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i])); ++ ++ /* Start the clock if it was used */ ++ if (tc.cache[i].clken) ++ writel(ATMEL_TC_CCR_CLKEN, tc.base + ++ ATMEL_TC_CCR(tc.channels[i])); ++ } ++ ++ /* in case of dual channel, chain channels */ ++ if (tc.bits == 16) ++ regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache); ++ /* Finally, trigger all the channels*/ ++ regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC); ++} ++ ++static int __init tcb_clksrc_register(struct device_node *node, ++ struct regmap *regmap, void __iomem *base, ++ int channel, int channel1, int irq, ++ int bits) ++{ ++ u32 rate, divided_rate = 0; ++ int best_divisor_idx = -1; ++ int i, err = -1; ++ u64 (*tc_sched_clock)(void); ++ ++ tc.regmap = regmap; ++ tc.base = base; ++ tc.channels[0] = channel; ++ tc.channels[1] = channel1; ++ tc.irq = irq; ++ tc.bits = bits; ++ ++ tc.clk[0] = tcb_clk_get(node, tc.channels[0]); ++ if (IS_ERR(tc.clk[0])) ++ return PTR_ERR(tc.clk[0]); ++ err = clk_prepare_enable(tc.clk[0]); ++ if (err) { ++ pr_debug("can't enable T0 clk\n"); ++ goto err_clk; ++ } ++ ++ /* How fast will we be counting? Pick something over 5 MHz. 
*/ ++ rate = (u32)clk_get_rate(tc.clk[0]); ++ for (i = 0; i < 5; i++) { ++ unsigned int divisor = atmel_tc_divisors[i]; ++ unsigned int tmp; ++ ++ if (!divisor) ++ continue; ++ ++ tmp = rate / divisor; ++ pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp); ++ if (best_divisor_idx > 0) { ++ if (tmp < 5 * 1000 * 1000) ++ continue; ++ } ++ divided_rate = tmp; ++ best_divisor_idx = i; ++ } ++ ++ if (tc.bits == 32) { ++ tc.clksrc.read = tc_get_cycles32; ++ tcb_setup_single_chan(&tc, best_divisor_idx); ++ tc_sched_clock = tc_sched_clock_read32; ++ snprintf(tc.name, sizeof(tc.name), "%s:%d", ++ kbasename(node->parent->full_name), tc.channels[0]); ++ } else { ++ tc.clk[1] = tcb_clk_get(node, tc.channels[1]); ++ if (IS_ERR(tc.clk[1])) ++ goto err_disable_t0; ++ ++ err = clk_prepare_enable(tc.clk[1]); ++ if (err) { ++ pr_debug("can't enable T1 clk\n"); ++ goto err_clk1; ++ } ++ tc.clksrc.read = tc_get_cycles, ++ tcb_setup_dual_chan(&tc, best_divisor_idx); ++ tc_sched_clock = tc_sched_clock_read; ++ snprintf(tc.name, sizeof(tc.name), "%s:%d,%d", ++ kbasename(node->parent->full_name), tc.channels[0], ++ tc.channels[1]); ++ } ++ ++ pr_debug("%s at %d.%03d MHz\n", tc.name, ++ divided_rate / 1000000, ++ ((divided_rate + 500000) % 1000000) / 1000); ++ ++ tc.clksrc.name = tc.name; ++ tc.clksrc.suspend = tc_clksrc_suspend; ++ tc.clksrc.resume = tc_clksrc_resume; ++ ++ err = clocksource_register_hz(&tc.clksrc, divided_rate); ++ if (err) ++ goto err_disable_t1; ++ ++ sched_clock_register(tc_sched_clock, 32, divided_rate); ++ ++ tc.registered = true; ++ ++ /* Set up and register clockevents */ ++ tc.clkevt.name = tc.name; ++ tc.clkevt.cpumask = cpumask_of(0); ++ tc.clkevt.set_next_event = tcb_clkevt_next_event; ++ tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot; ++ tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown; ++ clockevents_config_and_register(&tc.clkevt, divided_rate, 1, ++ BIT(tc.bits) - 1); ++ ++ return 0; ++ ++err_disable_t1: ++ if (tc.bits == 16) ++ clk_disable_unprepare(tc.clk[1]); ++ ++err_clk1: ++ if (tc.bits == 16) ++ clk_put(tc.clk[1]); ++ ++err_disable_t0: ++ clk_disable_unprepare(tc.clk[0]); ++ ++err_clk: ++ clk_put(tc.clk[0]); ++ ++ pr_err("%s: unable to register clocksource/clockevent\n", ++ tc.clksrc.name); ++ ++ return err; ++} ++ ++static int __init tcb_clksrc_init(struct device_node *node) ++{ ++ const struct of_device_id *match; ++ const struct atmel_tcb_info *tcb_info; ++ struct regmap *regmap; ++ void __iomem *tcb_base; ++ u32 channel; ++ int bits, irq, err, chan1 = -1; ++ ++ if (tc.registered && tce.registered) ++ return -ENODEV; ++ ++ /* ++ * The regmap has to be used to access registers that are shared ++ * between channels on the same TCB but we keep direct IO access for ++ * the counters to avoid the impact on performance ++ */ ++ regmap = syscon_node_to_regmap(node->parent); ++ if (IS_ERR(regmap)) ++ return PTR_ERR(regmap); ++ ++ tcb_base = of_iomap(node->parent, 0); ++ if (!tcb_base) { ++ pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__); ++ return -ENXIO; ++ } ++ ++ match = of_match_node(atmel_tcb_dt_ids, node->parent); ++ tcb_info = match->data; ++ bits = tcb_info->bits; ++ ++ err = of_property_read_u32_index(node, "reg", 0, &channel); ++ if (err) ++ return err; ++ ++ irq = tcb_irq_get(node, channel); ++ if (irq < 0) ++ return irq; ++ ++ if (tc.registered) ++ return tc_clkevt_register(node, regmap, tcb_base, channel, irq, ++ bits); ++ ++ if (bits == 16) { ++ of_property_read_u32_index(node, "reg", 1, &chan1); ++ if (chan1 == -1) { ++ if (tce.registered) { ++ 
pr_err("%s: clocksource needs two channels\n", ++ node->parent->full_name); ++ return -EINVAL; ++ } else { ++ return tc_clkevt_register(node, regmap, ++ tcb_base, channel, ++ irq, bits); ++ } ++ } ++ } ++ ++ return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq, ++ bits); ++} ++CLOCKSOURCE_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer", ++ tcb_clksrc_init); diff --git a/debian/patches/features/all/rt/0002-iommu-amd-Don-t-use-dev_data-in-irte_ga_set_affinity.patch b/debian/patches/features/all/rt/0002-iommu-amd-Don-t-use-dev_data-in-irte_ga_set_affinity.patch new file mode 100644 index 000000000..5f4275290 --- /dev/null +++ b/debian/patches/features/all/rt/0002-iommu-amd-Don-t-use-dev_data-in-irte_ga_set_affinity.patch @@ -0,0 +1,32 @@ +From: Scott Wood +Date: Sun, 28 Jan 2018 14:22:19 -0600 +Subject: [PATCH 2/3] iommu/amd: Don't use dev_data in irte_ga_set_affinity() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 01ee04badefd296eb7a4430497373be9b7b16783 + +search_dev_data() acquires a non-raw lock, which can't be done +from atomic context on PREEMPT_RT. There is no need to look at +dev_data because guest_mode should never be set if use_vapic is +not set. + +Signed-off-by: Scott Wood +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3877,10 +3877,8 @@ static void irte_ga_set_affinity(void *e + u8 vector, u32 dest_apicid) + { + struct irte_ga *irte = (struct irte_ga *) entry; +- struct iommu_dev_data *dev_data = search_dev_data(devid); + +- if (!dev_data || !dev_data->use_vapic || +- !irte->lo.fields_remap.guest_mode) { ++ if (!irte->lo.fields_remap.guest_mode) { + irte->hi.fields.vector = vector; + irte->lo.fields_remap.destination = dest_apicid; + modify_irte_ga(devid, index, irte, NULL); diff --git a/debian/patches/features/all/rt/0002-iommu-amd-Turn-dev_data_list-into-a-lock-less-list.patch b/debian/patches/features/all/rt/0002-iommu-amd-Turn-dev_data_list-into-a-lock-less-list.patch new file mode 100644 index 000000000..176557ba1 --- /dev/null +++ b/debian/patches/features/all/rt/0002-iommu-amd-Turn-dev_data_list-into-a-lock-less-list.patch @@ -0,0 +1,98 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:34 +0100 +Subject: [PATCH 02/10] iommu/amd: Turn dev_data_list into a lock less list +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 779da73273fc4c4c6f41579a95e4fb7880a1720e + +alloc_dev_data() adds new items to dev_data_list and search_dev_data() +is searching for items in this list. Both protect the access to the list +with a spinlock. +There is no need to navigate forth and back within the list and there is +also no deleting of a specific item. This qualifies the list to become a +lock less list and as part of this, the spinlock can be removed. +With this change the ordering of those items within the list is changed: +before the change new items were added to the end of the list, now they +are added to the front. I don't think it matters but wanted to mention +it. 
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 28 ++++++++++------------------ + drivers/iommu/amd_iommu_types.h | 2 +- + 2 files changed, 11 insertions(+), 19 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -83,8 +83,7 @@ + static DEFINE_RWLOCK(amd_iommu_devtable_lock); + + /* List of all available dev_data structures */ +-static LIST_HEAD(dev_data_list); +-static DEFINE_SPINLOCK(dev_data_list_lock); ++static LLIST_HEAD(dev_data_list); + + LIST_HEAD(ioapic_map); + LIST_HEAD(hpet_map); +@@ -203,40 +202,33 @@ static struct dma_ops_domain* to_dma_ops + static struct iommu_dev_data *alloc_dev_data(u16 devid) + { + struct iommu_dev_data *dev_data; +- unsigned long flags; + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return NULL; + + dev_data->devid = devid; +- +- spin_lock_irqsave(&dev_data_list_lock, flags); +- list_add_tail(&dev_data->dev_data_list, &dev_data_list); +- spin_unlock_irqrestore(&dev_data_list_lock, flags); +- + ratelimit_default_init(&dev_data->rs); + ++ llist_add(&dev_data->dev_data_list, &dev_data_list); + return dev_data; + } + + static struct iommu_dev_data *search_dev_data(u16 devid) + { + struct iommu_dev_data *dev_data; +- unsigned long flags; ++ struct llist_node *node; ++ ++ if (llist_empty(&dev_data_list)) ++ return NULL; + +- spin_lock_irqsave(&dev_data_list_lock, flags); +- list_for_each_entry(dev_data, &dev_data_list, dev_data_list) { ++ node = dev_data_list.first; ++ llist_for_each_entry(dev_data, node, dev_data_list) { + if (dev_data->devid == devid) +- goto out_unlock; ++ return dev_data; + } + +- dev_data = NULL; +- +-out_unlock: +- spin_unlock_irqrestore(&dev_data_list_lock, flags); +- +- return dev_data; ++ return NULL; + } + + static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) +--- a/drivers/iommu/amd_iommu_types.h ++++ b/drivers/iommu/amd_iommu_types.h +@@ -627,7 +627,7 @@ struct devid_map { + */ + struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ +- struct list_head dev_data_list; /* For global dev_data_list */ ++ struct llist_node dev_data_list; /* For global dev_data_list */ + struct protection_domain *domain; /* Domain the device is bound to */ + u16 devid; /* PCI Device ID */ + u16 alias; /* Alias Device ID */ diff --git a/debian/patches/features/all/rt/0002-now-lock_parent-can-t-run-into-killed-dentry.patch b/debian/patches/features/all/rt/0002-now-lock_parent-can-t-run-into-killed-dentry.patch new file mode 100644 index 000000000..b66632fd6 --- /dev/null +++ b/debian/patches/features/all/rt/0002-now-lock_parent-can-t-run-into-killed-dentry.patch @@ -0,0 +1,46 @@ +From: Al Viro +Date: Fri, 23 Feb 2018 22:07:35 -0500 +Subject: [PATCH 02/17] now lock_parent() can't run into killed dentry +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 65d8eb5a8f5480756105173de147ef5d60163e2f + +all remaining callers hold either a reference or ->i_lock + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 13 +++---------- + 1 file changed, 3 insertions(+), 10 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -593,8 +593,6 @@ static inline struct dentry *lock_parent + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; +- if (unlikely(dentry->d_lockref.count < 0)) +- return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + rcu_read_lock(); +@@ 
-614,16 +612,11 @@ static inline struct dentry *lock_parent + spin_unlock(&parent->d_lock); + goto again; + } +- if (parent != dentry) { ++ rcu_read_unlock(); ++ if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); +- if (unlikely(dentry->d_lockref.count < 0)) { +- spin_unlock(&parent->d_lock); +- parent = NULL; +- } +- } else { ++ else + parent = NULL; +- } +- rcu_read_unlock(); + return parent; + } + diff --git a/debian/patches/features/all/rt/0003-clocksource-drivers-atmel-pit-make-option-silent.patch b/debian/patches/features/all/rt/0003-clocksource-drivers-atmel-pit-make-option-silent.patch new file mode 100644 index 000000000..4866b108b --- /dev/null +++ b/debian/patches/features/all/rt/0003-clocksource-drivers-atmel-pit-make-option-silent.patch @@ -0,0 +1,30 @@ +From: Alexandre Belloni +Date: Wed, 18 Apr 2018 12:51:40 +0200 +Subject: [PATCH 3/6] clocksource/drivers: atmel-pit: make option silent +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +To conform with the other option, make the ATMEL_PIT option silent so it +can be selected from the platform + +Tested-by: Alexander Dahl +Signed-off-by: Alexandre Belloni +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/clocksource/Kconfig | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/clocksource/Kconfig ++++ b/drivers/clocksource/Kconfig +@@ -381,8 +381,11 @@ config ARMV7M_SYSTICK + This options enables support for the ARMv7M system timer unit + + config ATMEL_PIT ++ bool "Microchip ARM Periodic Interval Timer (PIT)" if COMPILE_TEST + select TIMER_OF if OF +- def_bool SOC_AT91SAM9 || SOC_SAMA5 ++ help ++ This enables build of clocksource and clockevent driver for ++ the integrated PIT in Microchip ARM SoCs. + + config ATMEL_ST + bool "Atmel ST timer support" if COMPILE_TEST diff --git a/debian/patches/features/all/rt/0003-iommu-amd-Avoid-locking-get_irq_table-from-atomic-co.patch b/debian/patches/features/all/rt/0003-iommu-amd-Avoid-locking-get_irq_table-from-atomic-co.patch new file mode 100644 index 000000000..b7fe7e7c8 --- /dev/null +++ b/debian/patches/features/all/rt/0003-iommu-amd-Avoid-locking-get_irq_table-from-atomic-co.patch @@ -0,0 +1,117 @@ +From: Scott Wood +Date: Wed, 14 Feb 2018 17:36:28 -0600 +Subject: [PATCH 3/3] iommu/amd: Avoid locking get_irq_table() from atomic + context +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit df42a04b15f19a842393dc98a84cbc52b1f8ed49 + +get_irq_table() previously acquired amd_iommu_devtable_lock which is not +a raw lock, and thus cannot be acquired from atomic context on +PREEMPT_RT. Many calls to modify_irte*() come from atomic context due to +the IRQ desc->lock, as does amd_iommu_update_ga() due to the preemption +disabling in vcpu_load/put(). + +The only difference between calling get_irq_table() and reading from +irq_lookup_table[] directly, other than the lock acquisition and +amd_iommu_rlookup_table[] check, is if the table entry is unpopulated, +which should never happen when looking up a devid that came from an +irq_2_irte struct, as get_irq_table() would have already been called on +that devid during irq_remapping_alloc(). 
+ +The lock acquisition is not needed in these cases because entries in +irq_lookup_table[] never change once non-NULL -- nor would the +amd_iommu_devtable_lock usage in get_irq_table() provide meaningful +protection if they did, since it's released before using the looked up +table in the get_irq_table() caller. + +Rename the old get_irq_table() to alloc_irq_table(), and create a new +lockless get_irq_table() to be used in non-allocating contexts that WARNs +if it doesn't find what it's looking for. + +Signed-off-by: Scott Wood +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 29 ++++++++++++++++++++++------- + 1 file changed, 22 insertions(+), 7 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3595,7 +3595,22 @@ static void set_dte_irq_entry(u16 devid, + amd_iommu_dev_table[devid].data[2] = dte; + } + +-static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) ++static struct irq_remap_table *get_irq_table(u16 devid) ++{ ++ struct irq_remap_table *table; ++ ++ if (WARN_ONCE(!amd_iommu_rlookup_table[devid], ++ "%s: no iommu for devid %x\n", __func__, devid)) ++ return NULL; ++ ++ table = irq_lookup_table[devid]; ++ if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid)) ++ return NULL; ++ ++ return table; ++} ++ ++static struct irq_remap_table *alloc_irq_table(u16 devid, bool ioapic) + { + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; +@@ -3682,7 +3697,7 @@ static int alloc_irq_index(u16 devid, in + if (!iommu) + return -ENODEV; + +- table = get_irq_table(devid, false); ++ table = alloc_irq_table(devid, false); + if (!table) + return -ENODEV; + +@@ -3733,7 +3748,7 @@ static int modify_irte_ga(u16 devid, int + if (iommu == NULL) + return -EINVAL; + +- table = get_irq_table(devid, false); ++ table = get_irq_table(devid); + if (!table) + return -ENOMEM; + +@@ -3766,7 +3781,7 @@ static int modify_irte(u16 devid, int in + if (iommu == NULL) + return -EINVAL; + +- table = get_irq_table(devid, false); ++ table = get_irq_table(devid); + if (!table) + return -ENOMEM; + +@@ -3790,7 +3805,7 @@ static void free_irte(u16 devid, int ind + if (iommu == NULL) + return; + +- table = get_irq_table(devid, false); ++ table = get_irq_table(devid); + if (!table) + return; + +@@ -4108,7 +4123,7 @@ static int irq_remapping_alloc(struct ir + return ret; + + if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { +- if (get_irq_table(devid, true)) ++ if (alloc_irq_table(devid, true)) + index = info->ioapic_pin; + else + ret = -ENOMEM; +@@ -4391,7 +4406,7 @@ int amd_iommu_update_ga(int cpu, bool is + if (!iommu) + return -ENODEV; + +- irt = get_irq_table(devid, false); ++ irt = get_irq_table(devid); + if (!irt) + return -ENODEV; + diff --git a/debian/patches/features/all/rt/0003-iommu-amd-Split-domain-id-out-of-amd_iommu_devtable_.patch b/debian/patches/features/all/rt/0003-iommu-amd-Split-domain-id-out-of-amd_iommu_devtable_.patch new file mode 100644 index 000000000..5785c3fe1 --- /dev/null +++ b/debian/patches/features/all/rt/0003-iommu-amd-Split-domain-id-out-of-amd_iommu_devtable_.patch @@ -0,0 +1,63 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:35 +0100 +Subject: [PATCH 03/10] iommu/amd: Split domain id out of + amd_iommu_devtable_lock +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 2bc00180890427dcc092b2f2b0d03c904bcade29 + +domain_id_alloc() and domain_id_free() is used for id management. 
Those +two function share a bitmap (amd_iommu_pd_alloc_bitmap) and set/clear +bits based on id allocation. There is no need to share this with +amd_iommu_devtable_lock, it can use its own lock for this operation. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -81,6 +81,7 @@ + #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + + static DEFINE_RWLOCK(amd_iommu_devtable_lock); ++static DEFINE_SPINLOCK(pd_bitmap_lock); + + /* List of all available dev_data structures */ + static LLIST_HEAD(dev_data_list); +@@ -1599,29 +1600,26 @@ static void del_domain_from_list(struct + + static u16 domain_id_alloc(void) + { +- unsigned long flags; + int id; + +- write_lock_irqsave(&amd_iommu_devtable_lock, flags); ++ spin_lock(&pd_bitmap_lock); + id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); + BUG_ON(id == 0); + if (id > 0 && id < MAX_DOMAIN_ID) + __set_bit(id, amd_iommu_pd_alloc_bitmap); + else + id = 0; +- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); ++ spin_unlock(&pd_bitmap_lock); + + return id; + } + + static void domain_id_free(int id) + { +- unsigned long flags; +- +- write_lock_irqsave(&amd_iommu_devtable_lock, flags); ++ spin_lock(&pd_bitmap_lock); + if (id > 0 && id < MAX_DOMAIN_ID) + __clear_bit(id, amd_iommu_pd_alloc_bitmap); +- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); ++ spin_unlock(&pd_bitmap_lock); + } + + #define DEFINE_FREE_PT_FN(LVL, FN) \ diff --git a/debian/patches/features/all/rt/0003-split-the-slow-part-of-lock_parent-off.patch b/debian/patches/features/all/rt/0003-split-the-slow-part-of-lock_parent-off.patch new file mode 100644 index 000000000..3881f5d0b --- /dev/null +++ b/debian/patches/features/all/rt/0003-split-the-slow-part-of-lock_parent-off.patch @@ -0,0 +1,50 @@ +From: Al Viro +Date: Fri, 23 Feb 2018 22:11:34 -0500 +Subject: [PATCH 03/17] split the slow part of lock_parent() off +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 8b987a46a1e0e93d4cb4babea06ea274e2e2b658 + +Turn the "trylock failed" part into uninlined __lock_parent(). + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -588,13 +588,9 @@ static void __dentry_kill(struct dentry + dentry_free(dentry); + } + +-static inline struct dentry *lock_parent(struct dentry *dentry) ++static struct dentry *__lock_parent(struct dentry *dentry) + { +- struct dentry *parent = dentry->d_parent; +- if (IS_ROOT(dentry)) +- return NULL; +- if (likely(spin_trylock(&parent->d_lock))) +- return parent; ++ struct dentry *parent; + rcu_read_lock(); + spin_unlock(&dentry->d_lock); + again: +@@ -620,6 +616,16 @@ static inline struct dentry *lock_parent + return parent; + } + ++static inline struct dentry *lock_parent(struct dentry *dentry) ++{ ++ struct dentry *parent = dentry->d_parent; ++ if (IS_ROOT(dentry)) ++ return NULL; ++ if (likely(spin_trylock(&parent->d_lock))) ++ return parent; ++ return __lock_parent(dentry); ++} ++ + /* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. 
diff --git a/debian/patches/features/all/rt/0004-ARM-at91-Implement-clocksource-selection.patch b/debian/patches/features/all/rt/0004-ARM-at91-Implement-clocksource-selection.patch new file mode 100644 index 000000000..32b28bab5 --- /dev/null +++ b/debian/patches/features/all/rt/0004-ARM-at91-Implement-clocksource-selection.patch @@ -0,0 +1,49 @@ +From: Alexandre Belloni +Date: Wed, 18 Apr 2018 12:51:41 +0200 +Subject: [PATCH 4/6] ARM: at91: Implement clocksource selection +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Allow selecting and unselecting the PIT clocksource driver so it doesn't +have to be compile when unused. + +Tested-by: Alexander Dahl +Signed-off-by: Alexandre Belloni +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/mach-at91/Kconfig | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +--- a/arch/arm/mach-at91/Kconfig ++++ b/arch/arm/mach-at91/Kconfig +@@ -106,6 +106,31 @@ config SOC_AT91SAM9 + AT91SAM9X35 + AT91SAM9XE + ++comment "Clocksource driver selection" ++ ++config ATMEL_CLOCKSOURCE_PIT ++ bool "Periodic Interval Timer (PIT) support" ++ depends on SOC_AT91SAM9 || SOC_SAMA5 ++ default SOC_AT91SAM9 || SOC_SAMA5 ++ select ATMEL_PIT ++ help ++ Select this to get a clocksource based on the Atmel Periodic Interval ++ Timer. It has a relatively low resolution and the TC Block clocksource ++ should be preferred. ++ ++config ATMEL_CLOCKSOURCE_TCB ++ bool "Timer Counter Blocks (TCB) support" ++ depends on SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5 || COMPILE_TEST ++ default SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5 ++ depends on !ATMEL_TCLIB ++ select ATMEL_ARM_TCB_CLKSRC ++ help ++ Select this to get a high precision clocksource based on a ++ TC block with a 5+ MHz base clock rate. ++ On platforms with 16-bit counters, two timer channels are combined ++ to make a single 32-bit timer. ++ It can also be used as a clock event device supporting oneshot mode. ++ + config HAVE_AT91_UTMI + bool + diff --git a/debian/patches/features/all/rt/0004-dput-consolidate-the-do-we-need-to-retain-it-into-an.patch b/debian/patches/features/all/rt/0004-dput-consolidate-the-do-we-need-to-retain-it-into-an.patch new file mode 100644 index 000000000..30a4ec64a --- /dev/null +++ b/debian/patches/features/all/rt/0004-dput-consolidate-the-do-we-need-to-retain-it-into-an.patch @@ -0,0 +1,74 @@ +From: Al Viro +Date: Fri, 23 Feb 2018 21:07:24 -0500 +Subject: [PATCH 04/17] dput(): consolidate the "do we need to retain it?" into + an inlined helper +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit a338579f2f3d6a15c78f1dc7de4c248b4183fcea + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 42 +++++++++++++++++++++++------------------- + 1 file changed, 23 insertions(+), 19 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -626,6 +626,24 @@ static inline struct dentry *lock_parent + return __lock_parent(dentry); + } + ++static inline bool retain_dentry(struct dentry *dentry) ++{ ++ WARN_ON(d_in_lookup(dentry)); ++ ++ /* Unreachable? Get rid of it */ ++ if (unlikely(d_unhashed(dentry))) ++ return false; ++ ++ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) ++ return false; ++ ++ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { ++ if (dentry->d_op->d_delete(dentry)) ++ return false; ++ } ++ return true; ++} ++ + /* + * Finish off a dentry we've decided to kill. 
+ * dentry->d_lock must be held, returns with it unlocked. +@@ -804,27 +822,13 @@ void dput(struct dentry *dentry) + /* Slow case: now with the dentry lock held */ + rcu_read_unlock(); + +- WARN_ON(d_in_lookup(dentry)); +- +- /* Unreachable? Get rid of it */ +- if (unlikely(d_unhashed(dentry))) +- goto kill_it; +- +- if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) +- goto kill_it; +- +- if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { +- if (dentry->d_op->d_delete(dentry)) +- goto kill_it; ++ if (likely(retain_dentry(dentry))) { ++ dentry_lru_add(dentry); ++ dentry->d_lockref.count--; ++ spin_unlock(&dentry->d_lock); ++ return; + } + +- dentry_lru_add(dentry); +- +- dentry->d_lockref.count--; +- spin_unlock(&dentry->d_lock); +- return; +- +-kill_it: + dentry = dentry_kill(dentry); + if (dentry) { + cond_resched(); diff --git a/debian/patches/features/all/rt/0004-iommu-amd-Split-irq_lookup_table-out-of-the-amd_iomm.patch b/debian/patches/features/all/rt/0004-iommu-amd-Split-irq_lookup_table-out-of-the-amd_iomm.patch new file mode 100644 index 000000000..afd17e45c --- /dev/null +++ b/debian/patches/features/all/rt/0004-iommu-amd-Split-irq_lookup_table-out-of-the-amd_iomm.patch @@ -0,0 +1,51 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:36 +0100 +Subject: [PATCH 04/10] iommu/amd: Split irq_lookup_table out of the + amd_iommu_devtable_lock +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit ea6166f4b83e9cfba1c18f46a764d50045682fe5 + +The function get_irq_table() reads/writes irq_lookup_table while holding +the amd_iommu_devtable_lock. It also modifies +amd_iommu_dev_table[].data[2]. +set_dte_entry() is using amd_iommu_dev_table[].data[0|1] (under the +domain->lock) so it should be okay. The access to the iommu is +serialized with its own (iommu's) lock. + +So split out get_irq_table() out of amd_iommu_devtable_lock's lock. 
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -82,6 +82,7 @@ + + static DEFINE_RWLOCK(amd_iommu_devtable_lock); + static DEFINE_SPINLOCK(pd_bitmap_lock); ++static DEFINE_SPINLOCK(iommu_table_lock); + + /* List of all available dev_data structures */ + static LLIST_HEAD(dev_data_list); +@@ -3609,7 +3610,7 @@ static struct irq_remap_table *alloc_irq + unsigned long flags; + u16 alias; + +- write_lock_irqsave(&amd_iommu_devtable_lock, flags); ++ spin_lock_irqsave(&iommu_table_lock, flags); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) +@@ -3674,7 +3675,7 @@ static struct irq_remap_table *alloc_irq + iommu_completion_wait(iommu); + + out_unlock: +- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); ++ spin_unlock_irqrestore(&iommu_table_lock, flags); + + return table; + } diff --git a/debian/patches/features/all/rt/0005-ARM-configs-at91-use-new-TCB-timer-driver.patch b/debian/patches/features/all/rt/0005-ARM-configs-at91-use-new-TCB-timer-driver.patch new file mode 100644 index 000000000..d55057111 --- /dev/null +++ b/debian/patches/features/all/rt/0005-ARM-configs-at91-use-new-TCB-timer-driver.patch @@ -0,0 +1,35 @@ +From: Alexandre Belloni +Date: Wed, 18 Apr 2018 12:51:42 +0200 +Subject: [PATCH 5/6] ARM: configs: at91: use new TCB timer driver +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Unselecting ATMEL_TCLIB switches the TCB timer driver from tcb_clksrc to +timer-atmel-tcb. + +Signed-off-by: Alexandre Belloni +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/configs/at91_dt_defconfig | 1 - + arch/arm/configs/sama5_defconfig | 1 - + 2 files changed, 2 deletions(-) + +--- a/arch/arm/configs/at91_dt_defconfig ++++ b/arch/arm/configs/at91_dt_defconfig +@@ -64,7 +64,6 @@ CONFIG_BLK_DEV_LOOP=y + CONFIG_BLK_DEV_RAM=y + CONFIG_BLK_DEV_RAM_COUNT=4 + CONFIG_BLK_DEV_RAM_SIZE=8192 +-CONFIG_ATMEL_TCLIB=y + CONFIG_ATMEL_SSC=y + CONFIG_SCSI=y + CONFIG_BLK_DEV_SD=y +--- a/arch/arm/configs/sama5_defconfig ++++ b/arch/arm/configs/sama5_defconfig +@@ -75,7 +75,6 @@ CONFIG_BLK_DEV_LOOP=y + CONFIG_BLK_DEV_RAM=y + CONFIG_BLK_DEV_RAM_COUNT=4 + CONFIG_BLK_DEV_RAM_SIZE=8192 +-CONFIG_ATMEL_TCLIB=y + CONFIG_ATMEL_SSC=y + CONFIG_EEPROM_AT24=y + CONFIG_SCSI=y diff --git a/debian/patches/features/all/rt/0005-handle-move-to-LRU-in-retain_dentry.patch b/debian/patches/features/all/rt/0005-handle-move-to-LRU-in-retain_dentry.patch new file mode 100644 index 000000000..0f983bbc8 --- /dev/null +++ b/debian/patches/features/all/rt/0005-handle-move-to-LRU-in-retain_dentry.patch @@ -0,0 +1,55 @@ +From: Al Viro +Date: Tue, 6 Mar 2018 21:37:31 -0500 +Subject: [PATCH 05/17] handle move to LRU in retain_dentry() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 62d9956cefe6ecc4b43a7fae37af78ba7adaceaa + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 19 ++++++------------- + 1 file changed, 6 insertions(+), 13 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -440,17 +440,6 @@ static void d_lru_shrink_move(struct lis + list_lru_isolate_move(lru, &dentry->d_lru, list); + } + +-/* +- * dentry_lru_(add|del)_list) must be called with d_lock held. 
+- */
+-static void dentry_lru_add(struct dentry *dentry)
+-{
+-	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
+-		d_lru_add(dentry);
+-	else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
+-		dentry->d_flags |= DCACHE_REFERENCED;
+-}
+-
+ /**
+  * d_drop - drop a dentry
+  * @dentry: dentry to drop
+@@ -641,6 +630,12 @@ static inline bool retain_dentry(struct
+ 	if (dentry->d_op->d_delete(dentry))
+ 		return false;
+ 	}
++	/* retain; LRU fodder */
++	dentry->d_lockref.count--;
++	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
++		d_lru_add(dentry);
++	else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
++		dentry->d_flags |= DCACHE_REFERENCED;
+ 	return true;
+ }
+ 
+@@ -823,8 +818,6 @@ void dput(struct dentry *dentry)
+ 	rcu_read_unlock();
+ 
+ 	if (likely(retain_dentry(dentry))) {
+-		dentry_lru_add(dentry);
+-		dentry->d_lockref.count--;
+ 		spin_unlock(&dentry->d_lock);
+ 		return;
+ 	}
diff --git a/debian/patches/features/all/rt/0005-iommu-amd-Remove-the-special-case-from-alloc_irq_tab.patch b/debian/patches/features/all/rt/0005-iommu-amd-Remove-the-special-case-from-alloc_irq_tab.patch
new file mode 100644
index 000000000..f14aeb469
--- /dev/null
+++ b/debian/patches/features/all/rt/0005-iommu-amd-Remove-the-special-case-from-alloc_irq_tab.patch
@@ -0,0 +1,95 @@
+From: Sebastian Andrzej Siewior
+Date: Thu, 22 Mar 2018 16:22:37 +0100
+Subject: [PATCH 05/10] iommu/amd: Remove the special case from
+ alloc_irq_table()
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+Upstream commit fde65dd3d3096e8f6ecc7bbe544eb91f4220772c
+
+alloc_irq_table() has a special ioapic argument. If set, it will
+pre-allocate / reserve the first 32 indexes. The argument is true only
+once, and extracting the special bits to the caller makes
+alloc_irq_table() a little simpler.
+The caller of irq_remapping_alloc() is holding irq_domain_mutex, so the
+initialization via iommu->irte_ops->set_allocated() should not race
+with other users.
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3603,7 +3603,7 @@ static struct irq_remap_table *get_irq_t + return table; + } + +-static struct irq_remap_table *alloc_irq_table(u16 devid, bool ioapic) ++static struct irq_remap_table *alloc_irq_table(u16 devid) + { + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; +@@ -3637,10 +3637,6 @@ static struct irq_remap_table *alloc_irq + /* Initialize table spin-lock */ + raw_spin_lock_init(&table->lock); + +- if (ioapic) +- /* Keep the first 32 indexes free for IOAPIC interrupts */ +- table->min_index = 32; +- + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); + if (!table->table) { + kfree(table); +@@ -3655,12 +3651,6 @@ static struct irq_remap_table *alloc_irq + memset(table->table, 0, + (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); + +- if (ioapic) { +- int i; +- +- for (i = 0; i < 32; ++i) +- iommu->irte_ops->set_allocated(table, i); +- } + + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); +@@ -3690,7 +3680,7 @@ static int alloc_irq_index(u16 devid, in + if (!iommu) + return -ENODEV; + +- table = alloc_irq_table(devid, false); ++ table = alloc_irq_table(devid); + if (!table) + return -ENODEV; + +@@ -4116,10 +4106,26 @@ static int irq_remapping_alloc(struct ir + return ret; + + if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { +- if (alloc_irq_table(devid, true)) ++ struct irq_remap_table *table; ++ struct amd_iommu *iommu; ++ ++ table = alloc_irq_table(devid); ++ if (table) { ++ if (!table->min_index) { ++ /* ++ * Keep the first 32 indexes free for IOAPIC ++ * interrupts. ++ */ ++ table->min_index = 32; ++ iommu = amd_iommu_rlookup_table[devid]; ++ for (i = 0; i < 32; ++i) ++ iommu->irte_ops->set_allocated(table, i); ++ } ++ WARN_ON(table->min_index != 32); + index = info->ioapic_pin; +- else ++ } else { + ret = -ENOMEM; ++ } + } else { + bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI); + diff --git a/debian/patches/features/all/rt/0006-ARM-configs-at91-unselect-PIT.patch b/debian/patches/features/all/rt/0006-ARM-configs-at91-unselect-PIT.patch new file mode 100644 index 000000000..4f2350554 --- /dev/null +++ b/debian/patches/features/all/rt/0006-ARM-configs-at91-unselect-PIT.patch @@ -0,0 +1,36 @@ +From: Alexandre Belloni +Date: Wed, 18 Apr 2018 12:51:43 +0200 +Subject: [PATCH 6/6] ARM: configs: at91: unselect PIT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The PIT is not required anymore to successfully boot and may actually harm +in case preempt-rt is used because the PIT interrupt is shared. +Disable it so the TCB clocksource is used. 
+ +Signed-off-by: Alexandre Belloni +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/configs/at91_dt_defconfig | 1 + + arch/arm/configs/sama5_defconfig | 1 + + 2 files changed, 2 insertions(+) + +--- a/arch/arm/configs/at91_dt_defconfig ++++ b/arch/arm/configs/at91_dt_defconfig +@@ -19,6 +19,7 @@ CONFIG_ARCH_MULTI_V5=y + CONFIG_ARCH_AT91=y + CONFIG_SOC_AT91RM9200=y + CONFIG_SOC_AT91SAM9=y ++# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set + CONFIG_AEABI=y + CONFIG_UACCESS_WITH_MEMCPY=y + CONFIG_ZBOOT_ROM_TEXT=0x0 +--- a/arch/arm/configs/sama5_defconfig ++++ b/arch/arm/configs/sama5_defconfig +@@ -20,6 +20,7 @@ CONFIG_ARCH_AT91=y + CONFIG_SOC_SAMA5D2=y + CONFIG_SOC_SAMA5D3=y + CONFIG_SOC_SAMA5D4=y ++# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set + CONFIG_AEABI=y + CONFIG_UACCESS_WITH_MEMCPY=y + CONFIG_ZBOOT_ROM_TEXT=0x0 diff --git a/debian/patches/features/all/rt/0006-get-rid-of-trylock-loop-around-dentry_kill.patch b/debian/patches/features/all/rt/0006-get-rid-of-trylock-loop-around-dentry_kill.patch new file mode 100644 index 000000000..645b9db7c --- /dev/null +++ b/debian/patches/features/all/rt/0006-get-rid-of-trylock-loop-around-dentry_kill.patch @@ -0,0 +1,73 @@ +From: Al Viro +Date: Fri, 23 Feb 2018 21:25:42 -0500 +Subject: [PATCH 06/17] get rid of trylock loop around dentry_kill() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit f657a666fd1b1b9fe59963943c74c245ae66f4cc + +In case when trylock in there fails, deal with it directly in +dentry_kill(). Note that in cases when we drop and retake +->d_lock, we need to recheck whether to retain the dentry. +Another thing is that dropping/retaking ->d_lock might have +ended up with negative dentry turning into positive; that, +of course, can happen only once... 
+ +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 34 +++++++++++++++++++++++++++------- + 1 file changed, 27 insertions(+), 7 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -651,23 +651,43 @@ static struct dentry *dentry_kill(struct + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) +- goto failed; ++ goto slow_positive; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { +- if (inode) +- spin_unlock(&inode->i_lock); +- goto failed; ++ parent = __lock_parent(dentry); ++ if (likely(inode || !dentry->d_inode)) ++ goto got_locks; ++ /* negative that became positive */ ++ if (parent) ++ spin_unlock(&parent->d_lock); ++ inode = dentry->d_inode; ++ goto slow_positive; + } + } +- + __dentry_kill(dentry); + return parent; + +-failed: ++slow_positive: ++ spin_unlock(&dentry->d_lock); ++ spin_lock(&inode->i_lock); ++ spin_lock(&dentry->d_lock); ++ parent = lock_parent(dentry); ++got_locks: ++ if (unlikely(dentry->d_lockref.count != 1)) { ++ dentry->d_lockref.count--; ++ } else if (likely(!retain_dentry(dentry))) { ++ __dentry_kill(dentry); ++ return parent; ++ } ++ /* we are keeping it, after all */ ++ if (inode) ++ spin_unlock(&inode->i_lock); ++ if (parent) ++ spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); +- return dentry; /* try again with same dentry */ ++ return NULL; + } + + /* diff --git a/debian/patches/features/all/rt/0006-iommu-amd-Use-table-instead-irt-as-variable-name-in-.patch b/debian/patches/features/all/rt/0006-iommu-amd-Use-table-instead-irt-as-variable-name-in-.patch new file mode 100644 index 000000000..9df168488 --- /dev/null +++ b/debian/patches/features/all/rt/0006-iommu-amd-Use-table-instead-irt-as-variable-name-in-.patch @@ -0,0 +1,53 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:38 +0100 +Subject: [PATCH 06/10] iommu/amd: Use `table' instead `irt' as variable name + in amd_iommu_update_ga() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 4fde541c9dc114c5b448ad34b0286fe8b7c550f1 + +The variable of type struct irq_remap_table is always named `table' +except in amd_iommu_update_ga() where it is called `irt'. Make it +consistent and name it also `table'. 
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -4391,7 +4391,7 @@ int amd_iommu_update_ga(int cpu, bool is + { + unsigned long flags; + struct amd_iommu *iommu; +- struct irq_remap_table *irt; ++ struct irq_remap_table *table; + struct amd_ir_data *ir_data = (struct amd_ir_data *)data; + int devid = ir_data->irq_2_irte.devid; + struct irte_ga *entry = (struct irte_ga *) ir_data->entry; +@@ -4405,11 +4405,11 @@ int amd_iommu_update_ga(int cpu, bool is + if (!iommu) + return -ENODEV; + +- irt = get_irq_table(devid); +- if (!irt) ++ table = get_irq_table(devid); ++ if (!table) + return -ENODEV; + +- raw_spin_lock_irqsave(&irt->lock, flags); ++ raw_spin_lock_irqsave(&table->lock, flags); + + if (ref->lo.fields_vapic.guest_mode) { + if (cpu >= 0) +@@ -4418,7 +4418,7 @@ int amd_iommu_update_ga(int cpu, bool is + barrier(); + } + +- raw_spin_unlock_irqrestore(&irt->lock, flags); ++ raw_spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); diff --git a/debian/patches/features/all/rt/0007-fs-dcache-Avoid-a-try_lock-loop-in-shrink_dentry_lis.patch b/debian/patches/features/all/rt/0007-fs-dcache-Avoid-a-try_lock-loop-in-shrink_dentry_lis.patch new file mode 100644 index 000000000..55af182c4 --- /dev/null +++ b/debian/patches/features/all/rt/0007-fs-dcache-Avoid-a-try_lock-loop-in-shrink_dentry_lis.patch @@ -0,0 +1,150 @@ +From: John Ogness +Date: Fri, 23 Feb 2018 00:50:24 +0100 +Subject: [PATCH 07/17] fs/dcache: Avoid a try_lock loop in + shrink_dentry_list() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 8f04da2adbdffed8dc4b2feb00ec3b3d84683885 + +shrink_dentry_list() holds dentry->d_lock and needs to acquire +dentry->d_inode->i_lock. This cannot be done with a spin_lock() +operation because it's the reverse of the regular lock order. +To avoid ABBA deadlocks it is done with a trylock loop. + +Trylock loops are problematic in two scenarios: + + 1) PREEMPT_RT converts spinlocks to 'sleeping' spinlocks, which are + preemptible. As a consequence the i_lock holder can be preempted + by a higher priority task. If that task executes the trylock loop + it will do so forever and live lock. + + 2) In virtual machines trylock loops are problematic as well. The + VCPU on which the i_lock holder runs can be scheduled out and a + task on a different VCPU can loop for a whole time slice. In the + worst case this can lead to starvation. Commits 47be61845c77 + ("fs/dcache.c: avoid soft-lockup in dput()") and 046b961b45f9 + ("shrink_dentry_list(): take parent's d_lock earlier") are + addressing exactly those symptoms. + +Avoid the trylock loop by using dentry_kill(). When pruning ancestors, +the same code applies that is used to kill a dentry in dput(). This +also has the benefit that the locking order is now the same. First +the inode is locked, then the parent. + +Signed-off-by: John Ogness +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 41 ++++++++++------------------------------- + 1 file changed, 10 insertions(+), 31 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -992,9 +992,11 @@ EXPORT_SYMBOL(d_prune_aliases); + + /* + * Lock a dentry from shrink list. 
++ * Called under rcu_read_lock() and dentry->d_lock; the former ++ * guarantees that nothing we access will be freed under us. + * Note that dentry is *not* protected from concurrent dentry_kill(), +- * d_delete(), etc. It is protected from freeing (by the fact of +- * being on a shrink list), but everything else is fair game. ++ * d_delete(), etc. ++ * + * Return false if dentry has been disrupted or grabbed, leaving + * the caller to kick it off-list. Otherwise, return true and have + * that dentry's inode and parent both locked. +@@ -1009,7 +1011,6 @@ static bool shrink_lock_dentry(struct de + + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { +- rcu_read_lock(); /* to protect inode */ + spin_unlock(&dentry->d_lock); + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); +@@ -1018,16 +1019,13 @@ static bool shrink_lock_dentry(struct de + /* changed inode means that somebody had grabbed it */ + if (unlikely(inode != dentry->d_inode)) + goto out; +- rcu_read_unlock(); + } + + parent = dentry->d_parent; + if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock))) + return true; + +- rcu_read_lock(); /* to protect parent */ + spin_unlock(&dentry->d_lock); +- parent = READ_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); +@@ -1035,15 +1033,12 @@ static bool shrink_lock_dentry(struct de + goto out; + } + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); +- if (likely(!dentry->d_lockref.count)) { +- rcu_read_unlock(); ++ if (likely(!dentry->d_lockref.count)) + return true; +- } + spin_unlock(&parent->d_lock); + out: + if (inode) + spin_unlock(&inode->i_lock); +- rcu_read_unlock(); + return false; + } + +@@ -1051,12 +1046,13 @@ static void shrink_dentry_list(struct li + { + while (!list_empty(list)) { + struct dentry *dentry, *parent; +- struct inode *inode; + + dentry = list_entry(list->prev, struct dentry, d_lru); + spin_lock(&dentry->d_lock); ++ rcu_read_lock(); + if (!shrink_lock_dentry(dentry)) { + bool can_free = false; ++ rcu_read_unlock(); + d_shrink_del(dentry); + if (dentry->d_lockref.count < 0) + can_free = dentry->d_flags & DCACHE_MAY_FREE; +@@ -1065,6 +1061,7 @@ static void shrink_dentry_list(struct li + dentry_free(dentry); + continue; + } ++ rcu_read_unlock(); + d_shrink_del(dentry); + parent = dentry->d_parent; + __dentry_kill(dentry); +@@ -1077,26 +1074,8 @@ static void shrink_dentry_list(struct li + * fragmentation. 
+ */ + dentry = parent; +- while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { +- parent = lock_parent(dentry); +- if (dentry->d_lockref.count != 1) { +- dentry->d_lockref.count--; +- spin_unlock(&dentry->d_lock); +- if (parent) +- spin_unlock(&parent->d_lock); +- break; +- } +- inode = dentry->d_inode; /* can't be NULL */ +- if (unlikely(!spin_trylock(&inode->i_lock))) { +- spin_unlock(&dentry->d_lock); +- if (parent) +- spin_unlock(&parent->d_lock); +- cpu_relax(); +- continue; +- } +- __dentry_kill(dentry); +- dentry = parent; +- } ++ while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) ++ dentry = dentry_kill(dentry); + } + } + diff --git a/debian/patches/features/all/rt/0007-iommu-amd-Factor-out-setting-the-remap-table-for-a-d.patch b/debian/patches/features/all/rt/0007-iommu-amd-Factor-out-setting-the-remap-table-for-a-d.patch new file mode 100644 index 000000000..f4ee4f8db --- /dev/null +++ b/debian/patches/features/all/rt/0007-iommu-amd-Factor-out-setting-the-remap-table-for-a-d.patch @@ -0,0 +1,67 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:39 +0100 +Subject: [PATCH 07/10] iommu/amd: Factor out setting the remap table for a + devid +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 2fcc1e8ac4a8514c64f946178fc36c2e30e56a41 + +Setting the IRQ remap table for a specific devid (or its alias devid) +includes three steps. Those three steps are always repeated each time +this is done. +Introduce a new helper function, move those steps there and use that +function instead. The compiler can still decide if it is worth to +inline. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3603,6 +3603,14 @@ static struct irq_remap_table *get_irq_t + return table; + } + ++static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, ++ struct irq_remap_table *table) ++{ ++ irq_lookup_table[devid] = table; ++ set_dte_irq_entry(devid, table); ++ iommu_flush_dte(iommu, devid); ++} ++ + static struct irq_remap_table *alloc_irq_table(u16 devid) + { + struct irq_remap_table *table = NULL; +@@ -3623,9 +3631,7 @@ static struct irq_remap_table *alloc_irq + alias = amd_iommu_alias_table[devid]; + table = irq_lookup_table[alias]; + if (table) { +- irq_lookup_table[devid] = table; +- set_dte_irq_entry(devid, table); +- iommu_flush_dte(iommu, devid); ++ set_remap_table_entry(iommu, devid, table); + goto out; + } + +@@ -3652,14 +3658,9 @@ static struct irq_remap_table *alloc_irq + (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); + + +- irq_lookup_table[devid] = table; +- set_dte_irq_entry(devid, table); +- iommu_flush_dte(iommu, devid); +- if (devid != alias) { +- irq_lookup_table[alias] = table; +- set_dte_irq_entry(alias, table); +- iommu_flush_dte(iommu, alias); +- } ++ set_remap_table_entry(iommu, devid, table); ++ if (devid != alias) ++ set_remap_table_entry(iommu, alias, table); + + out: + iommu_completion_wait(iommu); diff --git a/debian/patches/features/all/rt/0008-dcache.c-trim-includes.patch b/debian/patches/features/all/rt/0008-dcache.c-trim-includes.patch new file mode 100644 index 000000000..7c99f78c9 --- /dev/null +++ b/debian/patches/features/all/rt/0008-dcache.c-trim-includes.patch @@ -0,0 +1,33 @@ +From: Al Viro +Date: Sun, 25 Feb 2018 02:47:29 -0500 +Subject: [PATCH 08/17] dcache.c: trim 
includes
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+Upstream commit 43986d63b60fd0152d9038ee3f0f9294efa8c983
+
+Signed-off-by: Al Viro
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ fs/dcache.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -25,17 +25,14 @@
+ #include 
+ #include 
+ #include 
+-#include 
+ #include 
+ #include 
+ #include 
+-#include 
+ #include 
+ #include 
+ #include 
+ #include 
+ #include 
+-#include 
+ #include 
+ #include "internal.h"
+ #include "mount.h"
diff --git a/debian/patches/features/all/rt/0008-iommu-amd-Drop-the-lock-while-allocating-new-irq-rem.patch b/debian/patches/features/all/rt/0008-iommu-amd-Drop-the-lock-while-allocating-new-irq-rem.patch
new file mode 100644
index 000000000..ec638a874
--- /dev/null
+++ b/debian/patches/features/all/rt/0008-iommu-amd-Drop-the-lock-while-allocating-new-irq-rem.patch
@@ -0,0 +1,132 @@
+From: Sebastian Andrzej Siewior
+Date: Thu, 22 Mar 2018 16:22:40 +0100
+Subject: [PATCH 08/10] iommu/amd: Drop the lock while allocating new irq remap
+ table
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+Upstream commit 993ca6e063a69a0c65ca42ed449b6bc1b3844151
+
+The irq_remap_table is allocated while the iommu_table_lock is held with
+interrupts disabled.
+From looking at the call sites, all callers are in the early device
+initialisation (apic_bsp_setup(), pci_enable_device(),
+pci_enable_msi()), so it makes sense to drop the lock, which also
+enables interrupts, and to try to allocate that memory with GFP_KERNEL
+instead of GFP_ATOMIC.
+
+Since the iommu_table_lock is dropped during the allocation, we need to
+recheck whether the table exists after the lock has been reacquired. I
+*think* it is impossible for the "devid" entry to appear in
+irq_lookup_table while the lock is dropped, since the same device can
+only be probed once. However I check for both cases, just to be sure.
+ +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 63 ++++++++++++++++++++++++++++++++-------------- + 1 file changed, 45 insertions(+), 18 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -3603,6 +3603,30 @@ static struct irq_remap_table *get_irq_t + return table; + } + ++static struct irq_remap_table *__alloc_irq_table(void) ++{ ++ struct irq_remap_table *table; ++ ++ table = kzalloc(sizeof(*table), GFP_KERNEL); ++ if (!table) ++ return NULL; ++ ++ table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL); ++ if (!table->table) { ++ kfree(table); ++ return NULL; ++ } ++ raw_spin_lock_init(&table->lock); ++ ++ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) ++ memset(table->table, 0, ++ MAX_IRQS_PER_TABLE * sizeof(u32)); ++ else ++ memset(table->table, 0, ++ (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); ++ return table; ++} ++ + static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, + struct irq_remap_table *table) + { +@@ -3614,6 +3638,7 @@ static void set_remap_table_entry(struct + static struct irq_remap_table *alloc_irq_table(u16 devid) + { + struct irq_remap_table *table = NULL; ++ struct irq_remap_table *new_table = NULL; + struct amd_iommu *iommu; + unsigned long flags; + u16 alias; +@@ -3632,42 +3657,44 @@ static struct irq_remap_table *alloc_irq + table = irq_lookup_table[alias]; + if (table) { + set_remap_table_entry(iommu, devid, table); +- goto out; ++ goto out_wait; + } ++ spin_unlock_irqrestore(&iommu_table_lock, flags); + + /* Nothing there yet, allocate new irq remapping table */ +- table = kzalloc(sizeof(*table), GFP_ATOMIC); +- if (!table) +- goto out_unlock; ++ new_table = __alloc_irq_table(); ++ if (!new_table) ++ return NULL; + +- /* Initialize table spin-lock */ +- raw_spin_lock_init(&table->lock); ++ spin_lock_irqsave(&iommu_table_lock, flags); + +- table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); +- if (!table->table) { +- kfree(table); +- table = NULL; ++ table = irq_lookup_table[devid]; ++ if (table) + goto out_unlock; +- } + +- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) +- memset(table->table, 0, +- MAX_IRQS_PER_TABLE * sizeof(u32)); +- else +- memset(table->table, 0, +- (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); ++ table = irq_lookup_table[alias]; ++ if (table) { ++ set_remap_table_entry(iommu, devid, table); ++ goto out_wait; ++ } + ++ table = new_table; ++ new_table = NULL; + + set_remap_table_entry(iommu, devid, table); + if (devid != alias) + set_remap_table_entry(iommu, alias, table); + +-out: ++out_wait: + iommu_completion_wait(iommu); + + out_unlock: + spin_unlock_irqrestore(&iommu_table_lock, flags); + ++ if (new_table) { ++ kmem_cache_free(amd_iommu_irq_cache, new_table->table); ++ kfree(new_table); ++ } + return table; + } + diff --git a/debian/patches/features/all/rt/0009-iommu-amd-Make-amd_iommu_devtable_lock-a-spin_lock.patch b/debian/patches/features/all/rt/0009-iommu-amd-Make-amd_iommu_devtable_lock-a-spin_lock.patch new file mode 100644 index 000000000..57fb1bb16 --- /dev/null +++ b/debian/patches/features/all/rt/0009-iommu-amd-Make-amd_iommu_devtable_lock-a-spin_lock.patch @@ -0,0 +1,74 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:41 +0100 +Subject: [PATCH 09/10] iommu/amd: Make amd_iommu_devtable_lock a spin_lock +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 2cd1083d79a0a8c223af430ca97884c28a1e2fc0 + +Before commit 
0bb6e243d7fb ("iommu/amd: Support IOMMU_DOMAIN_DMA type
+allocation") amd_iommu_devtable_lock had a read_lock() user, but now
+there are none. In fact, after the mentioned commit there are only
+write_lock() users of the lock. Since there is no reason to keep it as
+a writer lock, change its type to a spin_lock.
+I *think* that we might even be able to remove the lock, because all
+its current users seem to have their own protection.
+
+Signed-off-by: Sebastian Andrzej Siewior
+Signed-off-by: Joerg Roedel
+---
+ drivers/iommu/amd_iommu.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/iommu/amd_iommu.c
++++ b/drivers/iommu/amd_iommu.c
+@@ -80,7 +80,7 @@
+  */
+ #define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))
+ 
+-static DEFINE_RWLOCK(amd_iommu_devtable_lock);
++static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
+ static DEFINE_SPINLOCK(pd_bitmap_lock);
+ static DEFINE_SPINLOCK(iommu_table_lock);
+ 
+@@ -2096,9 +2096,9 @@ static int attach_device(struct device *
+ 	}
+ 
+ skip_ats_check:
+-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
++	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ 	ret = __attach_device(dev_data, domain);
+-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
++	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ 
+ 	/*
+ 	 * We might boot into a crash-kernel here. The crashed kernel
+@@ -2148,9 +2148,9 @@ static void detach_device(struct device
+ 	domain = dev_data->domain;
+ 
+ 	/* lock device table */
+-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
++	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ 	__detach_device(dev_data);
+-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
++	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ 
+ 	if (!dev_is_pci(dev))
+ 		return;
+@@ -2813,7 +2813,7 @@ static void cleanup_domain(struct protec
+ 	struct iommu_dev_data *entry;
+ 	unsigned long flags;
+ 
+-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
++	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ 
+ 	while (!list_empty(&domain->dev_list)) {
+ 		entry = list_first_entry(&domain->dev_list,
+@@ -2821,7 +2821,7 @@ static void cleanup_domain(struct protec
+ 		__detach_device(entry);
+ 	}
+ 
+-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
++	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ }
+ 
+ static void protection_domain_free(struct protection_domain *domain)
diff --git a/debian/patches/features/all/rt/0009-split-d_path-and-friends-into-a-separate-file.patch b/debian/patches/features/all/rt/0009-split-d_path-and-friends-into-a-separate-file.patch
new file mode 100644
index 000000000..926e59c22
--- /dev/null
+++ b/debian/patches/features/all/rt/0009-split-d_path-and-friends-into-a-separate-file.patch
@@ -0,0 +1,997 @@
+From: Al Viro
+Date: Mon, 5 Mar 2018 19:15:50 -0500
+Subject: [PATCH 09/17] split d_path() and friends into a separate file
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+Upstream commit 7a5cf791a747640adb2a1b5e3838321b26953a23
+
+Those parts of fs/dcache.c are pretty much self-contained.
+ +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/Makefile | 2 + fs/d_path.c | 470 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + fs/dcache.c | 467 ----------------------------------------------------------- + 3 files changed, 472 insertions(+), 467 deletions(-) + create mode 100644 fs/d_path.c + +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table. + ioctl.o readdir.o select.o dcache.o inode.o \ + attr.o bad_inode.o file.o filesystems.o namespace.o \ + seq_file.o xattr.o libfs.o fs-writeback.o \ +- pnode.o splice.o sync.o utimes.o \ ++ pnode.o splice.o sync.o utimes.o d_path.o \ + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o + + ifeq ($(CONFIG_BLOCK),y) +--- /dev/null ++++ b/fs/d_path.c +@@ -0,0 +1,470 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "mount.h" ++ ++static int prepend(char **buffer, int *buflen, const char *str, int namelen) ++{ ++ *buflen -= namelen; ++ if (*buflen < 0) ++ return -ENAMETOOLONG; ++ *buffer -= namelen; ++ memcpy(*buffer, str, namelen); ++ return 0; ++} ++ ++/** ++ * prepend_name - prepend a pathname in front of current buffer pointer ++ * @buffer: buffer pointer ++ * @buflen: allocated length of the buffer ++ * @name: name string and length qstr structure ++ * ++ * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to ++ * make sure that either the old or the new name pointer and length are ++ * fetched. However, there may be mismatch between length and pointer. ++ * The length cannot be trusted, we need to copy it byte-by-byte until ++ * the length is reached or a null byte is found. It also prepends "/" at ++ * the beginning of the name. The sequence number check at the caller will ++ * retry it again when a d_move() does happen. So any garbage in the buffer ++ * due to mismatched pointer and length will be discarded. ++ * ++ * Load acquire is needed to make sure that we see that terminating NUL. ++ */ ++static int prepend_name(char **buffer, int *buflen, const struct qstr *name) ++{ ++ const char *dname = smp_load_acquire(&name->name); /* ^^^ */ ++ u32 dlen = READ_ONCE(name->len); ++ char *p; ++ ++ *buflen -= dlen + 1; ++ if (*buflen < 0) ++ return -ENAMETOOLONG; ++ p = *buffer -= dlen + 1; ++ *p++ = '/'; ++ while (dlen--) { ++ char c = *dname++; ++ if (!c) ++ break; ++ *p++ = c; ++ } ++ return 0; ++} ++ ++/** ++ * prepend_path - Prepend path string to a buffer ++ * @path: the dentry/vfsmount to report ++ * @root: root vfsmnt/dentry ++ * @buffer: pointer to the end of the buffer ++ * @buflen: pointer to buffer length ++ * ++ * The function will first try to write out the pathname without taking any ++ * lock other than the RCU read lock to make sure that dentries won't go away. ++ * It only checks the sequence number of the global rename_lock as any change ++ * in the dentry's d_seq will be preceded by changes in the rename_lock ++ * sequence number. If the sequence number had been changed, it will restart ++ * the whole pathname back-tracing sequence again by taking the rename_lock. ++ * In this case, there is no need to take the RCU read lock as the recursive ++ * parent pointer references will keep the dentry chain alive as long as no ++ * rename operation is performed. 
++ */ ++static int prepend_path(const struct path *path, ++ const struct path *root, ++ char **buffer, int *buflen) ++{ ++ struct dentry *dentry; ++ struct vfsmount *vfsmnt; ++ struct mount *mnt; ++ int error = 0; ++ unsigned seq, m_seq = 0; ++ char *bptr; ++ int blen; ++ ++ rcu_read_lock(); ++restart_mnt: ++ read_seqbegin_or_lock(&mount_lock, &m_seq); ++ seq = 0; ++ rcu_read_lock(); ++restart: ++ bptr = *buffer; ++ blen = *buflen; ++ error = 0; ++ dentry = path->dentry; ++ vfsmnt = path->mnt; ++ mnt = real_mount(vfsmnt); ++ read_seqbegin_or_lock(&rename_lock, &seq); ++ while (dentry != root->dentry || vfsmnt != root->mnt) { ++ struct dentry * parent; ++ ++ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { ++ struct mount *parent = READ_ONCE(mnt->mnt_parent); ++ /* Escaped? */ ++ if (dentry != vfsmnt->mnt_root) { ++ bptr = *buffer; ++ blen = *buflen; ++ error = 3; ++ break; ++ } ++ /* Global root? */ ++ if (mnt != parent) { ++ dentry = READ_ONCE(mnt->mnt_mountpoint); ++ mnt = parent; ++ vfsmnt = &mnt->mnt; ++ continue; ++ } ++ if (!error) ++ error = is_mounted(vfsmnt) ? 1 : 2; ++ break; ++ } ++ parent = dentry->d_parent; ++ prefetch(parent); ++ error = prepend_name(&bptr, &blen, &dentry->d_name); ++ if (error) ++ break; ++ ++ dentry = parent; ++ } ++ if (!(seq & 1)) ++ rcu_read_unlock(); ++ if (need_seqretry(&rename_lock, seq)) { ++ seq = 1; ++ goto restart; ++ } ++ done_seqretry(&rename_lock, seq); ++ ++ if (!(m_seq & 1)) ++ rcu_read_unlock(); ++ if (need_seqretry(&mount_lock, m_seq)) { ++ m_seq = 1; ++ goto restart_mnt; ++ } ++ done_seqretry(&mount_lock, m_seq); ++ ++ if (error >= 0 && bptr == *buffer) { ++ if (--blen < 0) ++ error = -ENAMETOOLONG; ++ else ++ *--bptr = '/'; ++ } ++ *buffer = bptr; ++ *buflen = blen; ++ return error; ++} ++ ++/** ++ * __d_path - return the path of a dentry ++ * @path: the dentry/vfsmount to report ++ * @root: root vfsmnt/dentry ++ * @buf: buffer to return value in ++ * @buflen: buffer length ++ * ++ * Convert a dentry into an ASCII path name. ++ * ++ * Returns a pointer into the buffer or an error code if the ++ * path was too long. ++ * ++ * "buflen" should be positive. ++ * ++ * If the path is not reachable from the supplied root, return %NULL. ++ */ ++char *__d_path(const struct path *path, ++ const struct path *root, ++ char *buf, int buflen) ++{ ++ char *res = buf + buflen; ++ int error; ++ ++ prepend(&res, &buflen, "\0", 1); ++ error = prepend_path(path, root, &res, &buflen); ++ ++ if (error < 0) ++ return ERR_PTR(error); ++ if (error > 0) ++ return NULL; ++ return res; ++} ++ ++char *d_absolute_path(const struct path *path, ++ char *buf, int buflen) ++{ ++ struct path root = {}; ++ char *res = buf + buflen; ++ int error; ++ ++ prepend(&res, &buflen, "\0", 1); ++ error = prepend_path(path, &root, &res, &buflen); ++ ++ if (error > 1) ++ error = -EINVAL; ++ if (error < 0) ++ return ERR_PTR(error); ++ return res; ++} ++ ++/* ++ * same as __d_path but appends "(deleted)" for unlinked files. 
++ */ ++static int path_with_deleted(const struct path *path, ++ const struct path *root, ++ char **buf, int *buflen) ++{ ++ prepend(buf, buflen, "\0", 1); ++ if (d_unlinked(path->dentry)) { ++ int error = prepend(buf, buflen, " (deleted)", 10); ++ if (error) ++ return error; ++ } ++ ++ return prepend_path(path, root, buf, buflen); ++} ++ ++static int prepend_unreachable(char **buffer, int *buflen) ++{ ++ return prepend(buffer, buflen, "(unreachable)", 13); ++} ++ ++static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) ++{ ++ unsigned seq; ++ ++ do { ++ seq = read_seqcount_begin(&fs->seq); ++ *root = fs->root; ++ } while (read_seqcount_retry(&fs->seq, seq)); ++} ++ ++/** ++ * d_path - return the path of a dentry ++ * @path: path to report ++ * @buf: buffer to return value in ++ * @buflen: buffer length ++ * ++ * Convert a dentry into an ASCII path name. If the entry has been deleted ++ * the string " (deleted)" is appended. Note that this is ambiguous. ++ * ++ * Returns a pointer into the buffer or an error code if the path was ++ * too long. Note: Callers should use the returned pointer, not the passed ++ * in buffer, to use the name! The implementation often starts at an offset ++ * into the buffer, and may leave 0 bytes at the start. ++ * ++ * "buflen" should be positive. ++ */ ++char *d_path(const struct path *path, char *buf, int buflen) ++{ ++ char *res = buf + buflen; ++ struct path root; ++ int error; ++ ++ /* ++ * We have various synthetic filesystems that never get mounted. On ++ * these filesystems dentries are never used for lookup purposes, and ++ * thus don't need to be hashed. They also don't need a name until a ++ * user wants to identify the object in /proc/pid/fd/. The little hack ++ * below allows us to generate a name for these objects on demand: ++ * ++ * Some pseudo inodes are mountable. When they are mounted ++ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname ++ * and instead have d_path return the mounted path. ++ */ ++ if (path->dentry->d_op && path->dentry->d_op->d_dname && ++ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) ++ return path->dentry->d_op->d_dname(path->dentry, buf, buflen); ++ ++ rcu_read_lock(); ++ get_fs_root_rcu(current->fs, &root); ++ error = path_with_deleted(path, &root, &res, &buflen); ++ rcu_read_unlock(); ++ ++ if (error < 0) ++ res = ERR_PTR(error); ++ return res; ++} ++EXPORT_SYMBOL(d_path); ++ ++/* ++ * Helper function for dentry_operations.d_dname() members ++ */ ++char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, ++ const char *fmt, ...) ++{ ++ va_list args; ++ char temp[64]; ++ int sz; ++ ++ va_start(args, fmt); ++ sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; ++ va_end(args); ++ ++ if (sz > sizeof(temp) || sz > buflen) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ buffer += buflen - sz; ++ return memcpy(buffer, temp, sz); ++} ++ ++char *simple_dname(struct dentry *dentry, char *buffer, int buflen) ++{ ++ char *end = buffer + buflen; ++ /* these dentries are never renamed, so d_lock is not needed */ ++ if (prepend(&end, &buflen, " (deleted)", 11) || ++ prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || ++ prepend(&end, &buflen, "/", 1)) ++ end = ERR_PTR(-ENAMETOOLONG); ++ return end; ++} ++EXPORT_SYMBOL(simple_dname); ++ ++/* ++ * Write full pathname from the root of the filesystem into the buffer. 
++ */ ++static char *__dentry_path(struct dentry *d, char *buf, int buflen) ++{ ++ struct dentry *dentry; ++ char *end, *retval; ++ int len, seq = 0; ++ int error = 0; ++ ++ if (buflen < 2) ++ goto Elong; ++ ++ rcu_read_lock(); ++restart: ++ dentry = d; ++ end = buf + buflen; ++ len = buflen; ++ prepend(&end, &len, "\0", 1); ++ /* Get '/' right */ ++ retval = end-1; ++ *retval = '/'; ++ read_seqbegin_or_lock(&rename_lock, &seq); ++ while (!IS_ROOT(dentry)) { ++ struct dentry *parent = dentry->d_parent; ++ ++ prefetch(parent); ++ error = prepend_name(&end, &len, &dentry->d_name); ++ if (error) ++ break; ++ ++ retval = end; ++ dentry = parent; ++ } ++ if (!(seq & 1)) ++ rcu_read_unlock(); ++ if (need_seqretry(&rename_lock, seq)) { ++ seq = 1; ++ goto restart; ++ } ++ done_seqretry(&rename_lock, seq); ++ if (error) ++ goto Elong; ++ return retval; ++Elong: ++ return ERR_PTR(-ENAMETOOLONG); ++} ++ ++char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) ++{ ++ return __dentry_path(dentry, buf, buflen); ++} ++EXPORT_SYMBOL(dentry_path_raw); ++ ++char *dentry_path(struct dentry *dentry, char *buf, int buflen) ++{ ++ char *p = NULL; ++ char *retval; ++ ++ if (d_unlinked(dentry)) { ++ p = buf + buflen; ++ if (prepend(&p, &buflen, "//deleted", 10) != 0) ++ goto Elong; ++ buflen++; ++ } ++ retval = __dentry_path(dentry, buf, buflen); ++ if (!IS_ERR(retval) && p) ++ *p = '/'; /* restore '/' overriden with '\0' */ ++ return retval; ++Elong: ++ return ERR_PTR(-ENAMETOOLONG); ++} ++ ++static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, ++ struct path *pwd) ++{ ++ unsigned seq; ++ ++ do { ++ seq = read_seqcount_begin(&fs->seq); ++ *root = fs->root; ++ *pwd = fs->pwd; ++ } while (read_seqcount_retry(&fs->seq, seq)); ++} ++ ++/* ++ * NOTE! The user-level library version returns a ++ * character pointer. The kernel system call just ++ * returns the length of the buffer filled (which ++ * includes the ending '\0' character), or a negative ++ * error value. So libc would do something like ++ * ++ * char *getcwd(char * buf, size_t size) ++ * { ++ * int retval; ++ * ++ * retval = sys_getcwd(buf, size); ++ * if (retval >= 0) ++ * return buf; ++ * errno = -retval; ++ * return NULL; ++ * } ++ */ ++SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) ++{ ++ int error; ++ struct path pwd, root; ++ char *page = __getname(); ++ ++ if (!page) ++ return -ENOMEM; ++ ++ rcu_read_lock(); ++ get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); ++ ++ error = -ENOENT; ++ if (!d_unlinked(pwd.dentry)) { ++ unsigned long len; ++ char *cwd = page + PATH_MAX; ++ int buflen = PATH_MAX; ++ ++ prepend(&cwd, &buflen, "\0", 1); ++ error = prepend_path(&pwd, &root, &cwd, &buflen); ++ rcu_read_unlock(); ++ ++ if (error < 0) ++ goto out; ++ ++ /* Unreachable from current root */ ++ if (error > 0) { ++ error = prepend_unreachable(&cwd, &buflen); ++ if (error) ++ goto out; ++ } ++ ++ error = -ERANGE; ++ len = PATH_MAX + page - cwd; ++ if (len <= size) { ++ error = len; ++ if (copy_to_user(buf, cwd, len)) ++ error = -EFAULT; ++ } ++ } else { ++ rcu_read_unlock(); ++ } ++ ++out: ++ __putname(page); ++ return error; ++} +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -14,7 +14,7 @@ + * the dcache entry is deleted or garbage collected. 
+ */ + +-#include ++#include + #include + #include + #include +@@ -24,15 +24,11 @@ + #include + #include + #include +-#include +-#include + #include + #include + #include +-#include + #include + #include +-#include + #include + #include "internal.h" + #include "mount.h" +@@ -3072,467 +3068,6 @@ struct dentry *d_splice_alias(struct ino + } + EXPORT_SYMBOL(d_splice_alias); + +-static int prepend(char **buffer, int *buflen, const char *str, int namelen) +-{ +- *buflen -= namelen; +- if (*buflen < 0) +- return -ENAMETOOLONG; +- *buffer -= namelen; +- memcpy(*buffer, str, namelen); +- return 0; +-} +- +-/** +- * prepend_name - prepend a pathname in front of current buffer pointer +- * @buffer: buffer pointer +- * @buflen: allocated length of the buffer +- * @name: name string and length qstr structure +- * +- * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to +- * make sure that either the old or the new name pointer and length are +- * fetched. However, there may be mismatch between length and pointer. +- * The length cannot be trusted, we need to copy it byte-by-byte until +- * the length is reached or a null byte is found. It also prepends "/" at +- * the beginning of the name. The sequence number check at the caller will +- * retry it again when a d_move() does happen. So any garbage in the buffer +- * due to mismatched pointer and length will be discarded. +- * +- * Load acquire is needed to make sure that we see that terminating NUL. +- */ +-static int prepend_name(char **buffer, int *buflen, const struct qstr *name) +-{ +- const char *dname = smp_load_acquire(&name->name); /* ^^^ */ +- u32 dlen = READ_ONCE(name->len); +- char *p; +- +- *buflen -= dlen + 1; +- if (*buflen < 0) +- return -ENAMETOOLONG; +- p = *buffer -= dlen + 1; +- *p++ = '/'; +- while (dlen--) { +- char c = *dname++; +- if (!c) +- break; +- *p++ = c; +- } +- return 0; +-} +- +-/** +- * prepend_path - Prepend path string to a buffer +- * @path: the dentry/vfsmount to report +- * @root: root vfsmnt/dentry +- * @buffer: pointer to the end of the buffer +- * @buflen: pointer to buffer length +- * +- * The function will first try to write out the pathname without taking any +- * lock other than the RCU read lock to make sure that dentries won't go away. +- * It only checks the sequence number of the global rename_lock as any change +- * in the dentry's d_seq will be preceded by changes in the rename_lock +- * sequence number. If the sequence number had been changed, it will restart +- * the whole pathname back-tracing sequence again by taking the rename_lock. +- * In this case, there is no need to take the RCU read lock as the recursive +- * parent pointer references will keep the dentry chain alive as long as no +- * rename operation is performed. 
+- */ +-static int prepend_path(const struct path *path, +- const struct path *root, +- char **buffer, int *buflen) +-{ +- struct dentry *dentry; +- struct vfsmount *vfsmnt; +- struct mount *mnt; +- int error = 0; +- unsigned seq, m_seq = 0; +- char *bptr; +- int blen; +- +- rcu_read_lock(); +-restart_mnt: +- read_seqbegin_or_lock(&mount_lock, &m_seq); +- seq = 0; +- rcu_read_lock(); +-restart: +- bptr = *buffer; +- blen = *buflen; +- error = 0; +- dentry = path->dentry; +- vfsmnt = path->mnt; +- mnt = real_mount(vfsmnt); +- read_seqbegin_or_lock(&rename_lock, &seq); +- while (dentry != root->dentry || vfsmnt != root->mnt) { +- struct dentry * parent; +- +- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { +- struct mount *parent = READ_ONCE(mnt->mnt_parent); +- /* Escaped? */ +- if (dentry != vfsmnt->mnt_root) { +- bptr = *buffer; +- blen = *buflen; +- error = 3; +- break; +- } +- /* Global root? */ +- if (mnt != parent) { +- dentry = READ_ONCE(mnt->mnt_mountpoint); +- mnt = parent; +- vfsmnt = &mnt->mnt; +- continue; +- } +- if (!error) +- error = is_mounted(vfsmnt) ? 1 : 2; +- break; +- } +- parent = dentry->d_parent; +- prefetch(parent); +- error = prepend_name(&bptr, &blen, &dentry->d_name); +- if (error) +- break; +- +- dentry = parent; +- } +- if (!(seq & 1)) +- rcu_read_unlock(); +- if (need_seqretry(&rename_lock, seq)) { +- seq = 1; +- goto restart; +- } +- done_seqretry(&rename_lock, seq); +- +- if (!(m_seq & 1)) +- rcu_read_unlock(); +- if (need_seqretry(&mount_lock, m_seq)) { +- m_seq = 1; +- goto restart_mnt; +- } +- done_seqretry(&mount_lock, m_seq); +- +- if (error >= 0 && bptr == *buffer) { +- if (--blen < 0) +- error = -ENAMETOOLONG; +- else +- *--bptr = '/'; +- } +- *buffer = bptr; +- *buflen = blen; +- return error; +-} +- +-/** +- * __d_path - return the path of a dentry +- * @path: the dentry/vfsmount to report +- * @root: root vfsmnt/dentry +- * @buf: buffer to return value in +- * @buflen: buffer length +- * +- * Convert a dentry into an ASCII path name. +- * +- * Returns a pointer into the buffer or an error code if the +- * path was too long. +- * +- * "buflen" should be positive. +- * +- * If the path is not reachable from the supplied root, return %NULL. +- */ +-char *__d_path(const struct path *path, +- const struct path *root, +- char *buf, int buflen) +-{ +- char *res = buf + buflen; +- int error; +- +- prepend(&res, &buflen, "\0", 1); +- error = prepend_path(path, root, &res, &buflen); +- +- if (error < 0) +- return ERR_PTR(error); +- if (error > 0) +- return NULL; +- return res; +-} +- +-char *d_absolute_path(const struct path *path, +- char *buf, int buflen) +-{ +- struct path root = {}; +- char *res = buf + buflen; +- int error; +- +- prepend(&res, &buflen, "\0", 1); +- error = prepend_path(path, &root, &res, &buflen); +- +- if (error > 1) +- error = -EINVAL; +- if (error < 0) +- return ERR_PTR(error); +- return res; +-} +- +-/* +- * same as __d_path but appends "(deleted)" for unlinked files. 
+- */ +-static int path_with_deleted(const struct path *path, +- const struct path *root, +- char **buf, int *buflen) +-{ +- prepend(buf, buflen, "\0", 1); +- if (d_unlinked(path->dentry)) { +- int error = prepend(buf, buflen, " (deleted)", 10); +- if (error) +- return error; +- } +- +- return prepend_path(path, root, buf, buflen); +-} +- +-static int prepend_unreachable(char **buffer, int *buflen) +-{ +- return prepend(buffer, buflen, "(unreachable)", 13); +-} +- +-static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) +-{ +- unsigned seq; +- +- do { +- seq = read_seqcount_begin(&fs->seq); +- *root = fs->root; +- } while (read_seqcount_retry(&fs->seq, seq)); +-} +- +-/** +- * d_path - return the path of a dentry +- * @path: path to report +- * @buf: buffer to return value in +- * @buflen: buffer length +- * +- * Convert a dentry into an ASCII path name. If the entry has been deleted +- * the string " (deleted)" is appended. Note that this is ambiguous. +- * +- * Returns a pointer into the buffer or an error code if the path was +- * too long. Note: Callers should use the returned pointer, not the passed +- * in buffer, to use the name! The implementation often starts at an offset +- * into the buffer, and may leave 0 bytes at the start. +- * +- * "buflen" should be positive. +- */ +-char *d_path(const struct path *path, char *buf, int buflen) +-{ +- char *res = buf + buflen; +- struct path root; +- int error; +- +- /* +- * We have various synthetic filesystems that never get mounted. On +- * these filesystems dentries are never used for lookup purposes, and +- * thus don't need to be hashed. They also don't need a name until a +- * user wants to identify the object in /proc/pid/fd/. The little hack +- * below allows us to generate a name for these objects on demand: +- * +- * Some pseudo inodes are mountable. When they are mounted +- * path->dentry == path->mnt->mnt_root. In that case don't call d_dname +- * and instead have d_path return the mounted path. +- */ +- if (path->dentry->d_op && path->dentry->d_op->d_dname && +- (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) +- return path->dentry->d_op->d_dname(path->dentry, buf, buflen); +- +- rcu_read_lock(); +- get_fs_root_rcu(current->fs, &root); +- error = path_with_deleted(path, &root, &res, &buflen); +- rcu_read_unlock(); +- +- if (error < 0) +- res = ERR_PTR(error); +- return res; +-} +-EXPORT_SYMBOL(d_path); +- +-/* +- * Helper function for dentry_operations.d_dname() members +- */ +-char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, +- const char *fmt, ...) +-{ +- va_list args; +- char temp[64]; +- int sz; +- +- va_start(args, fmt); +- sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; +- va_end(args); +- +- if (sz > sizeof(temp) || sz > buflen) +- return ERR_PTR(-ENAMETOOLONG); +- +- buffer += buflen - sz; +- return memcpy(buffer, temp, sz); +-} +- +-char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +-{ +- char *end = buffer + buflen; +- /* these dentries are never renamed, so d_lock is not needed */ +- if (prepend(&end, &buflen, " (deleted)", 11) || +- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || +- prepend(&end, &buflen, "/", 1)) +- end = ERR_PTR(-ENAMETOOLONG); +- return end; +-} +-EXPORT_SYMBOL(simple_dname); +- +-/* +- * Write full pathname from the root of the filesystem into the buffer. 
+- */ +-static char *__dentry_path(struct dentry *d, char *buf, int buflen) +-{ +- struct dentry *dentry; +- char *end, *retval; +- int len, seq = 0; +- int error = 0; +- +- if (buflen < 2) +- goto Elong; +- +- rcu_read_lock(); +-restart: +- dentry = d; +- end = buf + buflen; +- len = buflen; +- prepend(&end, &len, "\0", 1); +- /* Get '/' right */ +- retval = end-1; +- *retval = '/'; +- read_seqbegin_or_lock(&rename_lock, &seq); +- while (!IS_ROOT(dentry)) { +- struct dentry *parent = dentry->d_parent; +- +- prefetch(parent); +- error = prepend_name(&end, &len, &dentry->d_name); +- if (error) +- break; +- +- retval = end; +- dentry = parent; +- } +- if (!(seq & 1)) +- rcu_read_unlock(); +- if (need_seqretry(&rename_lock, seq)) { +- seq = 1; +- goto restart; +- } +- done_seqretry(&rename_lock, seq); +- if (error) +- goto Elong; +- return retval; +-Elong: +- return ERR_PTR(-ENAMETOOLONG); +-} +- +-char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) +-{ +- return __dentry_path(dentry, buf, buflen); +-} +-EXPORT_SYMBOL(dentry_path_raw); +- +-char *dentry_path(struct dentry *dentry, char *buf, int buflen) +-{ +- char *p = NULL; +- char *retval; +- +- if (d_unlinked(dentry)) { +- p = buf + buflen; +- if (prepend(&p, &buflen, "//deleted", 10) != 0) +- goto Elong; +- buflen++; +- } +- retval = __dentry_path(dentry, buf, buflen); +- if (!IS_ERR(retval) && p) +- *p = '/'; /* restore '/' overriden with '\0' */ +- return retval; +-Elong: +- return ERR_PTR(-ENAMETOOLONG); +-} +- +-static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, +- struct path *pwd) +-{ +- unsigned seq; +- +- do { +- seq = read_seqcount_begin(&fs->seq); +- *root = fs->root; +- *pwd = fs->pwd; +- } while (read_seqcount_retry(&fs->seq, seq)); +-} +- +-/* +- * NOTE! The user-level library version returns a +- * character pointer. The kernel system call just +- * returns the length of the buffer filled (which +- * includes the ending '\0' character), or a negative +- * error value. So libc would do something like +- * +- * char *getcwd(char * buf, size_t size) +- * { +- * int retval; +- * +- * retval = sys_getcwd(buf, size); +- * if (retval >= 0) +- * return buf; +- * errno = -retval; +- * return NULL; +- * } +- */ +-SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) +-{ +- int error; +- struct path pwd, root; +- char *page = __getname(); +- +- if (!page) +- return -ENOMEM; +- +- rcu_read_lock(); +- get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); +- +- error = -ENOENT; +- if (!d_unlinked(pwd.dentry)) { +- unsigned long len; +- char *cwd = page + PATH_MAX; +- int buflen = PATH_MAX; +- +- prepend(&cwd, &buflen, "\0", 1); +- error = prepend_path(&pwd, &root, &cwd, &buflen); +- rcu_read_unlock(); +- +- if (error < 0) +- goto out; +- +- /* Unreachable from current root */ +- if (error > 0) { +- error = prepend_unreachable(&cwd, &buflen); +- if (error) +- goto out; +- } +- +- error = -ERANGE; +- len = PATH_MAX + page - cwd; +- if (len <= size) { +- error = len; +- if (copy_to_user(buf, cwd, len)) +- error = -EFAULT; +- } +- } else { +- rcu_read_unlock(); +- } +- +-out: +- __putname(page); +- return error; +-} +- + /* + * Test whether new_dentry is a subdirectory of old_dentry. 
+ *
diff --git a/debian/patches/features/all/rt/0009-tracing-Move-hist-trigger-Documentation-to-histogram.patch b/debian/patches/features/all/rt/0009-tracing-Move-hist-trigger-Documentation-to-histogram.patch
new file mode 100644
index 000000000..7c0814425
--- /dev/null
+++ b/debian/patches/features/all/rt/0009-tracing-Move-hist-trigger-Documentation-to-histogram.patch
@@ -0,0 +1,3146 @@
+From: Tom Zanussi <tom.zanussi@linux.intel.com>
+Date: Mon, 15 Jan 2018 20:51:35 -0600
+Subject: [PATCH 09/48] tracing: Move hist trigger Documentation to
+ histogram.txt
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+The hist trigger Documentation takes up a large part of events.txt -
+since it will be getting even larger, move it to a separate file.
+
+Link: http://lkml.kernel.org/r/92761155ea4f529e590821b1e02207fe8619f248.1516069914.git.tom.zanussi@linux.intel.com
+
+Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+(cherry picked from commit 995f87b4d6ddb6bbb40309c08c3ca2a1f9f9db80)
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ Documentation/trace/events.txt    | 1548 ------------------------------------
+ Documentation/trace/histogram.txt | 1568 ++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 1569 insertions(+), 1547 deletions(-)
+ create mode 100644 Documentation/trace/histogram.txt
+
+--- a/Documentation/trace/events.txt
++++ b/Documentation/trace/events.txt
+@@ -517,1550 +517,4 @@ triggers (you have to use '!' for each o
+  totals derived from one or more trace event format fields and/or
+  event counts (hitcount).
+
+-  The format of a hist trigger is as follows:
+-
+-        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
+-          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
+-          [:clear][:name=histname1] [if <filter>]
+-
+-  When a matching event is hit, an entry is added to a hash table
+-  using the key(s) and value(s) named.  Keys and values correspond to
+-  fields in the event's format description.  Values must correspond to
+-  numeric fields - on an event hit, the value(s) will be added to a
+-  sum kept for that field.  The special string 'hitcount' can be used
+-  in place of an explicit value field - this is simply a count of
+-  event hits.  If 'values' isn't specified, an implicit 'hitcount'
+-  value will be automatically created and used as the only value.
+-  Keys can be any field, or the special string 'stacktrace', which
+-  will use the event's kernel stacktrace as the key.  The keywords
+-  'keys' or 'key' can be used to specify keys, and the keywords
+-  'values', 'vals', or 'val' can be used to specify values.  Compound
+-  keys consisting of up to two fields can be specified by the 'keys'
+-  keyword.  Hashing a compound key produces a unique entry in the
+-  table for each unique combination of component keys, and can be
+-  useful for providing more fine-grained summaries of event data.
+-  Additionally, sort keys consisting of up to two fields can be
+-  specified by the 'sort' keyword.  If more than one field is
+-  specified, the result will be a 'sort within a sort': the first key
+-  is taken to be the primary sort key and the second the secondary
+-  key.  If a hist trigger is given a name using the 'name' parameter,
+-  its histogram data will be shared with other triggers of the same
+-  name, and trigger hits will update this common data.  Only triggers
+-  with 'compatible' fields can be combined in this way; triggers are
+-  'compatible' if the fields named in the trigger share the same
+-  number and type of fields and those fields also have the same names.
+-  Note that any two events always share the compatible 'hitcount' and
+-  'stacktrace' fields and can therefore be combined using those
+-  fields, however pointless that may be.
+-
+-  'hist' triggers add a 'hist' file to each event's subdirectory.
+-  Reading the 'hist' file for the event will dump the hash table in
+-  its entirety to stdout.  If there are multiple hist triggers
+-  attached to an event, there will be a table for each trigger in the
+-  output.  The table displayed for a named trigger will be the same as
+-  any other instance having the same name.  Each printed hash table
+-  entry is a simple list of the keys and values comprising the entry;
+-  keys are printed first and are delineated by curly braces, and are
+-  followed by the set of value fields for the entry.  By default,
+-  numeric fields are displayed as base-10 integers.  This can be
+-  modified by appending any of the following modifiers to the field
+-  name:
+-
+-        .hex        display a number as a hex value
+-        .sym        display an address as a symbol
+-        .sym-offset display an address as a symbol and offset
+-        .syscall    display a syscall id as a system call name
+-        .execname   display a common_pid as a program name
+-
+-  Note that in general the semantics of a given field aren't
+-  interpreted when applying a modifier to it, but there are some
+-  restrictions to be aware of in this regard:
+-
+-    - only the 'hex' modifier can be used for values (because values
+-      are essentially sums, and the other modifiers don't make sense
+-      in that context).
+-    - the 'execname' modifier can only be used on a 'common_pid'.  The
+-      reason for this is that the execname is simply the 'comm' value
+-      saved for the 'current' process when an event was triggered,
+-      which is the same as the common_pid value saved by the event
+-      tracing code.  Trying to apply that comm value to other pid
+-      values wouldn't be correct, and typically events that care save
+-      pid-specific comm fields in the event itself.
+-
+-  A typical usage scenario would be the following to enable a hist
+-  trigger, read its current contents, and then turn it off:
+-
+-  # echo 'hist:keys=skbaddr.hex:vals=len' > \
+-    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+-
+-  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+-
+-  # echo '!hist:keys=skbaddr.hex:vals=len' > \
+-    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+-
+-  The trigger file itself can be read to show the details of the
+-  currently attached hist trigger.  This information is also displayed
+-  at the top of the 'hist' file when read.
+-
+-  By default, the size of the hash table is 2048 entries.  The 'size'
+-  parameter can be used to specify more or fewer than that.  The units
+-  are in terms of hashtable entries - if a run uses more entries than
+-  specified, the results will show the number of 'drops', the number
+-  of hits that were ignored.  The size should be a power of 2 between
+-  128 and 131072 (any non-power-of-2 number specified will be rounded
+-  up).
+-
+-  The 'sort' parameter can be used to specify a value field to sort
+-  on.  The default if unspecified is 'hitcount' and the default sort
+-  order is 'ascending'.  To sort in the opposite direction, append
+-  '.descending' to the sort key.
+-
+-  The 'pause' parameter can be used to pause an existing hist trigger
+-  or to start a hist trigger but not log any events until told to do
+-  so.  'continue' or 'cont' can be used to start or restart a paused
+-  hist trigger.
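+-
+-  For example - a sketch rather than a transcript of a real session,
+-  with the event and trigger spec used here as placeholders - a
+-  trigger created earlier as 'hist:keys=common_pid' could be paused
+-  and later resumed by re-issuing it with ':pause' or ':cont'
+-  appended, using the append ('>>') operator discussed below:
+-
+-  # echo 'hist:keys=common_pid:pause' >> \
+-    /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+-
+-  # echo 'hist:keys=common_pid:cont' >> \
+-    /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger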
+-
+-  The 'clear' parameter will clear the contents of a running hist
+-  trigger and leave its current paused/active state unchanged.
+-
+-  Note that the 'pause', 'cont', and 'clear' parameters should be
+-  applied using the 'append' shell operator ('>>') if applied to an
+-  existing trigger, rather than via the '>' operator, which will cause
+-  the trigger to be removed through truncation.
+-
+-- enable_hist/disable_hist
+-
+-  The enable_hist and disable_hist triggers can be used to have one
+-  event conditionally start and stop another event's already-attached
+-  hist trigger.  Any number of enable_hist and disable_hist triggers
+-  can be attached to a given event, allowing that event to kick off
+-  and stop aggregations on a host of other events.
+-
+-  The format is very similar to the enable/disable_event triggers:
+-
+-      enable_hist:<system>:<event>[:count]
+-      disable_hist:<system>:<event>[:count]
+-
+-  Instead of enabling or disabling the tracing of the target event
+-  into the trace buffer as the enable/disable_event triggers do, the
+-  enable/disable_hist triggers enable or disable the aggregation of
+-  the target event into a hash table.
+-
+-  A typical usage scenario for the enable_hist/disable_hist triggers
+-  would be to first set up a paused hist trigger on some event,
+-  followed by an enable_hist/disable_hist pair that turns the hist
+-  aggregation on and off when conditions of interest are hit:
+-
+-  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
+-    /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+-
+-  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+-    /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+-
+-  # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+-    /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+-
+-  The above sets up an initially paused hist trigger which is unpaused
+-  and starts aggregating events when a given program is executed, and
+-  which stops aggregating when the process exits and the hist trigger
+-  is paused again.
+-
+-  The examples below provide a more concrete illustration of the
+-  concepts and typical usage patterns discussed above.
+-
+-
+-6.2 'hist' trigger examples
+----------------------------
+-
+-  The first set of examples creates aggregations using the kmalloc
+-  event.
The fields that can be used for the hist trigger are listed
+-  in the kmalloc event's format file:
+-
+-  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
+-  name: kmalloc
+-  ID: 374
+-  format:
+-    field:unsigned short common_type;  offset:0;  size:2;  signed:0;
+-    field:unsigned char common_flags;  offset:2;  size:1;  signed:0;
+-    field:unsigned char common_preempt_count;  offset:3;  size:1;  signed:0;
+-    field:int common_pid;  offset:4;  size:4;  signed:1;
+-
+-    field:unsigned long call_site;  offset:8;  size:8;  signed:0;
+-    field:const void * ptr;  offset:16;  size:8;  signed:0;
+-    field:size_t bytes_req;  offset:24;  size:8;  signed:0;
+-    field:size_t bytes_alloc;  offset:32;  size:8;  signed:0;
+-    field:gfp_t gfp_flags;  offset:40;  size:4;  signed:0;
+-
+-  We'll start by creating a hist trigger that generates a simple table
+-  that lists the total number of bytes requested for each function in
+-  the kernel that made one or more calls to kmalloc:
+-
+-  # echo 'hist:key=call_site:val=bytes_req' > \
+-    /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+-
+-  This tells the tracing system to create a 'hist' trigger using the
+-  call_site field of the kmalloc event as the key for the table, which
+-  just means that each unique call_site address will have an entry
+-  created for it in the table.  The 'val=bytes_req' parameter tells
+-  the hist trigger that for each unique entry (call_site) in the
+-  table, it should keep a running total of the number of bytes
+-  requested by that call_site.
+-
+-  We'll let it run for a while and then dump the contents of the 'hist'
+-  file in the kmalloc event's subdirectory (for readability, a number
+-  of entries have been omitted):
+-
+-  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+-  # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+-
+-  { call_site: 18446744072106379007 } hitcount:          1  bytes_req:        176
+-  { call_site: 18446744071579557049 } hitcount:          1  bytes_req:       1024
+-  { call_site: 18446744071580608289 } hitcount:          1  bytes_req:      16384
+-  { call_site: 18446744071581827654 } hitcount:          1  bytes_req:         24
+-  { call_site: 18446744071580700980 } hitcount:          1  bytes_req:          8
+-  { call_site: 18446744071579359876 } hitcount:          1  bytes_req:        152
+-  { call_site: 18446744071580795365 } hitcount:          3  bytes_req:        144
+-  { call_site: 18446744071581303129 } hitcount:          3  bytes_req:        144
+-  { call_site: 18446744071580713234 } hitcount:          4  bytes_req:       2560
+-  { call_site: 18446744071580933750 } hitcount:          4  bytes_req:        736
+-  .
+-  .
+-  .
+- { call_site: 18446744072106047046 } hitcount: 69 bytes_req: 5576 +- { call_site: 18446744071582116407 } hitcount: 73 bytes_req: 2336 +- { call_site: 18446744072106054684 } hitcount: 136 bytes_req: 140504 +- { call_site: 18446744072106224230 } hitcount: 136 bytes_req: 19584 +- { call_site: 18446744072106078074 } hitcount: 153 bytes_req: 2448 +- { call_site: 18446744072106062406 } hitcount: 153 bytes_req: 36720 +- { call_site: 18446744071582507929 } hitcount: 153 bytes_req: 37088 +- { call_site: 18446744072102520590 } hitcount: 273 bytes_req: 10920 +- { call_site: 18446744071582143559 } hitcount: 358 bytes_req: 716 +- { call_site: 18446744072106465852 } hitcount: 417 bytes_req: 56712 +- { call_site: 18446744072102523378 } hitcount: 485 bytes_req: 27160 +- { call_site: 18446744072099568646 } hitcount: 1676 bytes_req: 33520 +- +- Totals: +- Hits: 4610 +- Entries: 45 +- Dropped: 0 +- +- The output displays a line for each entry, beginning with the key +- specified in the trigger, followed by the value(s) also specified in +- the trigger. At the beginning of the output is a line that displays +- the trigger info, which can also be displayed by reading the +- 'trigger' file: +- +- # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger +- hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] +- +- At the end of the output are a few lines that display the overall +- totals for the run. The 'Hits' field shows the total number of +- times the event trigger was hit, the 'Entries' field shows the total +- number of used entries in the hash table, and the 'Dropped' field +- shows the number of hits that were dropped because the number of +- used entries for the run exceeded the maximum number of entries +- allowed for the table (normally 0, but if not a hint that you may +- want to increase the size of the table using the 'size' parameter). +- +- Notice in the above output that there's an extra field, 'hitcount', +- which wasn't specified in the trigger. Also notice that in the +- trigger info output, there's a parameter, 'sort=hitcount', which +- wasn't specified in the trigger either. The reason for that is that +- every trigger implicitly keeps a count of the total number of hits +- attributed to a given entry, called the 'hitcount'. That hitcount +- information is explicitly displayed in the output, and in the +- absence of a user-specified sort parameter, is used as the default +- sort field. +- +- The value 'hitcount' can be used in place of an explicit value in +- the 'values' parameter if you don't really need to have any +- particular field summed and are mainly interested in hit +- frequencies. +- +- To turn the hist trigger off, simply call up the trigger in the +- command history and re-execute it with a '!' prepended: +- +- # echo '!hist:key=call_site:val=bytes_req' > \ +- /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger +- +- Finally, notice that the call_site as displayed in the output above +- isn't really very useful. It's an address, but normally addresses +- are displayed in hex. 
To have a numeric field displayed as a hex +- value, simply append '.hex' to the field name in the trigger: +- +- # echo 'hist:key=call_site.hex:val=bytes_req' > \ +- /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger +- +- # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist +- # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active] +- +- { call_site: ffffffffa026b291 } hitcount: 1 bytes_req: 433 +- { call_site: ffffffffa07186ff } hitcount: 1 bytes_req: 176 +- { call_site: ffffffff811ae721 } hitcount: 1 bytes_req: 16384 +- { call_site: ffffffff811c5134 } hitcount: 1 bytes_req: 8 +- { call_site: ffffffffa04a9ebb } hitcount: 1 bytes_req: 511 +- { call_site: ffffffff8122e0a6 } hitcount: 1 bytes_req: 12 +- { call_site: ffffffff8107da84 } hitcount: 1 bytes_req: 152 +- { call_site: ffffffff812d8246 } hitcount: 1 bytes_req: 24 +- { call_site: ffffffff811dc1e5 } hitcount: 3 bytes_req: 144 +- { call_site: ffffffffa02515e8 } hitcount: 3 bytes_req: 648 +- { call_site: ffffffff81258159 } hitcount: 3 bytes_req: 144 +- { call_site: ffffffff811c80f4 } hitcount: 4 bytes_req: 544 +- . +- . +- . +- { call_site: ffffffffa06c7646 } hitcount: 106 bytes_req: 8024 +- { call_site: ffffffffa06cb246 } hitcount: 132 bytes_req: 31680 +- { call_site: ffffffffa06cef7a } hitcount: 132 bytes_req: 2112 +- { call_site: ffffffff8137e399 } hitcount: 132 bytes_req: 23232 +- { call_site: ffffffffa06c941c } hitcount: 185 bytes_req: 171360 +- { call_site: ffffffffa06f2a66 } hitcount: 185 bytes_req: 26640 +- { call_site: ffffffffa036a70e } hitcount: 265 bytes_req: 10600 +- { call_site: ffffffff81325447 } hitcount: 292 bytes_req: 584 +- { call_site: ffffffffa072da3c } hitcount: 446 bytes_req: 60656 +- { call_site: ffffffffa036b1f2 } hitcount: 526 bytes_req: 29456 +- { call_site: ffffffffa0099c06 } hitcount: 1780 bytes_req: 35600 +- +- Totals: +- Hits: 4775 +- Entries: 46 +- Dropped: 0 +- +- Even that's only marginally more useful - while hex values do look +- more like addresses, what users are typically more interested in +- when looking at text addresses are the corresponding symbols +- instead. 
To have an address displayed as a symbolic value instead,
+-  simply append '.sym' or '.sym-offset' to the field name in the
+-  trigger:
+-
+-  # echo 'hist:key=call_site.sym:val=bytes_req' > \
+-    /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+-
+-  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+-  # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
+-
+-  { call_site: [ffffffff810adcb9] syslog_print_all                              } hitcount:          1  bytes_req:       1024
+-  { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
+-  { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
+-  { call_site: [ffffffff8154acbe] usb_alloc_urb                                 } hitcount:          1  bytes_req:        192
+-  { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
+-  { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
+-  { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
+-  { call_site: [ffffffff811febd5] fsnotify_alloc_group                          } hitcount:          2  bytes_req:        528
+-  { call_site: [ffffffff81440f58] __tty_buffer_request_room                     } hitcount:          2  bytes_req:       2624
+-  { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          2  bytes_req:         96
+-  { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211]      } hitcount:          2  bytes_req:        464
+-  { call_site: [ffffffff81672406] tcp_get_metrics                               } hitcount:          2  bytes_req:        304
+-  { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
+-  { call_site: [ffffffff81089b05] sched_create_group                            } hitcount:          2  bytes_req:       1424
+-  .
+-  .
+-  .
+-  { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1185  bytes_req:     123240
+-  { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:       1185  bytes_req:     104280
+-  { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       1402  bytes_req:     190672
+-  { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       1518  bytes_req:     146208
+-  { call_site: [ffffffffa029070e] drm_vma_node_allow [drm]                      } hitcount:       1746  bytes_req:      69840
+-  { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       2021  bytes_req:     792312
+-  { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       2592  bytes_req:     145152
+-  { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2629  bytes_req:     378576
+-  { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2629  bytes_req:    3783248
+-  { call_site: [ffffffff81325607] apparmor_file_alloc_security                  } hitcount:       5192  bytes_req:      10384
+-  { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       5529  bytes_req:     110584
+-  { call_site: [ffffffff8131ebf7] aa_alloc_task_context                         } hitcount:      21943  bytes_req:     702176
+-  { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:      55759  bytes_req:    5074265
+-
+-  Totals:
+-      Hits: 109928
+-      Entries: 71
+-      Dropped: 0
+-
+-  Because the default sort key above is 'hitcount', the above shows
+-  the list of call_sites by increasing hitcount, so that at the bottom
+-  we see the functions that made the most kmalloc calls during the
+-  run.
If instead we wanted to see the top kmalloc callers in
+-  terms of the number of bytes requested rather than the number of
+-  calls, and we wanted the top caller to appear at the top, we can use
+-  the 'sort' parameter, along with the 'descending' modifier:
+-
+-  # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
+-    /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+-
+-  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+-  # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+-
+-  { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2186  bytes_req:    3397464
+-  { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1790  bytes_req:     712176
+-  { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       8132  bytes_req:     513135
+-  { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        106  bytes_req:     440128
+-  { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2186  bytes_req:     314784
+-  { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       2174  bytes_req:     208992
+-  { call_site: [ffffffff811ae8e1] __kmalloc                                     } hitcount:          8  bytes_req:     131072
+-  { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:        859  bytes_req:     116824
+-  { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       1834  bytes_req:     102704
+-  { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:        972  bytes_req:     101088
+-  { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:        972  bytes_req:      85536
+-  { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       3333  bytes_req:      66664
+-  { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        209  bytes_req:      61632
+-  .
+-  .
+-  .
+- { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 +- { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 +- { call_site: [ffffffff812d8406] copy_semundo } hitcount: 2 bytes_req: 48 +- { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 1 bytes_req: 48 +- { call_site: [ffffffffa027121a] drm_getmagic [drm] } hitcount: 1 bytes_req: 48 +- { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 +- { call_site: [ffffffff811c52f4] bprm_change_interp } hitcount: 2 bytes_req: 16 +- { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 +- { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 +- { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 +- +- Totals: +- Hits: 32133 +- Entries: 81 +- Dropped: 0 +- +- To display the offset and size information in addition to the symbol +- name, just use 'sym-offset' instead: +- +- # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \ +- /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger +- +- # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist +- # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] +- +- { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915] } hitcount: 4569 bytes_req: 3163720 +- { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915] } hitcount: 4569 bytes_req: 657936 +- { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915] } hitcount: 1519 bytes_req: 472936 +- { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915] } hitcount: 3050 bytes_req: 211832 +- { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50 } hitcount: 34 bytes_req: 148384 +- { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915] } hitcount: 1385 bytes_req: 144040 +- { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0 } hitcount: 8 bytes_req: 131072 +- { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm] } hitcount: 1385 bytes_req: 121880 +- { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm] } hitcount: 1848 bytes_req: 103488 +- { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915] } hitcount: 461 bytes_req: 62696 +- { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm] } hitcount: 1541 bytes_req: 61640 +- { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0 } hitcount: 57 bytes_req: 57456 +- . +- . +- . +- { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0 } hitcount: 2 bytes_req: 128 +- { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm] } hitcount: 3 bytes_req: 96 +- { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0 } hitcount: 8 bytes_req: 96 +- { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650 } hitcount: 3 bytes_req: 84 +- { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110 } hitcount: 1 bytes_req: 8 +- { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid] } hitcount: 1 bytes_req: 7 +- { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid] } hitcount: 1 bytes_req: 7 +- +- Totals: +- Hits: 26098 +- Entries: 64 +- Dropped: 0 +- +- We can also add multiple fields to the 'values' parameter. 
For +- example, we might want to see the total number of bytes allocated +- alongside bytes requested, and display the result sorted by bytes +- allocated in a descending order: +- +- # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \ +- /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger +- +- # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist +- # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active] +- +- { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 7403 bytes_req: 4084360 bytes_alloc: 5958016 +- { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 541 bytes_req: 2213968 bytes_alloc: 2228224 +- { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 7404 bytes_req: 1066176 bytes_alloc: 1421568 +- { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1565 bytes_req: 557368 bytes_alloc: 1037760 +- { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 9557 bytes_req: 595778 bytes_alloc: 695744 +- { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 5839 bytes_req: 430680 bytes_alloc: 470400 +- { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 2388 bytes_req: 324768 bytes_alloc: 458496 +- { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 3911 bytes_req: 219016 bytes_alloc: 250304 +- { call_site: [ffffffff815f8d7b] sk_prot_alloc } hitcount: 235 bytes_req: 236880 bytes_alloc: 240640 +- { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 557 bytes_req: 169024 bytes_alloc: 221760 +- { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 9378 bytes_req: 187548 bytes_alloc: 206312 +- { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1519 bytes_req: 157976 bytes_alloc: 194432 +- . +- . +- . +- { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach } hitcount: 2 bytes_req: 144 bytes_alloc: 192 +- { call_site: [ffffffff81097ee8] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 +- { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 +- { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 +- { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 +- { call_site: [ffffffff81213e80] load_elf_binary } hitcount: 3 bytes_req: 84 bytes_alloc: 96 +- { call_site: [ffffffff81079a2e] kthread_create_on_node } hitcount: 1 bytes_req: 56 bytes_alloc: 64 +- { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 +- { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 bytes_alloc: 8 +- { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 +- +- Totals: +- Hits: 66598 +- Entries: 65 +- Dropped: 0 +- +- Finally, to finish off our kmalloc example, instead of simply having +- the hist trigger display symbolic call_sites, we can have the hist +- trigger additionally display the complete set of kernel stack traces +- that led to each call_site. 
To do that, we simply use the special +- value 'stacktrace' for the key parameter: +- +- # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \ +- /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger +- +- The above trigger will use the kernel stack trace in effect when an +- event is triggered as the key for the hash table. This allows the +- enumeration of every kernel callpath that led up to a particular +- event, along with a running total of any of the event fields for +- that event. Here we tally bytes requested and bytes allocated for +- every callpath in the system that led up to a kmalloc (in this case +- every callpath to a kmalloc for a kernel compile): +- +- # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist +- # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active] +- +- { stacktrace: +- __kmalloc_track_caller+0x10b/0x1a0 +- kmemdup+0x20/0x50 +- hidraw_report_event+0x8a/0x120 [hid] +- hid_report_raw_event+0x3ea/0x440 [hid] +- hid_input_report+0x112/0x190 [hid] +- hid_irq_in+0xc2/0x260 [usbhid] +- __usb_hcd_giveback_urb+0x72/0x120 +- usb_giveback_urb_bh+0x9e/0xe0 +- tasklet_hi_action+0xf8/0x100 +- __do_softirq+0x114/0x2c0 +- irq_exit+0xa5/0xb0 +- do_IRQ+0x5a/0xf0 +- ret_from_intr+0x0/0x30 +- cpuidle_enter+0x17/0x20 +- cpu_startup_entry+0x315/0x3e0 +- rest_init+0x7c/0x80 +- } hitcount: 3 bytes_req: 21 bytes_alloc: 24 +- { stacktrace: +- __kmalloc_track_caller+0x10b/0x1a0 +- kmemdup+0x20/0x50 +- hidraw_report_event+0x8a/0x120 [hid] +- hid_report_raw_event+0x3ea/0x440 [hid] +- hid_input_report+0x112/0x190 [hid] +- hid_irq_in+0xc2/0x260 [usbhid] +- __usb_hcd_giveback_urb+0x72/0x120 +- usb_giveback_urb_bh+0x9e/0xe0 +- tasklet_hi_action+0xf8/0x100 +- __do_softirq+0x114/0x2c0 +- irq_exit+0xa5/0xb0 +- do_IRQ+0x5a/0xf0 +- ret_from_intr+0x0/0x30 +- } hitcount: 3 bytes_req: 21 bytes_alloc: 24 +- { stacktrace: +- kmem_cache_alloc_trace+0xeb/0x150 +- aa_alloc_task_context+0x27/0x40 +- apparmor_cred_prepare+0x1f/0x50 +- security_prepare_creds+0x16/0x20 +- prepare_creds+0xdf/0x1a0 +- SyS_capset+0xb5/0x200 +- system_call_fastpath+0x12/0x6a +- } hitcount: 1 bytes_req: 32 bytes_alloc: 32 +- . +- . +- . 
+- { stacktrace: +- __kmalloc+0x11b/0x1b0 +- i915_gem_execbuffer2+0x6c/0x2c0 [i915] +- drm_ioctl+0x349/0x670 [drm] +- do_vfs_ioctl+0x2f0/0x4f0 +- SyS_ioctl+0x81/0xa0 +- system_call_fastpath+0x12/0x6a +- } hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808 +- { stacktrace: +- __kmalloc+0x11b/0x1b0 +- load_elf_phdrs+0x76/0xa0 +- load_elf_binary+0x102/0x1650 +- search_binary_handler+0x97/0x1d0 +- do_execveat_common.isra.34+0x551/0x6e0 +- SyS_execve+0x3a/0x50 +- return_from_execve+0x0/0x23 +- } hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048 +- { stacktrace: +- kmem_cache_alloc_trace+0xeb/0x150 +- apparmor_file_alloc_security+0x27/0x40 +- security_file_alloc+0x16/0x20 +- get_empty_filp+0x93/0x1c0 +- path_openat+0x31/0x5f0 +- do_filp_open+0x3a/0x90 +- do_sys_open+0x128/0x220 +- SyS_open+0x1e/0x20 +- system_call_fastpath+0x12/0x6a +- } hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376 +- { stacktrace: +- __kmalloc+0x11b/0x1b0 +- seq_buf_alloc+0x1b/0x50 +- seq_read+0x2cc/0x370 +- proc_reg_read+0x3d/0x80 +- __vfs_read+0x28/0xe0 +- vfs_read+0x86/0x140 +- SyS_read+0x46/0xb0 +- system_call_fastpath+0x12/0x6a +- } hitcount: 19133 bytes_req: 78368768 bytes_alloc: 78368768 +- +- Totals: +- Hits: 6085872 +- Entries: 253 +- Dropped: 0 +- +- If you key a hist trigger on common_pid, in order for example to +- gather and display sorted totals for each process, you can use the +- special .execname modifier to display the executable names for the +- processes in the table rather than raw pids. The example below +- keeps a per-process sum of total bytes read: +- +- # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \ +- /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger +- +- # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist +- # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active] +- +- { common_pid: gnome-terminal [ 3196] } hitcount: 280 count: 1093512 +- { common_pid: Xorg [ 1309] } hitcount: 525 count: 256640 +- { common_pid: compiz [ 2889] } hitcount: 59 count: 254400 +- { common_pid: bash [ 8710] } hitcount: 3 count: 66369 +- { common_pid: dbus-daemon-lau [ 8703] } hitcount: 49 count: 47739 +- { common_pid: irqbalance [ 1252] } hitcount: 27 count: 27648 +- { common_pid: 01ifupdown [ 8705] } hitcount: 3 count: 17216 +- { common_pid: dbus-daemon [ 772] } hitcount: 10 count: 12396 +- { common_pid: Socket Thread [ 8342] } hitcount: 11 count: 11264 +- { common_pid: nm-dhcp-client. [ 8701] } hitcount: 6 count: 7424 +- { common_pid: gmain [ 1315] } hitcount: 18 count: 6336 +- . +- . +- . +- { common_pid: postgres [ 1892] } hitcount: 2 count: 32 +- { common_pid: postgres [ 1891] } hitcount: 2 count: 32 +- { common_pid: gmain [ 8704] } hitcount: 2 count: 32 +- { common_pid: upstart-dbus-br [ 2740] } hitcount: 21 count: 21 +- { common_pid: nm-dispatcher.a [ 8696] } hitcount: 1 count: 16 +- { common_pid: indicator-datet [ 2904] } hitcount: 1 count: 16 +- { common_pid: gdbus [ 2998] } hitcount: 1 count: 16 +- { common_pid: rtkit-daemon [ 2052] } hitcount: 1 count: 8 +- { common_pid: init [ 1] } hitcount: 2 count: 2 +- +- Totals: +- Hits: 2116 +- Entries: 51 +- Dropped: 0 +- +- Similarly, if you key a hist trigger on syscall id, for example to +- gather and display a list of systemwide syscall hits, you can use +- the special .syscall modifier to display the syscall names rather +- than raw ids. 
The example below keeps a running total of syscall +- counts for the system during the run: +- +- # echo 'hist:key=id.syscall:val=hitcount' > \ +- /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger +- +- # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist +- # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active] +- +- { id: sys_fsync [ 74] } hitcount: 1 +- { id: sys_newuname [ 63] } hitcount: 1 +- { id: sys_prctl [157] } hitcount: 1 +- { id: sys_statfs [137] } hitcount: 1 +- { id: sys_symlink [ 88] } hitcount: 1 +- { id: sys_sendmmsg [307] } hitcount: 1 +- { id: sys_semctl [ 66] } hitcount: 1 +- { id: sys_readlink [ 89] } hitcount: 3 +- { id: sys_bind [ 49] } hitcount: 3 +- { id: sys_getsockname [ 51] } hitcount: 3 +- { id: sys_unlink [ 87] } hitcount: 3 +- { id: sys_rename [ 82] } hitcount: 4 +- { id: unknown_syscall [ 58] } hitcount: 4 +- { id: sys_connect [ 42] } hitcount: 4 +- { id: sys_getpid [ 39] } hitcount: 4 +- . +- . +- . +- { id: sys_rt_sigprocmask [ 14] } hitcount: 952 +- { id: sys_futex [202] } hitcount: 1534 +- { id: sys_write [ 1] } hitcount: 2689 +- { id: sys_setitimer [ 38] } hitcount: 2797 +- { id: sys_read [ 0] } hitcount: 3202 +- { id: sys_select [ 23] } hitcount: 3773 +- { id: sys_writev [ 20] } hitcount: 4531 +- { id: sys_poll [ 7] } hitcount: 8314 +- { id: sys_recvmsg [ 47] } hitcount: 13738 +- { id: sys_ioctl [ 16] } hitcount: 21843 +- +- Totals: +- Hits: 67612 +- Entries: 72 +- Dropped: 0 +- +- The syscall counts above provide a rough overall picture of system +- call activity on the system; we can see for example that the most +- popular system call on this system was the 'sys_ioctl' system call. +- +- We can use 'compound' keys to refine that number and provide some +- further insight as to which processes exactly contribute to the +- overall ioctl count. +- +- The command below keeps a hitcount for every unique combination of +- system call id and pid - the end result is essentially a table +- that keeps a per-pid sum of system call hits. The results are +- sorted using the system call id as the primary key, and the +- hitcount sum as the secondary key: +- +- # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \ +- /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger +- +- # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist +- # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active] +- +- { id: sys_read [ 0], common_pid: rtkit-daemon [ 1877] } hitcount: 1 +- { id: sys_read [ 0], common_pid: gdbus [ 2976] } hitcount: 1 +- { id: sys_read [ 0], common_pid: console-kit-dae [ 3400] } hitcount: 1 +- { id: sys_read [ 0], common_pid: postgres [ 1865] } hitcount: 1 +- { id: sys_read [ 0], common_pid: deja-dup-monito [ 3543] } hitcount: 2 +- { id: sys_read [ 0], common_pid: NetworkManager [ 890] } hitcount: 2 +- { id: sys_read [ 0], common_pid: evolution-calen [ 3048] } hitcount: 2 +- { id: sys_read [ 0], common_pid: postgres [ 1864] } hitcount: 2 +- { id: sys_read [ 0], common_pid: nm-applet [ 3022] } hitcount: 2 +- { id: sys_read [ 0], common_pid: whoopsie [ 1212] } hitcount: 2 +- . +- . +- . 
+-  { id: sys_ioctl                     [ 16], common_pid: bash            [ 8479] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: bash            [ 3472] } hitcount:         12
+-  { id: sys_ioctl                     [ 16], common_pid: gnome-terminal  [ 3199] } hitcount:         16
+-  { id: sys_ioctl                     [ 16], common_pid: Xorg            [ 1267] } hitcount:       1808
+-  { id: sys_ioctl                     [ 16], common_pid: compiz          [ 2994] } hitcount:       5580
+-  .
+-  .
+-  .
+-  { id: sys_waitid                    [247], common_pid: upstart-dbus-br [ 2690] } hitcount:          3
+-  { id: sys_waitid                    [247], common_pid: upstart-dbus-br [ 2688] } hitcount:         16
+-  { id: sys_inotify_add_watch         [254], common_pid: gmain           [  975] } hitcount:          2
+-  { id: sys_inotify_add_watch         [254], common_pid: gmain           [ 3204] } hitcount:          4
+-  { id: sys_inotify_add_watch         [254], common_pid: gmain           [ 2888] } hitcount:          4
+-  { id: sys_inotify_add_watch         [254], common_pid: gmain           [ 3003] } hitcount:          4
+-  { id: sys_inotify_add_watch         [254], common_pid: gmain           [ 2873] } hitcount:          4
+-  { id: sys_inotify_add_watch         [254], common_pid: gmain           [ 3196] } hitcount:          6
+-  { id: sys_openat                    [257], common_pid: java            [ 2623] } hitcount:          2
+-  { id: sys_eventfd2                  [290], common_pid: ibus-ui-gtk3    [ 2760] } hitcount:          4
+-  { id: sys_eventfd2                  [290], common_pid: compiz          [ 2994] } hitcount:          6
+-
+-  Totals:
+-      Hits: 31536
+-      Entries: 323
+-      Dropped: 0
+-
+-  The above list does give us a breakdown of the ioctl syscall by
+-  pid, but it also gives us quite a bit more than that, which we
+-  don't really care about at the moment.  Since we know the syscall
+-  id for sys_ioctl (16, displayed next to the sys_ioctl name), we
+-  can use that to filter out all the other syscalls:
+-
+-  # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
+-    /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+-
+-  # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+-  # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
+-
+-  { id: sys_ioctl                     [ 16], common_pid: gmain           [ 2769] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: evolution-addre [ 8571] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: gmain           [ 3003] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: gmain           [ 2781] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: gmain           [ 2829] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: bash            [ 8726] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: bash            [ 8508] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: gmain           [ 2970] } hitcount:          1
+-  { id: sys_ioctl                     [ 16], common_pid: gmain           [ 2768] } hitcount:          1
+-  .
+-  .
+-  .
+-  { id: sys_ioctl                     [ 16], common_pid: pool            [ 8559] } hitcount:         45
+-  { id: sys_ioctl                     [ 16], common_pid: pool            [ 8555] } hitcount:         48
+-  { id: sys_ioctl                     [ 16], common_pid: pool            [ 8551] } hitcount:         48
+-  { id: sys_ioctl                     [ 16], common_pid: avahi-daemon    [  896] } hitcount:         66
+-  { id: sys_ioctl                     [ 16], common_pid: Xorg            [ 1267] } hitcount:      26674
+-  { id: sys_ioctl                     [ 16], common_pid: compiz          [ 2994] } hitcount:      73443
+-
+-  Totals:
+-      Hits: 101162
+-      Entries: 103
+-      Dropped: 0
+-
+-  The above output shows that 'compiz' and 'Xorg' are far and away
+-  the heaviest ioctl callers (which might lead to questions about
+-  whether they really need to be making all those calls and to
+-  possible avenues for further investigation.)
+-
+-  The compound key examples used a key and a sum value (hitcount) to
+-  sort the output, but we can just as easily use two keys instead.
+-  Here's an example where we use a compound key composed of the
+-  common_pid and size event fields.
Sorting with pid as the primary +- key and 'size' as the secondary key allows us to display an +- ordered summary of the recvfrom sizes, with counts, received by +- each process: +- +- # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \ +- /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger +- +- # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist +- # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active] +- +- { common_pid: smbd [ 784], size: 4 } hitcount: 1 +- { common_pid: dnsmasq [ 1412], size: 4096 } hitcount: 672 +- { common_pid: postgres [ 1796], size: 1000 } hitcount: 6 +- { common_pid: postgres [ 1867], size: 1000 } hitcount: 10 +- { common_pid: bamfdaemon [ 2787], size: 28 } hitcount: 2 +- { common_pid: bamfdaemon [ 2787], size: 14360 } hitcount: 1 +- { common_pid: compiz [ 2994], size: 8 } hitcount: 1 +- { common_pid: compiz [ 2994], size: 20 } hitcount: 11 +- { common_pid: gnome-terminal [ 3199], size: 4 } hitcount: 2 +- { common_pid: firefox [ 8817], size: 4 } hitcount: 1 +- { common_pid: firefox [ 8817], size: 8 } hitcount: 5 +- { common_pid: firefox [ 8817], size: 588 } hitcount: 2 +- { common_pid: firefox [ 8817], size: 628 } hitcount: 1 +- { common_pid: firefox [ 8817], size: 6944 } hitcount: 1 +- { common_pid: firefox [ 8817], size: 408880 } hitcount: 2 +- { common_pid: firefox [ 8822], size: 8 } hitcount: 2 +- { common_pid: firefox [ 8822], size: 160 } hitcount: 2 +- { common_pid: firefox [ 8822], size: 320 } hitcount: 2 +- { common_pid: firefox [ 8822], size: 352 } hitcount: 1 +- . +- . +- . +- { common_pid: pool [ 8923], size: 1960 } hitcount: 10 +- { common_pid: pool [ 8923], size: 2048 } hitcount: 10 +- { common_pid: pool [ 8924], size: 1960 } hitcount: 10 +- { common_pid: pool [ 8924], size: 2048 } hitcount: 10 +- { common_pid: pool [ 8928], size: 1964 } hitcount: 4 +- { common_pid: pool [ 8928], size: 1965 } hitcount: 2 +- { common_pid: pool [ 8928], size: 2048 } hitcount: 6 +- { common_pid: pool [ 8929], size: 1982 } hitcount: 1 +- { common_pid: pool [ 8929], size: 2048 } hitcount: 1 +- +- Totals: +- Hits: 2016 +- Entries: 224 +- Dropped: 0 +- +- The above example also illustrates the fact that although a compound +- key is treated as a single entity for hashing purposes, the sub-keys +- it's composed of can be accessed independently. +- +- The next example uses a string field as the hash key and +- demonstrates how you can manually pause and continue a hist trigger. 
+- In this example, we'll aggregate fork counts and don't expect a +- large number of entries in the hash table, so we'll drop it to a +- much smaller number, say 256: +- +- # echo 'hist:key=child_comm:val=hitcount:size=256' > \ +- /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger +- +- # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist +- # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] +- +- { child_comm: dconf worker } hitcount: 1 +- { child_comm: ibus-daemon } hitcount: 1 +- { child_comm: whoopsie } hitcount: 1 +- { child_comm: smbd } hitcount: 1 +- { child_comm: gdbus } hitcount: 1 +- { child_comm: kthreadd } hitcount: 1 +- { child_comm: dconf worker } hitcount: 1 +- { child_comm: evolution-alarm } hitcount: 2 +- { child_comm: Socket Thread } hitcount: 2 +- { child_comm: postgres } hitcount: 2 +- { child_comm: bash } hitcount: 3 +- { child_comm: compiz } hitcount: 3 +- { child_comm: evolution-sourc } hitcount: 4 +- { child_comm: dhclient } hitcount: 4 +- { child_comm: pool } hitcount: 5 +- { child_comm: nm-dispatcher.a } hitcount: 8 +- { child_comm: firefox } hitcount: 8 +- { child_comm: dbus-daemon } hitcount: 8 +- { child_comm: glib-pacrunner } hitcount: 10 +- { child_comm: evolution } hitcount: 23 +- +- Totals: +- Hits: 89 +- Entries: 20 +- Dropped: 0 +- +- If we want to pause the hist trigger, we can simply append :pause to +- the command that started the trigger. Notice that the trigger info +- displays as [paused]: +- +- # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \ +- /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger +- +- # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist +- # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused] +- +- { child_comm: dconf worker } hitcount: 1 +- { child_comm: kthreadd } hitcount: 1 +- { child_comm: dconf worker } hitcount: 1 +- { child_comm: gdbus } hitcount: 1 +- { child_comm: ibus-daemon } hitcount: 1 +- { child_comm: Socket Thread } hitcount: 2 +- { child_comm: evolution-alarm } hitcount: 2 +- { child_comm: smbd } hitcount: 2 +- { child_comm: bash } hitcount: 3 +- { child_comm: whoopsie } hitcount: 3 +- { child_comm: compiz } hitcount: 3 +- { child_comm: evolution-sourc } hitcount: 4 +- { child_comm: pool } hitcount: 5 +- { child_comm: postgres } hitcount: 6 +- { child_comm: firefox } hitcount: 8 +- { child_comm: dhclient } hitcount: 10 +- { child_comm: emacs } hitcount: 12 +- { child_comm: dbus-daemon } hitcount: 20 +- { child_comm: nm-dispatcher.a } hitcount: 20 +- { child_comm: evolution } hitcount: 35 +- { child_comm: glib-pacrunner } hitcount: 59 +- +- Totals: +- Hits: 199 +- Entries: 21 +- Dropped: 0 +- +- To manually continue having the trigger aggregate events, append +- :cont instead. 
Notice that the trigger info displays as [active]
+-  again, and the data has changed:
+-
+-  # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
+-    /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+-
+-  # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+-  # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+-
+-  { child_comm: dconf worker                        } hitcount:          1
+-  { child_comm: dconf worker                        } hitcount:          1
+-  { child_comm: kthreadd                            } hitcount:          1
+-  { child_comm: gdbus                               } hitcount:          1
+-  { child_comm: ibus-daemon                         } hitcount:          1
+-  { child_comm: Socket Thread                       } hitcount:          2
+-  { child_comm: evolution-alarm                     } hitcount:          2
+-  { child_comm: smbd                                } hitcount:          2
+-  { child_comm: whoopsie                            } hitcount:          3
+-  { child_comm: compiz                              } hitcount:          3
+-  { child_comm: evolution-sourc                     } hitcount:          4
+-  { child_comm: bash                                } hitcount:          5
+-  { child_comm: pool                                } hitcount:          5
+-  { child_comm: postgres                            } hitcount:          6
+-  { child_comm: firefox                             } hitcount:          8
+-  { child_comm: dhclient                            } hitcount:         11
+-  { child_comm: emacs                               } hitcount:         12
+-  { child_comm: dbus-daemon                         } hitcount:         22
+-  { child_comm: nm-dispatcher.a                     } hitcount:         22
+-  { child_comm: evolution                           } hitcount:         35
+-  { child_comm: glib-pacrunner                      } hitcount:         59
+-
+-  Totals:
+-      Hits: 206
+-      Entries: 21
+-      Dropped: 0
+-
+-  The previous example showed how to start and stop a hist trigger by
+-  appending 'pause' and 'continue' to the hist trigger command.  A
+-  hist trigger can also be started in a paused state by initially
+-  starting the trigger with ':pause' appended.  This allows you to
+-  start the trigger only when you're ready to start collecting data
+-  and not before.  For example, you could start the trigger in a
+-  paused state, then unpause it and do something you want to measure,
+-  then pause the trigger again when done.
+-
+-  Of course, doing this manually can be difficult and error-prone, but
+-  it is possible to automatically start and stop a hist trigger based
+-  on some condition, via the enable_hist and disable_hist triggers.
+-
+-  For example, suppose we wanted to take a look at the relative
+-  weights in terms of skb length for each callpath that leads to a
+-  netif_receive_skb event when downloading a decent-sized file using
+-  wget.
+-
+-  First we set up an initially paused stacktrace trigger on the
+-  netif_receive_skb event:
+-
+-  # echo 'hist:key=stacktrace:vals=len:pause' > \
+-    /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+-
+-  Next, we set up an 'enable_hist' trigger on the sched_process_exec
+-  event, with an 'if filename==/usr/bin/wget' filter.  The effect of
+-  this new trigger is that it will 'unpause' the hist trigger we just
+-  set up on netif_receive_skb if and only if it sees a
+-  sched_process_exec event with a filename of '/usr/bin/wget'.
When +- that happens, all netif_receive_skb events are aggregated into a +- hash table keyed on stacktrace: +- +- # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ +- /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger +- +- The aggregation continues until the netif_receive_skb is paused +- again, which is what the following disable_hist event does by +- creating a similar setup on the sched_process_exit event, using the +- filter 'comm==wget': +- +- # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ +- /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger +- +- Whenever a process exits and the comm field of the disable_hist +- trigger filter matches 'comm==wget', the netif_receive_skb hist +- trigger is disabled. +- +- The overall effect is that netif_receive_skb events are aggregated +- into the hash table for only the duration of the wget. Executing a +- wget command and then listing the 'hist' file will display the +- output generated by the wget command: +- +- $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz +- +- # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist +- # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] +- +- { stacktrace: +- __netif_receive_skb_core+0x46d/0x990 +- __netif_receive_skb+0x18/0x60 +- netif_receive_skb_internal+0x23/0x90 +- napi_gro_receive+0xc8/0x100 +- ieee80211_deliver_skb+0xd6/0x270 [mac80211] +- ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] +- ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] +- ieee80211_rx+0x31d/0x900 [mac80211] +- iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] +- iwl_rx_dispatch+0x8e/0xf0 [iwldvm] +- iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] +- irq_thread_fn+0x20/0x50 +- irq_thread+0x11f/0x150 +- kthread+0xd2/0xf0 +- ret_from_fork+0x42/0x70 +- } hitcount: 85 len: 28884 +- { stacktrace: +- __netif_receive_skb_core+0x46d/0x990 +- __netif_receive_skb+0x18/0x60 +- netif_receive_skb_internal+0x23/0x90 +- napi_gro_complete+0xa4/0xe0 +- dev_gro_receive+0x23a/0x360 +- napi_gro_receive+0x30/0x100 +- ieee80211_deliver_skb+0xd6/0x270 [mac80211] +- ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] +- ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] +- ieee80211_rx+0x31d/0x900 [mac80211] +- iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] +- iwl_rx_dispatch+0x8e/0xf0 [iwldvm] +- iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] +- irq_thread_fn+0x20/0x50 +- irq_thread+0x11f/0x150 +- kthread+0xd2/0xf0 +- } hitcount: 98 len: 664329 +- { stacktrace: +- __netif_receive_skb_core+0x46d/0x990 +- __netif_receive_skb+0x18/0x60 +- process_backlog+0xa8/0x150 +- net_rx_action+0x15d/0x340 +- __do_softirq+0x114/0x2c0 +- do_softirq_own_stack+0x1c/0x30 +- do_softirq+0x65/0x70 +- __local_bh_enable_ip+0xb5/0xc0 +- ip_finish_output+0x1f4/0x840 +- ip_output+0x6b/0xc0 +- ip_local_out_sk+0x31/0x40 +- ip_send_skb+0x1a/0x50 +- udp_send_skb+0x173/0x2a0 +- udp_sendmsg+0x2bf/0x9f0 +- inet_sendmsg+0x64/0xa0 +- sock_sendmsg+0x3d/0x50 +- } hitcount: 115 len: 13030 +- { stacktrace: +- __netif_receive_skb_core+0x46d/0x990 +- __netif_receive_skb+0x18/0x60 +- netif_receive_skb_internal+0x23/0x90 +- napi_gro_complete+0xa4/0xe0 +- napi_gro_flush+0x6d/0x90 +- iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi] +- irq_thread_fn+0x20/0x50 +- irq_thread+0x11f/0x150 +- kthread+0xd2/0xf0 +- ret_from_fork+0x42/0x70 +- } hitcount: 934 len: 5512212 +- +- Totals: +- Hits: 1232 +- Entries: 4 +- Dropped: 0 +- +- The above shows all the netif_receive_skb callpaths and their total +- lengths for 
the duration of the wget command. +- +- The 'clear' hist trigger param can be used to clear the hash table. +- Suppose we wanted to try another run of the previous example but +- this time also wanted to see the complete list of events that went +- into the histogram. In order to avoid having to set everything up +- again, we can just clear the histogram first: +- +- # echo 'hist:key=stacktrace:vals=len:clear' >> \ +- /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger +- +- Just to verify that it is in fact cleared, here's what we now see in +- the hist file: +- +- # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist +- # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] +- +- Totals: +- Hits: 0 +- Entries: 0 +- Dropped: 0 +- +- Since we want to see the detailed list of every netif_receive_skb +- event occurring during the new run, which are in fact the same +- events being aggregated into the hash table, we add some additional +- 'enable_event' events to the triggering sched_process_exec and +- sched_process_exit events as such: +- +- # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \ +- /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger +- +- # echo 'disable_event:net:netif_receive_skb if comm==wget' > \ +- /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger +- +- If you read the trigger files for the sched_process_exec and +- sched_process_exit triggers, you should see two triggers for each: +- one enabling/disabling the hist aggregation and the other +- enabling/disabling the logging of events: +- +- # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger +- enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget +- enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget +- +- # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger +- enable_event:net:netif_receive_skb:unlimited if comm==wget +- disable_hist:net:netif_receive_skb:unlimited if comm==wget +- +- In other words, whenever either of the sched_process_exec or +- sched_process_exit events is hit and matches 'wget', it enables or +- disables both the histogram and the event log, and what you end up +- with is a hash table and set of events just covering the specified +- duration. Run the wget command again: +- +- $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz +- +- Displaying the 'hist' file should show something similar to what you +- saw in the last run, but this time you should also see the +- individual events in the trace file: +- +- # cat /sys/kernel/debug/tracing/trace +- +- # tracer: nop +- # +- # entries-in-buffer/entries-written: 183/1426 #P:4 +- # +- # _-----=> irqs-off +- # / _----=> need-resched +- # | / _---=> hardirq/softirq +- # || / _--=> preempt-depth +- # ||| / delay +- # TASK-PID CPU# |||| TIMESTAMP FUNCTION +- # | | | |||| | | +- wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60 +- wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60 +- dnsmasq-1382 [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130 +- dnsmasq-1382 [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138 +- ##### CPU 2 buffer started #### +- irq/29-iwlwifi-559 [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948 +- irq/29-iwlwifi-559 [002] ..s. 
31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500 +- irq/29-iwlwifi-559 [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948 +- irq/29-iwlwifi-559 [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948 +- irq/29-iwlwifi-559 [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500 +- . +- . +- . +- +- The following example demonstrates how multiple hist triggers can be +- attached to a given event. This capability can be useful for +- creating a set of different summaries derived from the same set of +- events, or for comparing the effects of different filters, among +- other things. +- +- # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \ +- /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger +- # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \ +- /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger +- # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \ +- /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger +- # echo 'hist:keys=skbaddr.hex:vals=len' >> \ +- /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger +- # echo 'hist:keys=len:vals=common_preempt_count' >> \ +- /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger +- +- The above set of commands create four triggers differing only in +- their filters, along with a completely different though fairly +- nonsensical trigger. Note that in order to append multiple hist +- triggers to the same file, you should use the '>>' operator to +- append them ('>' will also add the new hist trigger, but will remove +- any existing hist triggers beforehand). +- +- Displaying the contents of the 'hist' file for the event shows the +- contents of all five histograms: +- +- # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist +- +- # event histogram +- # +- # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active] +- # +- +- { len: 176 } hitcount: 1 common_preempt_count: 0 +- { len: 223 } hitcount: 1 common_preempt_count: 0 +- { len: 4854 } hitcount: 1 common_preempt_count: 0 +- { len: 395 } hitcount: 1 common_preempt_count: 0 +- { len: 177 } hitcount: 1 common_preempt_count: 0 +- { len: 446 } hitcount: 1 common_preempt_count: 0 +- { len: 1601 } hitcount: 1 common_preempt_count: 0 +- . +- . +- . 
+- { len: 1280 } hitcount: 66 common_preempt_count: 0 +- { len: 116 } hitcount: 81 common_preempt_count: 40 +- { len: 708 } hitcount: 112 common_preempt_count: 0 +- { len: 46 } hitcount: 221 common_preempt_count: 0 +- { len: 1264 } hitcount: 458 common_preempt_count: 0 +- +- Totals: +- Hits: 1428 +- Entries: 147 +- Dropped: 0 +- +- +- # event histogram +- # +- # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] +- # +- +- { skbaddr: ffff8800baee5e00 } hitcount: 1 len: 130 +- { skbaddr: ffff88005f3d5600 } hitcount: 1 len: 1280 +- { skbaddr: ffff88005f3d4900 } hitcount: 1 len: 1280 +- { skbaddr: ffff88009fed6300 } hitcount: 1 len: 115 +- { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 115 +- { skbaddr: ffff88008cdb1900 } hitcount: 1 len: 46 +- { skbaddr: ffff880064b5ef00 } hitcount: 1 len: 118 +- { skbaddr: ffff880044e3c700 } hitcount: 1 len: 60 +- { skbaddr: ffff880100065900 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d46bd500 } hitcount: 1 len: 116 +- { skbaddr: ffff88005f3d5f00 } hitcount: 1 len: 1280 +- { skbaddr: ffff880100064700 } hitcount: 1 len: 365 +- { skbaddr: ffff8800badb6f00 } hitcount: 1 len: 60 +- . +- . +- . +- { skbaddr: ffff88009fe0be00 } hitcount: 27 len: 24677 +- { skbaddr: ffff88009fe0a400 } hitcount: 27 len: 23052 +- { skbaddr: ffff88009fe0b700 } hitcount: 31 len: 25589 +- { skbaddr: ffff88009fe0b600 } hitcount: 32 len: 27326 +- { skbaddr: ffff88006a462800 } hitcount: 68 len: 71678 +- { skbaddr: ffff88006a463700 } hitcount: 70 len: 72678 +- { skbaddr: ffff88006a462b00 } hitcount: 71 len: 77589 +- { skbaddr: ffff88006a463600 } hitcount: 73 len: 71307 +- { skbaddr: ffff88006a462200 } hitcount: 81 len: 81032 +- +- Totals: +- Hits: 1451 +- Entries: 318 +- Dropped: 0 +- +- +- # event histogram +- # +- # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active] +- # +- +- +- Totals: +- Hits: 0 +- Entries: 0 +- Dropped: 0 +- +- +- # event histogram +- # +- # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active] +- # +- +- { skbaddr: ffff88009fd2c300 } hitcount: 1 len: 7212 +- { skbaddr: ffff8800d2bcce00 } hitcount: 1 len: 7212 +- { skbaddr: ffff8800d2bcd700 } hitcount: 1 len: 7212 +- { skbaddr: ffff8800d2bcda00 } hitcount: 1 len: 21492 +- { skbaddr: ffff8800ae2e2d00 } hitcount: 1 len: 7212 +- { skbaddr: ffff8800d2bcdb00 } hitcount: 1 len: 7212 +- { skbaddr: ffff88006a4df500 } hitcount: 1 len: 4854 +- { skbaddr: ffff88008ce47b00 } hitcount: 1 len: 18636 +- { skbaddr: ffff8800ae2e2200 } hitcount: 1 len: 12924 +- { skbaddr: ffff88005f3e1000 } hitcount: 1 len: 4356 +- { skbaddr: ffff8800d2bcdc00 } hitcount: 2 len: 24420 +- { skbaddr: ffff8800d2bcc200 } hitcount: 2 len: 12996 +- +- Totals: +- Hits: 14 +- Entries: 12 +- Dropped: 0 +- +- +- # event histogram +- # +- # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active] +- # +- +- +- Totals: +- Hits: 0 +- Entries: 0 +- Dropped: 0 +- +- Named triggers can be used to have triggers share a common set of +- histogram data. This capability is mostly useful for combining the +- output of events generated by tracepoints contained inside inline +- functions, but names can be used in a hist trigger on any event. 
+- For example, these two triggers when hit will update the same 'len' +- field in the shared 'foo' histogram data: +- +- # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ +- /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger +- # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ +- /sys/kernel/debug/tracing/events/net/netif_rx/trigger +- +- You can see that they're updating common histogram data by reading +- each event's hist files at the same time: +- +- # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist; +- cat /sys/kernel/debug/tracing/events/net/netif_rx/hist +- +- # event histogram +- # +- # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] +- # +- +- { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 +- { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 +- { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 +- { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 +- { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 +- { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 +- { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 +- { skbaddr: ffff880064505000 } hitcount: 1 len: 46 +- { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 +- { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 +- { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 +- { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 +- { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 +- { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 +- { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 +- { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 +- { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 +- { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 +- { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 +- { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 +- { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 +- { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 +- { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 +- { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 +- { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 +- { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 +- { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 +- { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 +- { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 +- { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 +- { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 +- { skbaddr: ffff880064504400 } hitcount: 4 len: 184 +- { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 +- { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 +- { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 +- { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 +- { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 +- +- Totals: +- Hits: 81 +- Entries: 42 +- Dropped: 0 +- # event histogram +- # +- # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] +- # +- +- { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 +- { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 +- { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 +- { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 +- { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 +- { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 +- 
{ skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 +- { skbaddr: ffff880064505000 } hitcount: 1 len: 46 +- { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 +- { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 +- { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 +- { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 +- { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 +- { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 +- { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 +- { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 +- { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 +- { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 +- { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 +- { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 +- { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 +- { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 +- { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 +- { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 +- { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 +- { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 +- { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 +- { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 +- { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 +- { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 +- { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 +- { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 +- { skbaddr: ffff880064504400 } hitcount: 4 len: 184 +- { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 +- { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 +- { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 +- { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 +- { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 +- +- Totals: +- Hits: 81 +- Entries: 42 +- Dropped: 0 +- +- And here's an example that shows how to combine histogram data from +- any two events even if they don't share any 'compatible' fields +- other than 'hitcount' and 'stacktrace'. 
These commands create a +- couple of triggers named 'bar' using those fields: +- +- # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ +- /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger +- # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ +- /sys/kernel/debug/tracing/events/net/netif_rx/trigger +- +- And displaying the output of either shows some interesting if +- somewhat confusing output: +- +- # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist +- # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist +- +- # event histogram +- # +- # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active] +- # +- +- { stacktrace: +- _do_fork+0x18e/0x330 +- kernel_thread+0x29/0x30 +- kthreadd+0x154/0x1b0 +- ret_from_fork+0x3f/0x70 +- } hitcount: 1 +- { stacktrace: +- netif_rx_internal+0xb2/0xd0 +- netif_rx_ni+0x20/0x70 +- dev_loopback_xmit+0xaa/0xd0 +- ip_mc_output+0x126/0x240 +- ip_local_out_sk+0x31/0x40 +- igmp_send_report+0x1e9/0x230 +- igmp_timer_expire+0xe9/0x120 +- call_timer_fn+0x39/0xf0 +- run_timer_softirq+0x1e1/0x290 +- __do_softirq+0xfd/0x290 +- irq_exit+0x98/0xb0 +- smp_apic_timer_interrupt+0x4a/0x60 +- apic_timer_interrupt+0x6d/0x80 +- cpuidle_enter+0x17/0x20 +- call_cpuidle+0x3b/0x60 +- cpu_startup_entry+0x22d/0x310 +- } hitcount: 1 +- { stacktrace: +- netif_rx_internal+0xb2/0xd0 +- netif_rx_ni+0x20/0x70 +- dev_loopback_xmit+0xaa/0xd0 +- ip_mc_output+0x17f/0x240 +- ip_local_out_sk+0x31/0x40 +- ip_send_skb+0x1a/0x50 +- udp_send_skb+0x13e/0x270 +- udp_sendmsg+0x2bf/0x980 +- inet_sendmsg+0x67/0xa0 +- sock_sendmsg+0x38/0x50 +- SYSC_sendto+0xef/0x170 +- SyS_sendto+0xe/0x10 +- entry_SYSCALL_64_fastpath+0x12/0x6a +- } hitcount: 2 +- { stacktrace: +- netif_rx_internal+0xb2/0xd0 +- netif_rx+0x1c/0x60 +- loopback_xmit+0x6c/0xb0 +- dev_hard_start_xmit+0x219/0x3a0 +- __dev_queue_xmit+0x415/0x4f0 +- dev_queue_xmit_sk+0x13/0x20 +- ip_finish_output2+0x237/0x340 +- ip_finish_output+0x113/0x1d0 +- ip_output+0x66/0xc0 +- ip_local_out_sk+0x31/0x40 +- ip_send_skb+0x1a/0x50 +- udp_send_skb+0x16d/0x270 +- udp_sendmsg+0x2bf/0x980 +- inet_sendmsg+0x67/0xa0 +- sock_sendmsg+0x38/0x50 +- ___sys_sendmsg+0x14e/0x270 +- } hitcount: 76 +- { stacktrace: +- netif_rx_internal+0xb2/0xd0 +- netif_rx+0x1c/0x60 +- loopback_xmit+0x6c/0xb0 +- dev_hard_start_xmit+0x219/0x3a0 +- __dev_queue_xmit+0x415/0x4f0 +- dev_queue_xmit_sk+0x13/0x20 +- ip_finish_output2+0x237/0x340 +- ip_finish_output+0x113/0x1d0 +- ip_output+0x66/0xc0 +- ip_local_out_sk+0x31/0x40 +- ip_send_skb+0x1a/0x50 +- udp_send_skb+0x16d/0x270 +- udp_sendmsg+0x2bf/0x980 +- inet_sendmsg+0x67/0xa0 +- sock_sendmsg+0x38/0x50 +- ___sys_sendmsg+0x269/0x270 +- } hitcount: 77 +- { stacktrace: +- netif_rx_internal+0xb2/0xd0 +- netif_rx+0x1c/0x60 +- loopback_xmit+0x6c/0xb0 +- dev_hard_start_xmit+0x219/0x3a0 +- __dev_queue_xmit+0x415/0x4f0 +- dev_queue_xmit_sk+0x13/0x20 +- ip_finish_output2+0x237/0x340 +- ip_finish_output+0x113/0x1d0 +- ip_output+0x66/0xc0 +- ip_local_out_sk+0x31/0x40 +- ip_send_skb+0x1a/0x50 +- udp_send_skb+0x16d/0x270 +- udp_sendmsg+0x2bf/0x980 +- inet_sendmsg+0x67/0xa0 +- sock_sendmsg+0x38/0x50 +- SYSC_sendto+0xef/0x170 +- } hitcount: 88 +- { stacktrace: +- _do_fork+0x18e/0x330 +- SyS_clone+0x19/0x20 +- entry_SYSCALL_64_fastpath+0x12/0x6a +- } hitcount: 244 +- +- Totals: +- Hits: 489 +- Entries: 7 +- Dropped: 0 ++ See Documentation/trace/histogram.txt for details and examples. 
+--- /dev/null
++++ b/Documentation/trace/histogram.txt
+@@ -0,0 +1,1568 @@
++                             Event Histograms
++
++                    Documentation written by Tom Zanussi
++
++1. Introduction
++===============
++
++  Histogram triggers are special event triggers that can be used to
++  aggregate trace event data into histograms. For information on
++  trace events and event triggers, see Documentation/trace/events.txt.
++
++
++2. Histogram Trigger Command
++============================
++
++  A histogram trigger command is an event trigger command that
++  aggregates event hits into a hash table keyed on one or more trace
++  event format fields (or stacktrace) and a set of running totals
++  derived from one or more trace event format fields and/or event
++  counts (hitcount).
++
++  The format of a hist trigger is as follows:
++
++        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
++          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
++          [:clear][:name=histname1] [if <filter>]
++
++  When a matching event is hit, an entry is added to a hash table
++  using the key(s) and value(s) named. Keys and values correspond to
++  fields in the event's format description. Values must correspond to
++  numeric fields - on an event hit, the value(s) will be added to a
++  sum kept for that field. The special string 'hitcount' can be used
++  in place of an explicit value field - this is simply a count of
++  event hits. If 'values' isn't specified, an implicit 'hitcount'
++  value will be automatically created and used as the only value.
++  Keys can be any field, or the special string 'stacktrace', which
++  will use the event's kernel stacktrace as the key. The keywords
++  'keys' or 'key' can be used to specify keys, and the keywords
++  'values', 'vals', or 'val' can be used to specify values. Compound
++  keys consisting of up to two fields can be specified by the 'keys'
++  keyword. Hashing a compound key produces a unique entry in the
++  table for each unique combination of component keys, and can be
++  useful for providing more fine-grained summaries of event data.
++  Additionally, sort keys consisting of up to two fields can be
++  specified by the 'sort' keyword. If more than one field is
++  specified, the result will be a 'sort within a sort': the first key
++  is taken to be the primary sort key and the second the secondary
++  key. If a hist trigger is given a name using the 'name' parameter,
++  its histogram data will be shared with other triggers of the same
++  name, and trigger hits will update this common data. Only triggers
++  with 'compatible' fields can be combined in this way; triggers are
++  'compatible' if the fields named in the trigger share the same
++  number and type of fields and those fields also have the same names.
++  Note that any two events always share the compatible 'hitcount' and
++  'stacktrace' fields and can therefore be combined using those
++  fields, however pointless that may be.
++
++  'hist' triggers add a 'hist' file to each event's subdirectory.
++  Reading the 'hist' file for the event will dump the hash table in
++  its entirety to stdout. If there are multiple hist triggers
++  attached to an event, there will be a table for each trigger in the
++  output. The table displayed for a named trigger will be the same as
++  any other instance having the same name. Each printed hash table
++  entry is a simple list of the keys and values comprising the entry;
++  keys are printed first and are delineated by curly braces, and are
++  followed by the set of value fields for the entry. By default,
++  numeric fields are displayed as base-10 integers. This can be
++  modified by appending any of the following modifiers to the field
++  name:
++
++        .hex         display a number as a hex value
++        .sym         display an address as a symbol
++        .sym-offset  display an address as a symbol and offset
++        .syscall     display a syscall id as a system call name
++        .execname    display a common_pid as a program name
++
++  Note that in general the semantics of a given field aren't
++  interpreted when applying a modifier to it, but there are some
++  restrictions to be aware of in this regard:
++
++    - only the 'hex' modifier can be used for values (because values
++      are essentially sums, and the other modifiers don't make sense
++      in that context).
++    - the 'execname' modifier can only be used on a 'common_pid'. The
++      reason for this is that the execname is simply the 'comm' value
++      saved for the 'current' process when an event was triggered,
++      which is the same as the common_pid value saved by the event
++      tracing code. Trying to apply that comm value to other pid
++      values wouldn't be correct, and typically events that care save
++      pid-specific comm fields in the event itself.
++
++  A typical usage scenario would be the following, which enables a
++  hist trigger, reads its current contents, and then turns it off:
++
++  # echo 'hist:keys=skbaddr.hex:vals=len' > \
++        /sys/kernel/debug/tracing/events/net/netif_rx/trigger
++
++  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
++
++  # echo '!hist:keys=skbaddr.hex:vals=len' > \
++        /sys/kernel/debug/tracing/events/net/netif_rx/trigger
++
++  The trigger file itself can be read to show the details of the
++  currently attached hist trigger. This information is also displayed
++  at the top of the 'hist' file when read.
++
++  By default, the size of the hash table is 2048 entries. The 'size'
++  parameter can be used to specify more or fewer than that. The units
++  are in terms of hashtable entries - if a run uses more entries than
++  specified, the results will show the number of 'drops', the number
++  of hits that were ignored. The size should be a power of 2 between
++  128 and 131072 (any non-power-of-2 number specified will be rounded
++  up).
++
++  The 'sort' parameter can be used to specify a value field to sort
++  on. The default if unspecified is 'hitcount' and the default sort
++  order is 'ascending'. To sort in the opposite direction, append
++  '.descending' to the sort key.
++
++  The 'pause' parameter can be used to pause an existing hist trigger
++  or to start a hist trigger but not log any events until told to do
++  so. 'continue' or 'cont' can be used to start or restart a paused
++  hist trigger.
++
++  The 'clear' parameter will clear the contents of a running hist
++  trigger and leave it in its current paused/active state.
++
++  Note that the 'pause', 'cont', and 'clear' parameters should be
++  applied using the 'append' shell operator ('>>') if applied to an
++  existing trigger, rather than via the '>' operator, which will cause
++  the trigger to be removed through truncation.
++
++- enable_hist/disable_hist
++
++  The enable_hist and disable_hist triggers can be used to have one
++  event conditionally start and stop another event's already-attached
++  hist trigger. Any number of enable_hist and disable_hist triggers
++  can be attached to a given event, allowing that event to kick off
++  and stop aggregations on a host of other events.
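++
++  For instance (a minimal sketch, not one of the original examples,
++  and assuming the '>>' append semantics described above also apply
++  when stacking triggers on the same trigger file), a single
++  sched_process_exec trigger file could carry enable_hist entries for
++  two different aggregated events:
++
++  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
++        /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
++
++  # echo 'enable_hist:net:netif_rx if filename==/usr/bin/wget' >> \
++        /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger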
++
++  The format is very similar to the enable/disable_event triggers:
++
++      enable_hist:<system>:<event>[:count]
++      disable_hist:<system>:<event>[:count]
++
++  Instead of enabling or disabling the tracing of the target event
++  into the trace buffer as the enable/disable_event triggers do, the
++  enable/disable_hist triggers enable or disable the aggregation of
++  the target event into a hash table.
++
++  A typical usage scenario for the enable_hist/disable_hist triggers
++  would be to first set up a paused hist trigger on some event,
++  followed by an enable_hist/disable_hist pair that turns the hist
++  aggregation on and off when conditions of interest are hit:
++
++  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
++        /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
++
++  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
++        /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
++
++  # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
++        /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
++
++  The above sets up an initially paused hist trigger which is unpaused
++  and starts aggregating events when a given program is executed, and
++  which stops aggregating when the process exits and the hist trigger
++  is paused again.
++
++  The examples below provide a more concrete illustration of the
++  concepts and typical usage patterns discussed above.
++
++
++6.2 'hist' trigger examples
++---------------------------
++
++  The first set of examples creates aggregations using the kmalloc
++  event. The fields that can be used for the hist trigger are listed
++  in the kmalloc event's format file:
++
++  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
++  name: kmalloc
++  ID: 374
++  format:
++    field:unsigned short common_type; offset:0; size:2; signed:0;
++    field:unsigned char common_flags; offset:2; size:1; signed:0;
++    field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
++    field:int common_pid; offset:4; size:4; signed:1;
++
++    field:unsigned long call_site; offset:8; size:8; signed:0;
++    field:const void * ptr; offset:16; size:8; signed:0;
++    field:size_t bytes_req; offset:24; size:8; signed:0;
++    field:size_t bytes_alloc; offset:32; size:8; signed:0;
++    field:gfp_t gfp_flags; offset:40; size:4; signed:0;
++
++  We'll start by creating a hist trigger that generates a simple table
++  that lists the total number of bytes requested for each function in
++  the kernel that made one or more calls to kmalloc:
++
++  # echo 'hist:key=call_site:val=bytes_req' > \
++        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
++
++  This tells the tracing system to create a 'hist' trigger using the
++  call_site field of the kmalloc event as the key for the table, which
++  just means that each unique call_site address will have an entry
++  created for it in the table. The 'val=bytes_req' parameter tells
++  the hist trigger that for each unique entry (call_site) in the
++  table, it should keep a running total of the number of bytes
++  requested by that call_site.
++
++  We'll let it run for a while and then dump the contents of the
++  'hist' file in the kmalloc event's subdirectory (for readability, a
++  number of entries have been omitted):
++
++  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
++  # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
++
++  { call_site: 18446744072106379007 } hitcount: 1 bytes_req: 176
++  { call_site: 18446744071579557049 } hitcount: 1 bytes_req: 1024
++  { call_site: 18446744071580608289 } hitcount: 1 bytes_req: 16384
++  { call_site: 18446744071581827654 } hitcount: 1 bytes_req: 24
++  { call_site: 18446744071580700980 } hitcount: 1 bytes_req: 8
++  { call_site: 18446744071579359876 } hitcount: 1 bytes_req: 152
++  { call_site: 18446744071580795365 } hitcount: 3 bytes_req: 144
++  { call_site: 18446744071581303129 } hitcount: 3 bytes_req: 144
++  { call_site: 18446744071580713234 } hitcount: 4 bytes_req: 2560
++  { call_site: 18446744071580933750 } hitcount: 4 bytes_req: 736
++  .
++  .
++  .
++  { call_site: 18446744072106047046 } hitcount: 69 bytes_req: 5576
++  { call_site: 18446744071582116407 } hitcount: 73 bytes_req: 2336
++  { call_site: 18446744072106054684 } hitcount: 136 bytes_req: 140504
++  { call_site: 18446744072106224230 } hitcount: 136 bytes_req: 19584
++  { call_site: 18446744072106078074 } hitcount: 153 bytes_req: 2448
++  { call_site: 18446744072106062406 } hitcount: 153 bytes_req: 36720
++  { call_site: 18446744071582507929 } hitcount: 153 bytes_req: 37088
++  { call_site: 18446744072102520590 } hitcount: 273 bytes_req: 10920
++  { call_site: 18446744071582143559 } hitcount: 358 bytes_req: 716
++  { call_site: 18446744072106465852 } hitcount: 417 bytes_req: 56712
++  { call_site: 18446744072102523378 } hitcount: 485 bytes_req: 27160
++  { call_site: 18446744072099568646 } hitcount: 1676 bytes_req: 33520
++
++  Totals:
++      Hits: 4610
++      Entries: 45
++      Dropped: 0
++
++  The output displays a line for each entry, beginning with the key
++  specified in the trigger, followed by the value(s) also specified in
++  the trigger. At the beginning of the output is a line that displays
++  the trigger info, which can also be displayed by reading the
++  'trigger' file:
++
++  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
++  hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
++
++  At the end of the output are a few lines that display the overall
++  totals for the run. The 'Hits' field shows the total number of
++  times the event trigger was hit, the 'Entries' field shows the total
++  number of used entries in the hash table, and the 'Dropped' field
++  shows the number of hits that were dropped because the number of
++  used entries for the run exceeded the maximum number of entries
++  allowed for the table (normally 0, but if not, a hint that you may
++  want to increase the size of the table using the 'size' parameter).
++
++  Notice in the above output that there's an extra field, 'hitcount',
++  which wasn't specified in the trigger. Also notice that in the
++  trigger info output, there's a parameter, 'sort=hitcount', which
++  wasn't specified in the trigger either. The reason for that is that
++  every trigger implicitly keeps a count of the total number of hits
++  attributed to a given entry, called the 'hitcount'. That hitcount
++  information is explicitly displayed in the output, and in the
++  absence of a user-specified sort parameter, is used as the default
++  sort field.
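++
++  To see those defaults spelled out (a minimal sketch, not one of the
++  original examples), a trigger written with no 'values' and no
++  'sort' parameter at all:
++
++  # echo 'hist:keys=call_site' > \
++        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
++
++  would show up in the trigger info as
++  'hist:keys=call_site:vals=hitcount:sort=hitcount:size=2048', with
++  the implicit 'hitcount' filled in as both the only value and the
++  sort key.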
++ ++ The value 'hitcount' can be used in place of an explicit value in ++ the 'values' parameter if you don't really need to have any ++ particular field summed and are mainly interested in hit ++ frequencies. ++ ++ To turn the hist trigger off, simply call up the trigger in the ++ command history and re-execute it with a '!' prepended: ++ ++ # echo '!hist:key=call_site:val=bytes_req' > \ ++ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger ++ ++ Finally, notice that the call_site as displayed in the output above ++ isn't really very useful. It's an address, but normally addresses ++ are displayed in hex. To have a numeric field displayed as a hex ++ value, simply append '.hex' to the field name in the trigger: ++ ++ # echo 'hist:key=call_site.hex:val=bytes_req' > \ ++ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist ++ # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active] ++ ++ { call_site: ffffffffa026b291 } hitcount: 1 bytes_req: 433 ++ { call_site: ffffffffa07186ff } hitcount: 1 bytes_req: 176 ++ { call_site: ffffffff811ae721 } hitcount: 1 bytes_req: 16384 ++ { call_site: ffffffff811c5134 } hitcount: 1 bytes_req: 8 ++ { call_site: ffffffffa04a9ebb } hitcount: 1 bytes_req: 511 ++ { call_site: ffffffff8122e0a6 } hitcount: 1 bytes_req: 12 ++ { call_site: ffffffff8107da84 } hitcount: 1 bytes_req: 152 ++ { call_site: ffffffff812d8246 } hitcount: 1 bytes_req: 24 ++ { call_site: ffffffff811dc1e5 } hitcount: 3 bytes_req: 144 ++ { call_site: ffffffffa02515e8 } hitcount: 3 bytes_req: 648 ++ { call_site: ffffffff81258159 } hitcount: 3 bytes_req: 144 ++ { call_site: ffffffff811c80f4 } hitcount: 4 bytes_req: 544 ++ . ++ . ++ . ++ { call_site: ffffffffa06c7646 } hitcount: 106 bytes_req: 8024 ++ { call_site: ffffffffa06cb246 } hitcount: 132 bytes_req: 31680 ++ { call_site: ffffffffa06cef7a } hitcount: 132 bytes_req: 2112 ++ { call_site: ffffffff8137e399 } hitcount: 132 bytes_req: 23232 ++ { call_site: ffffffffa06c941c } hitcount: 185 bytes_req: 171360 ++ { call_site: ffffffffa06f2a66 } hitcount: 185 bytes_req: 26640 ++ { call_site: ffffffffa036a70e } hitcount: 265 bytes_req: 10600 ++ { call_site: ffffffff81325447 } hitcount: 292 bytes_req: 584 ++ { call_site: ffffffffa072da3c } hitcount: 446 bytes_req: 60656 ++ { call_site: ffffffffa036b1f2 } hitcount: 526 bytes_req: 29456 ++ { call_site: ffffffffa0099c06 } hitcount: 1780 bytes_req: 35600 ++ ++ Totals: ++ Hits: 4775 ++ Entries: 46 ++ Dropped: 0 ++ ++ Even that's only marginally more useful - while hex values do look ++ more like addresses, what users are typically more interested in ++ when looking at text addresses are the corresponding symbols ++ instead. 
To have an address displayed as a symbolic value instead,
++  simply append '.sym' or '.sym-offset' to the field name in the
++  trigger:
++
++  # echo 'hist:key=call_site.sym:val=bytes_req' > \
++        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
++
++  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
++  # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
++
++  { call_site: [ffffffff810adcb9] syslog_print_all } hitcount: 1 bytes_req: 1024
++  { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8
++  { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7
++  { call_site: [ffffffff8154acbe] usb_alloc_urb } hitcount: 1 bytes_req: 192
++  { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7
++  { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40
++  { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128
++  { call_site: [ffffffff811febd5] fsnotify_alloc_group } hitcount: 2 bytes_req: 528
++  { call_site: [ffffffff81440f58] __tty_buffer_request_room } hitcount: 2 bytes_req: 2624
++  { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 2 bytes_req: 96
++  { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211] } hitcount: 2 bytes_req: 464
++  { call_site: [ffffffff81672406] tcp_get_metrics } hitcount: 2 bytes_req: 304
++  { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128
++  { call_site: [ffffffff81089b05] sched_create_group } hitcount: 2 bytes_req: 1424
++  .
++  .
++  .
++  { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1185 bytes_req: 123240
++  { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 1185 bytes_req: 104280
++  { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 1402 bytes_req: 190672
++  { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 1518 bytes_req: 146208
++  { call_site: [ffffffffa029070e] drm_vma_node_allow [drm] } hitcount: 1746 bytes_req: 69840
++  { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 2021 bytes_req: 792312
++  { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 2592 bytes_req: 145152
++  { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2629 bytes_req: 378576
++  { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2629 bytes_req: 3783248
++  { call_site: [ffffffff81325607] apparmor_file_alloc_security } hitcount: 5192 bytes_req: 10384
++  { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 5529 bytes_req: 110584
++  { call_site: [ffffffff8131ebf7] aa_alloc_task_context } hitcount: 21943 bytes_req: 702176
++  { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 55759 bytes_req: 5074265
++
++  Totals:
++      Hits: 109928
++      Entries: 71
++      Dropped: 0
++
++  Because the default sort key above is 'hitcount', the above shows
++  the list of call_sites by increasing hitcount, so that at the bottom
++  we see the functions that made the most kmalloc calls during the
++  run. If instead we wanted to see the top kmalloc callers in
++  terms of the number of bytes requested rather than the number of
++  calls, and we wanted the top caller to appear at the top, we can use
++  the 'sort' parameter, along with the 'descending' modifier:
++
++  # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
++        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
++
++  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
++  # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
++
++  { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2186 bytes_req: 3397464
++  { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1790 bytes_req: 712176
++  { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 8132 bytes_req: 513135
++  { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 106 bytes_req: 440128
++  { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2186 bytes_req: 314784
++  { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 2174 bytes_req: 208992
++  { call_site: [ffffffff811ae8e1] __kmalloc } hitcount: 8 bytes_req: 131072
++  { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 859 bytes_req: 116824
++  { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 1834 bytes_req: 102704
++  { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 972 bytes_req: 101088
++  { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 972 bytes_req: 85536
++  { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 3333 bytes_req: 66664
++  { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 209 bytes_req: 61632
++  .
++  .
++  .
++ { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 ++ { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 ++ { call_site: [ffffffff812d8406] copy_semundo } hitcount: 2 bytes_req: 48 ++ { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 1 bytes_req: 48 ++ { call_site: [ffffffffa027121a] drm_getmagic [drm] } hitcount: 1 bytes_req: 48 ++ { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 ++ { call_site: [ffffffff811c52f4] bprm_change_interp } hitcount: 2 bytes_req: 16 ++ { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 ++ { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 ++ { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 ++ ++ Totals: ++ Hits: 32133 ++ Entries: 81 ++ Dropped: 0 ++ ++ To display the offset and size information in addition to the symbol ++ name, just use 'sym-offset' instead: ++ ++ # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \ ++ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist ++ # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] ++ ++ { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915] } hitcount: 4569 bytes_req: 3163720 ++ { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915] } hitcount: 4569 bytes_req: 657936 ++ { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915] } hitcount: 1519 bytes_req: 472936 ++ { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915] } hitcount: 3050 bytes_req: 211832 ++ { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50 } hitcount: 34 bytes_req: 148384 ++ { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915] } hitcount: 1385 bytes_req: 144040 ++ { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0 } hitcount: 8 bytes_req: 131072 ++ { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm] } hitcount: 1385 bytes_req: 121880 ++ { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm] } hitcount: 1848 bytes_req: 103488 ++ { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915] } hitcount: 461 bytes_req: 62696 ++ { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm] } hitcount: 1541 bytes_req: 61640 ++ { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0 } hitcount: 57 bytes_req: 57456 ++ . ++ . ++ . ++ { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0 } hitcount: 2 bytes_req: 128 ++ { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm] } hitcount: 3 bytes_req: 96 ++ { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0 } hitcount: 8 bytes_req: 96 ++ { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650 } hitcount: 3 bytes_req: 84 ++ { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110 } hitcount: 1 bytes_req: 8 ++ { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid] } hitcount: 1 bytes_req: 7 ++ { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid] } hitcount: 1 bytes_req: 7 ++ ++ Totals: ++ Hits: 26098 ++ Entries: 64 ++ Dropped: 0 ++ ++ We can also add multiple fields to the 'values' parameter. 
For
++  example, we might want to see the total number of bytes allocated
++  alongside bytes requested, and display the result sorted by bytes
++  allocated in descending order:
++
++  # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
++        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
++
++  # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
++  # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
++
++  { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 7403 bytes_req: 4084360 bytes_alloc: 5958016
++  { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 541 bytes_req: 2213968 bytes_alloc: 2228224
++  { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 7404 bytes_req: 1066176 bytes_alloc: 1421568
++  { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1565 bytes_req: 557368 bytes_alloc: 1037760
++  { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 9557 bytes_req: 595778 bytes_alloc: 695744
++  { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 5839 bytes_req: 430680 bytes_alloc: 470400
++  { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 2388 bytes_req: 324768 bytes_alloc: 458496
++  { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 3911 bytes_req: 219016 bytes_alloc: 250304
++  { call_site: [ffffffff815f8d7b] sk_prot_alloc } hitcount: 235 bytes_req: 236880 bytes_alloc: 240640
++  { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 557 bytes_req: 169024 bytes_alloc: 221760
++  { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 9378 bytes_req: 187548 bytes_alloc: 206312
++  { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1519 bytes_req: 157976 bytes_alloc: 194432
++  .
++  .
++  .
++  { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach } hitcount: 2 bytes_req: 144 bytes_alloc: 192
++  { call_site: [ffffffff81097ee8] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
++  { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
++  { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
++  { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
++  { call_site: [ffffffff81213e80] load_elf_binary } hitcount: 3 bytes_req: 84 bytes_alloc: 96
++  { call_site: [ffffffff81079a2e] kthread_create_on_node } hitcount: 1 bytes_req: 56 bytes_alloc: 64
++  { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8
++  { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 bytes_alloc: 8
++  { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8
++
++  Totals:
++      Hits: 66598
++      Entries: 65
++      Dropped: 0
++
++  Finally, to finish off our kmalloc example, instead of simply having
++  the hist trigger display symbolic call_sites, we can have the hist
++  trigger additionally display the complete set of kernel stack traces
++  that led to each call_site.
To do that, we simply use the special ++ value 'stacktrace' for the key parameter: ++ ++ # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \ ++ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger ++ ++ The above trigger will use the kernel stack trace in effect when an ++ event is triggered as the key for the hash table. This allows the ++ enumeration of every kernel callpath that led up to a particular ++ event, along with a running total of any of the event fields for ++ that event. Here we tally bytes requested and bytes allocated for ++ every callpath in the system that led up to a kmalloc (in this case ++ every callpath to a kmalloc for a kernel compile): ++ ++ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist ++ # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active] ++ ++ { stacktrace: ++ __kmalloc_track_caller+0x10b/0x1a0 ++ kmemdup+0x20/0x50 ++ hidraw_report_event+0x8a/0x120 [hid] ++ hid_report_raw_event+0x3ea/0x440 [hid] ++ hid_input_report+0x112/0x190 [hid] ++ hid_irq_in+0xc2/0x260 [usbhid] ++ __usb_hcd_giveback_urb+0x72/0x120 ++ usb_giveback_urb_bh+0x9e/0xe0 ++ tasklet_hi_action+0xf8/0x100 ++ __do_softirq+0x114/0x2c0 ++ irq_exit+0xa5/0xb0 ++ do_IRQ+0x5a/0xf0 ++ ret_from_intr+0x0/0x30 ++ cpuidle_enter+0x17/0x20 ++ cpu_startup_entry+0x315/0x3e0 ++ rest_init+0x7c/0x80 ++ } hitcount: 3 bytes_req: 21 bytes_alloc: 24 ++ { stacktrace: ++ __kmalloc_track_caller+0x10b/0x1a0 ++ kmemdup+0x20/0x50 ++ hidraw_report_event+0x8a/0x120 [hid] ++ hid_report_raw_event+0x3ea/0x440 [hid] ++ hid_input_report+0x112/0x190 [hid] ++ hid_irq_in+0xc2/0x260 [usbhid] ++ __usb_hcd_giveback_urb+0x72/0x120 ++ usb_giveback_urb_bh+0x9e/0xe0 ++ tasklet_hi_action+0xf8/0x100 ++ __do_softirq+0x114/0x2c0 ++ irq_exit+0xa5/0xb0 ++ do_IRQ+0x5a/0xf0 ++ ret_from_intr+0x0/0x30 ++ } hitcount: 3 bytes_req: 21 bytes_alloc: 24 ++ { stacktrace: ++ kmem_cache_alloc_trace+0xeb/0x150 ++ aa_alloc_task_context+0x27/0x40 ++ apparmor_cred_prepare+0x1f/0x50 ++ security_prepare_creds+0x16/0x20 ++ prepare_creds+0xdf/0x1a0 ++ SyS_capset+0xb5/0x200 ++ system_call_fastpath+0x12/0x6a ++ } hitcount: 1 bytes_req: 32 bytes_alloc: 32 ++ . ++ . ++ . 
++  { stacktrace:
++         __kmalloc+0x11b/0x1b0
++         i915_gem_execbuffer2+0x6c/0x2c0 [i915]
++         drm_ioctl+0x349/0x670 [drm]
++         do_vfs_ioctl+0x2f0/0x4f0
++         SyS_ioctl+0x81/0xa0
++         system_call_fastpath+0x12/0x6a
++  } hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808
++  { stacktrace:
++         __kmalloc+0x11b/0x1b0
++         load_elf_phdrs+0x76/0xa0
++         load_elf_binary+0x102/0x1650
++         search_binary_handler+0x97/0x1d0
++         do_execveat_common.isra.34+0x551/0x6e0
++         SyS_execve+0x3a/0x50
++         return_from_execve+0x0/0x23
++  } hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048
++  { stacktrace:
++         kmem_cache_alloc_trace+0xeb/0x150
++         apparmor_file_alloc_security+0x27/0x40
++         security_file_alloc+0x16/0x20
++         get_empty_filp+0x93/0x1c0
++         path_openat+0x31/0x5f0
++         do_filp_open+0x3a/0x90
++         do_sys_open+0x128/0x220
++         SyS_open+0x1e/0x20
++         system_call_fastpath+0x12/0x6a
++  } hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376
++  { stacktrace:
++         __kmalloc+0x11b/0x1b0
++         seq_buf_alloc+0x1b/0x50
++         seq_read+0x2cc/0x370
++         proc_reg_read+0x3d/0x80
++         __vfs_read+0x28/0xe0
++         vfs_read+0x86/0x140
++         SyS_read+0x46/0xb0
++         system_call_fastpath+0x12/0x6a
++  } hitcount: 19133 bytes_req: 78368768 bytes_alloc: 78368768
++
++  Totals:
++      Hits: 6085872
++      Entries: 253
++      Dropped: 0
++
++  If you key a hist trigger on common_pid, for example in order to
++  gather and display sorted totals for each process, you can use the
++  special .execname modifier to display the executable names for the
++  processes in the table rather than raw pids. The example below
++  keeps a per-process sum of total bytes read:
++
++  # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
++        /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
++
++  # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
++  # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
++
++  { common_pid: gnome-terminal [ 3196] } hitcount: 280 count: 1093512
++  { common_pid: Xorg [ 1309] } hitcount: 525 count: 256640
++  { common_pid: compiz [ 2889] } hitcount: 59 count: 254400
++  { common_pid: bash [ 8710] } hitcount: 3 count: 66369
++  { common_pid: dbus-daemon-lau [ 8703] } hitcount: 49 count: 47739
++  { common_pid: irqbalance [ 1252] } hitcount: 27 count: 27648
++  { common_pid: 01ifupdown [ 8705] } hitcount: 3 count: 17216
++  { common_pid: dbus-daemon [ 772] } hitcount: 10 count: 12396
++  { common_pid: Socket Thread [ 8342] } hitcount: 11 count: 11264
++  { common_pid: nm-dhcp-client. [ 8701] } hitcount: 6 count: 7424
++  { common_pid: gmain [ 1315] } hitcount: 18 count: 6336
++  .
++  .
++  .
++  { common_pid: postgres [ 1892] } hitcount: 2 count: 32
++  { common_pid: postgres [ 1891] } hitcount: 2 count: 32
++  { common_pid: gmain [ 8704] } hitcount: 2 count: 32
++  { common_pid: upstart-dbus-br [ 2740] } hitcount: 21 count: 21
++  { common_pid: nm-dispatcher.a [ 8696] } hitcount: 1 count: 16
++  { common_pid: indicator-datet [ 2904] } hitcount: 1 count: 16
++  { common_pid: gdbus [ 2998] } hitcount: 1 count: 16
++  { common_pid: rtkit-daemon [ 2052] } hitcount: 1 count: 8
++  { common_pid: init [ 1] } hitcount: 2 count: 2
++
++  Totals:
++      Hits: 2116
++      Entries: 51
++      Dropped: 0
++
++  Similarly, if you key a hist trigger on syscall id, for example to
++  gather and display a list of systemwide syscall hits, you can use
++  the special .syscall modifier to display the syscall names rather
++  than raw ids.
The example below keeps a running total of syscall ++ counts for the system during the run: ++ ++ # echo 'hist:key=id.syscall:val=hitcount' > \ ++ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist ++ # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active] ++ ++ { id: sys_fsync [ 74] } hitcount: 1 ++ { id: sys_newuname [ 63] } hitcount: 1 ++ { id: sys_prctl [157] } hitcount: 1 ++ { id: sys_statfs [137] } hitcount: 1 ++ { id: sys_symlink [ 88] } hitcount: 1 ++ { id: sys_sendmmsg [307] } hitcount: 1 ++ { id: sys_semctl [ 66] } hitcount: 1 ++ { id: sys_readlink [ 89] } hitcount: 3 ++ { id: sys_bind [ 49] } hitcount: 3 ++ { id: sys_getsockname [ 51] } hitcount: 3 ++ { id: sys_unlink [ 87] } hitcount: 3 ++ { id: sys_rename [ 82] } hitcount: 4 ++ { id: unknown_syscall [ 58] } hitcount: 4 ++ { id: sys_connect [ 42] } hitcount: 4 ++ { id: sys_getpid [ 39] } hitcount: 4 ++ . ++ . ++ . ++ { id: sys_rt_sigprocmask [ 14] } hitcount: 952 ++ { id: sys_futex [202] } hitcount: 1534 ++ { id: sys_write [ 1] } hitcount: 2689 ++ { id: sys_setitimer [ 38] } hitcount: 2797 ++ { id: sys_read [ 0] } hitcount: 3202 ++ { id: sys_select [ 23] } hitcount: 3773 ++ { id: sys_writev [ 20] } hitcount: 4531 ++ { id: sys_poll [ 7] } hitcount: 8314 ++ { id: sys_recvmsg [ 47] } hitcount: 13738 ++ { id: sys_ioctl [ 16] } hitcount: 21843 ++ ++ Totals: ++ Hits: 67612 ++ Entries: 72 ++ Dropped: 0 ++ ++ The syscall counts above provide a rough overall picture of system ++ call activity on the system; we can see for example that the most ++ popular system call on this system was the 'sys_ioctl' system call. ++ ++ We can use 'compound' keys to refine that number and provide some ++ further insight as to which processes exactly contribute to the ++ overall ioctl count. ++ ++ The command below keeps a hitcount for every unique combination of ++ system call id and pid - the end result is essentially a table ++ that keeps a per-pid sum of system call hits. The results are ++ sorted using the system call id as the primary key, and the ++ hitcount sum as the secondary key: ++ ++ # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \ ++ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist ++ # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active] ++ ++ { id: sys_read [ 0], common_pid: rtkit-daemon [ 1877] } hitcount: 1 ++ { id: sys_read [ 0], common_pid: gdbus [ 2976] } hitcount: 1 ++ { id: sys_read [ 0], common_pid: console-kit-dae [ 3400] } hitcount: 1 ++ { id: sys_read [ 0], common_pid: postgres [ 1865] } hitcount: 1 ++ { id: sys_read [ 0], common_pid: deja-dup-monito [ 3543] } hitcount: 2 ++ { id: sys_read [ 0], common_pid: NetworkManager [ 890] } hitcount: 2 ++ { id: sys_read [ 0], common_pid: evolution-calen [ 3048] } hitcount: 2 ++ { id: sys_read [ 0], common_pid: postgres [ 1864] } hitcount: 2 ++ { id: sys_read [ 0], common_pid: nm-applet [ 3022] } hitcount: 2 ++ { id: sys_read [ 0], common_pid: whoopsie [ 1212] } hitcount: 2 ++ . ++ . ++ . 
++  { id: sys_ioctl [ 16], common_pid: bash [ 8479] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: bash [ 3472] } hitcount: 12
++  { id: sys_ioctl [ 16], common_pid: gnome-terminal [ 3199] } hitcount: 16
++  { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 1808
++  { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 5580
++  .
++  .
++  .
++  { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2690] } hitcount: 3
++  { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2688] } hitcount: 16
++  { id: sys_inotify_add_watch [254], common_pid: gmain [ 975] } hitcount: 2
++  { id: sys_inotify_add_watch [254], common_pid: gmain [ 3204] } hitcount: 4
++  { id: sys_inotify_add_watch [254], common_pid: gmain [ 2888] } hitcount: 4
++  { id: sys_inotify_add_watch [254], common_pid: gmain [ 3003] } hitcount: 4
++  { id: sys_inotify_add_watch [254], common_pid: gmain [ 2873] } hitcount: 4
++  { id: sys_inotify_add_watch [254], common_pid: gmain [ 3196] } hitcount: 6
++  { id: sys_openat [257], common_pid: java [ 2623] } hitcount: 2
++  { id: sys_eventfd2 [290], common_pid: ibus-ui-gtk3 [ 2760] } hitcount: 4
++  { id: sys_eventfd2 [290], common_pid: compiz [ 2994] } hitcount: 6
++
++  Totals:
++      Hits: 31536
++      Entries: 323
++      Dropped: 0
++
++  The above list does give us a breakdown of the ioctl syscall by
++  pid, but it also gives us quite a bit more than that, which we
++  don't really care about at the moment. Since we know the syscall
++  id for sys_ioctl (16, displayed next to the sys_ioctl name), we
++  can use that to filter out all the other syscalls:
++
++  # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
++        /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
++
++  # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
++  # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
++
++  { id: sys_ioctl [ 16], common_pid: gmain [ 2769] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: evolution-addre [ 8571] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: gmain [ 3003] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: gmain [ 2781] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: gmain [ 2829] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: bash [ 8726] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: bash [ 8508] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: gmain [ 2970] } hitcount: 1
++  { id: sys_ioctl [ 16], common_pid: gmain [ 2768] } hitcount: 1
++  .
++  .
++  .
++  { id: sys_ioctl [ 16], common_pid: pool [ 8559] } hitcount: 45
++  { id: sys_ioctl [ 16], common_pid: pool [ 8555] } hitcount: 48
++  { id: sys_ioctl [ 16], common_pid: pool [ 8551] } hitcount: 48
++  { id: sys_ioctl [ 16], common_pid: avahi-daemon [ 896] } hitcount: 66
++  { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 26674
++  { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 73443
++
++  Totals:
++      Hits: 101162
++      Entries: 103
++      Dropped: 0
++
++  The above output shows that 'compiz' and 'Xorg' are far and away
++  the heaviest ioctl callers (which might lead to questions about
++  whether they really need to be making all those calls and to
++  possible avenues for further investigation.)
++
++  The compound key examples used a key and a sum value (hitcount) to
++  sort the output, but we can just as easily use two keys instead.
++  Here's an example where we use a compound key composed of the
++  common_pid and size event fields.
Sorting with pid as the primary ++ key and 'size' as the secondary key allows us to display an ++ ordered summary of the recvfrom sizes, with counts, received by ++ each process: ++ ++ # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \ ++ /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist ++ # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active] ++ ++ { common_pid: smbd [ 784], size: 4 } hitcount: 1 ++ { common_pid: dnsmasq [ 1412], size: 4096 } hitcount: 672 ++ { common_pid: postgres [ 1796], size: 1000 } hitcount: 6 ++ { common_pid: postgres [ 1867], size: 1000 } hitcount: 10 ++ { common_pid: bamfdaemon [ 2787], size: 28 } hitcount: 2 ++ { common_pid: bamfdaemon [ 2787], size: 14360 } hitcount: 1 ++ { common_pid: compiz [ 2994], size: 8 } hitcount: 1 ++ { common_pid: compiz [ 2994], size: 20 } hitcount: 11 ++ { common_pid: gnome-terminal [ 3199], size: 4 } hitcount: 2 ++ { common_pid: firefox [ 8817], size: 4 } hitcount: 1 ++ { common_pid: firefox [ 8817], size: 8 } hitcount: 5 ++ { common_pid: firefox [ 8817], size: 588 } hitcount: 2 ++ { common_pid: firefox [ 8817], size: 628 } hitcount: 1 ++ { common_pid: firefox [ 8817], size: 6944 } hitcount: 1 ++ { common_pid: firefox [ 8817], size: 408880 } hitcount: 2 ++ { common_pid: firefox [ 8822], size: 8 } hitcount: 2 ++ { common_pid: firefox [ 8822], size: 160 } hitcount: 2 ++ { common_pid: firefox [ 8822], size: 320 } hitcount: 2 ++ { common_pid: firefox [ 8822], size: 352 } hitcount: 1 ++ . ++ . ++ . ++ { common_pid: pool [ 8923], size: 1960 } hitcount: 10 ++ { common_pid: pool [ 8923], size: 2048 } hitcount: 10 ++ { common_pid: pool [ 8924], size: 1960 } hitcount: 10 ++ { common_pid: pool [ 8924], size: 2048 } hitcount: 10 ++ { common_pid: pool [ 8928], size: 1964 } hitcount: 4 ++ { common_pid: pool [ 8928], size: 1965 } hitcount: 2 ++ { common_pid: pool [ 8928], size: 2048 } hitcount: 6 ++ { common_pid: pool [ 8929], size: 1982 } hitcount: 1 ++ { common_pid: pool [ 8929], size: 2048 } hitcount: 1 ++ ++ Totals: ++ Hits: 2016 ++ Entries: 224 ++ Dropped: 0 ++ ++ The above example also illustrates the fact that although a compound ++ key is treated as a single entity for hashing purposes, the sub-keys ++ it's composed of can be accessed independently. ++ ++ The next example uses a string field as the hash key and ++ demonstrates how you can manually pause and continue a hist trigger. 
++ In this example, we'll aggregate fork counts and don't expect a ++ large number of entries in the hash table, so we'll drop it to a ++ much smaller number, say 256: ++ ++ # echo 'hist:key=child_comm:val=hitcount:size=256' > \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist ++ # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] ++ ++ { child_comm: dconf worker } hitcount: 1 ++ { child_comm: ibus-daemon } hitcount: 1 ++ { child_comm: whoopsie } hitcount: 1 ++ { child_comm: smbd } hitcount: 1 ++ { child_comm: gdbus } hitcount: 1 ++ { child_comm: kthreadd } hitcount: 1 ++ { child_comm: dconf worker } hitcount: 1 ++ { child_comm: evolution-alarm } hitcount: 2 ++ { child_comm: Socket Thread } hitcount: 2 ++ { child_comm: postgres } hitcount: 2 ++ { child_comm: bash } hitcount: 3 ++ { child_comm: compiz } hitcount: 3 ++ { child_comm: evolution-sourc } hitcount: 4 ++ { child_comm: dhclient } hitcount: 4 ++ { child_comm: pool } hitcount: 5 ++ { child_comm: nm-dispatcher.a } hitcount: 8 ++ { child_comm: firefox } hitcount: 8 ++ { child_comm: dbus-daemon } hitcount: 8 ++ { child_comm: glib-pacrunner } hitcount: 10 ++ { child_comm: evolution } hitcount: 23 ++ ++ Totals: ++ Hits: 89 ++ Entries: 20 ++ Dropped: 0 ++ ++ If we want to pause the hist trigger, we can simply append :pause to ++ the command that started the trigger. Notice that the trigger info ++ displays as [paused]: ++ ++ # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist ++ # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused] ++ ++ { child_comm: dconf worker } hitcount: 1 ++ { child_comm: kthreadd } hitcount: 1 ++ { child_comm: dconf worker } hitcount: 1 ++ { child_comm: gdbus } hitcount: 1 ++ { child_comm: ibus-daemon } hitcount: 1 ++ { child_comm: Socket Thread } hitcount: 2 ++ { child_comm: evolution-alarm } hitcount: 2 ++ { child_comm: smbd } hitcount: 2 ++ { child_comm: bash } hitcount: 3 ++ { child_comm: whoopsie } hitcount: 3 ++ { child_comm: compiz } hitcount: 3 ++ { child_comm: evolution-sourc } hitcount: 4 ++ { child_comm: pool } hitcount: 5 ++ { child_comm: postgres } hitcount: 6 ++ { child_comm: firefox } hitcount: 8 ++ { child_comm: dhclient } hitcount: 10 ++ { child_comm: emacs } hitcount: 12 ++ { child_comm: dbus-daemon } hitcount: 20 ++ { child_comm: nm-dispatcher.a } hitcount: 20 ++ { child_comm: evolution } hitcount: 35 ++ { child_comm: glib-pacrunner } hitcount: 59 ++ ++ Totals: ++ Hits: 199 ++ Entries: 21 ++ Dropped: 0 ++ ++ To manually continue having the trigger aggregate events, append ++ :cont instead. 
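++ (In both cases the trigger specification itself must match the ++ existing trigger exactly; only the trailing ':pause' or ':cont' ++ differs, and the command is appended with '>>'.)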
Notice that the trigger info displays as [active] ++ again, and the data has changed: ++ ++ # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist ++ # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] ++ ++ { child_comm: dconf worker } hitcount: 1 ++ { child_comm: dconf worker } hitcount: 1 ++ { child_comm: kthreadd } hitcount: 1 ++ { child_comm: gdbus } hitcount: 1 ++ { child_comm: ibus-daemon } hitcount: 1 ++ { child_comm: Socket Thread } hitcount: 2 ++ { child_comm: evolution-alarm } hitcount: 2 ++ { child_comm: smbd } hitcount: 2 ++ { child_comm: whoopsie } hitcount: 3 ++ { child_comm: compiz } hitcount: 3 ++ { child_comm: evolution-sourc } hitcount: 4 ++ { child_comm: bash } hitcount: 5 ++ { child_comm: pool } hitcount: 5 ++ { child_comm: postgres } hitcount: 6 ++ { child_comm: firefox } hitcount: 8 ++ { child_comm: dhclient } hitcount: 11 ++ { child_comm: emacs } hitcount: 12 ++ { child_comm: dbus-daemon } hitcount: 22 ++ { child_comm: nm-dispatcher.a } hitcount: 22 ++ { child_comm: evolution } hitcount: 35 ++ { child_comm: glib-pacrunner } hitcount: 59 ++ ++ Totals: ++ Hits: 206 ++ Entries: 21 ++ Dropped: 0 ++ ++ The previous example showed how to start and stop a hist trigger by ++ appending ':pause' and ':cont' to the hist trigger command. A ++ hist trigger can also be started in a paused state by initially ++ starting the trigger with ':pause' appended. This allows you to ++ start the trigger only when you're ready to start collecting data ++ and not before. For example, you could start the trigger in a ++ paused state, then unpause it and do something you want to measure, ++ then pause the trigger again when done. ++ ++ Of course, doing this manually can be difficult and error-prone, but ++ it is possible to automatically start and stop a hist trigger based ++ on some condition, via the enable_hist and disable_hist triggers. ++ ++ For example, suppose we wanted to take a look at the relative ++ weights in terms of skb length for each callpath that leads to a ++ netif_receive_skb event when downloading a decent-sized file using ++ wget. ++ ++ First we set up an initially paused stacktrace trigger on the ++ netif_receive_skb event: ++ ++ # echo 'hist:key=stacktrace:vals=len:pause' > \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ ++ Next, we set up an 'enable_hist' trigger on the sched_process_exec ++ event, with an 'if filename==/usr/bin/wget' filter. The effect of ++ this new trigger is that it will 'unpause' the hist trigger we just ++ set up on netif_receive_skb if and only if it sees a ++ sched_process_exec event with a filename of '/usr/bin/wget'.
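++ (The general form, shown here as a sketch with placeholder names, ++ is: ++ ++ # echo 'enable_hist:<system>:<event> if <filter>' > \ ++ /sys/kernel/debug/tracing/events/<system>/<triggering-event>/trigger ++ ++ where the filter tests fields of the triggering event, not of the ++ event whose histogram is being enabled.)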
When ++ that happens, all netif_receive_skb events are aggregated into a ++ hash table keyed on stacktrace: ++ ++ # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger ++ ++ The aggregation continues until the netif_receive_skb is paused ++ again, which is what the following disable_hist event does by ++ creating a similar setup on the sched_process_exit event, using the ++ filter 'comm==wget': ++ ++ # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger ++ ++ Whenever a process exits and the comm field of the disable_hist ++ trigger filter matches 'comm==wget', the netif_receive_skb hist ++ trigger is disabled. ++ ++ The overall effect is that netif_receive_skb events are aggregated ++ into the hash table for only the duration of the wget. Executing a ++ wget command and then listing the 'hist' file will display the ++ output generated by the wget command: ++ ++ $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz ++ ++ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist ++ # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] ++ ++ { stacktrace: ++ __netif_receive_skb_core+0x46d/0x990 ++ __netif_receive_skb+0x18/0x60 ++ netif_receive_skb_internal+0x23/0x90 ++ napi_gro_receive+0xc8/0x100 ++ ieee80211_deliver_skb+0xd6/0x270 [mac80211] ++ ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] ++ ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] ++ ieee80211_rx+0x31d/0x900 [mac80211] ++ iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] ++ iwl_rx_dispatch+0x8e/0xf0 [iwldvm] ++ iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] ++ irq_thread_fn+0x20/0x50 ++ irq_thread+0x11f/0x150 ++ kthread+0xd2/0xf0 ++ ret_from_fork+0x42/0x70 ++ } hitcount: 85 len: 28884 ++ { stacktrace: ++ __netif_receive_skb_core+0x46d/0x990 ++ __netif_receive_skb+0x18/0x60 ++ netif_receive_skb_internal+0x23/0x90 ++ napi_gro_complete+0xa4/0xe0 ++ dev_gro_receive+0x23a/0x360 ++ napi_gro_receive+0x30/0x100 ++ ieee80211_deliver_skb+0xd6/0x270 [mac80211] ++ ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] ++ ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] ++ ieee80211_rx+0x31d/0x900 [mac80211] ++ iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] ++ iwl_rx_dispatch+0x8e/0xf0 [iwldvm] ++ iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] ++ irq_thread_fn+0x20/0x50 ++ irq_thread+0x11f/0x150 ++ kthread+0xd2/0xf0 ++ } hitcount: 98 len: 664329 ++ { stacktrace: ++ __netif_receive_skb_core+0x46d/0x990 ++ __netif_receive_skb+0x18/0x60 ++ process_backlog+0xa8/0x150 ++ net_rx_action+0x15d/0x340 ++ __do_softirq+0x114/0x2c0 ++ do_softirq_own_stack+0x1c/0x30 ++ do_softirq+0x65/0x70 ++ __local_bh_enable_ip+0xb5/0xc0 ++ ip_finish_output+0x1f4/0x840 ++ ip_output+0x6b/0xc0 ++ ip_local_out_sk+0x31/0x40 ++ ip_send_skb+0x1a/0x50 ++ udp_send_skb+0x173/0x2a0 ++ udp_sendmsg+0x2bf/0x9f0 ++ inet_sendmsg+0x64/0xa0 ++ sock_sendmsg+0x3d/0x50 ++ } hitcount: 115 len: 13030 ++ { stacktrace: ++ __netif_receive_skb_core+0x46d/0x990 ++ __netif_receive_skb+0x18/0x60 ++ netif_receive_skb_internal+0x23/0x90 ++ napi_gro_complete+0xa4/0xe0 ++ napi_gro_flush+0x6d/0x90 ++ iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi] ++ irq_thread_fn+0x20/0x50 ++ irq_thread+0x11f/0x150 ++ kthread+0xd2/0xf0 ++ ret_from_fork+0x42/0x70 ++ } hitcount: 934 len: 5512212 ++ ++ Totals: ++ Hits: 1232 ++ Entries: 4 ++ Dropped: 0 ++ ++ The above shows all the netif_receive_skb callpaths and their total ++ lengths for 
the duration of the wget command. ++ ++ The 'clear' hist trigger param can be used to clear the hash table. ++ Suppose we wanted to try another run of the previous example but ++ this time also wanted to see the complete list of events that went ++ into the histogram. In order to avoid having to set everything up ++ again, we can just clear the histogram first: ++ ++ # echo 'hist:key=stacktrace:vals=len:clear' >> \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ ++ Just to verify that it is in fact cleared, here's what we now see in ++ the hist file: ++ ++ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist ++ # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] ++ ++ Totals: ++ Hits: 0 ++ Entries: 0 ++ Dropped: 0 ++ ++ Since we want to see the detailed list of every netif_receive_skb ++ event occurring during the new run, which are in fact the same ++ events being aggregated into the hash table, we add some additional ++ 'enable_event' events to the triggering sched_process_exec and ++ sched_process_exit events as such: ++ ++ # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger ++ ++ # echo 'disable_event:net:netif_receive_skb if comm==wget' > \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger ++ ++ If you read the trigger files for the sched_process_exec and ++ sched_process_exit triggers, you should see two triggers for each: ++ one enabling/disabling the hist aggregation and the other ++ enabling/disabling the logging of events: ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger ++ enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget ++ enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger ++ enable_event:net:netif_receive_skb:unlimited if comm==wget ++ disable_hist:net:netif_receive_skb:unlimited if comm==wget ++ ++ In other words, whenever either of the sched_process_exec or ++ sched_process_exit events is hit and matches 'wget', it enables or ++ disables both the histogram and the event log, and what you end up ++ with is a hash table and set of events just covering the specified ++ duration. Run the wget command again: ++ ++ $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz ++ ++ Displaying the 'hist' file should show something similar to what you ++ saw in the last run, but this time you should also see the ++ individual events in the trace file: ++ ++ # cat /sys/kernel/debug/tracing/trace ++ ++ # tracer: nop ++ # ++ # entries-in-buffer/entries-written: 183/1426 #P:4 ++ # ++ # _-----=> irqs-off ++ # / _----=> need-resched ++ # | / _---=> hardirq/softirq ++ # || / _--=> preempt-depth ++ # ||| / delay ++ # TASK-PID CPU# |||| TIMESTAMP FUNCTION ++ # | | | |||| | | ++ wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60 ++ wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60 ++ dnsmasq-1382 [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130 ++ dnsmasq-1382 [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138 ++ ##### CPU 2 buffer started #### ++ irq/29-iwlwifi-559 [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948 ++ irq/29-iwlwifi-559 [002] ..s. 
31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500 ++ irq/29-iwlwifi-559 [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948 ++ irq/29-iwlwifi-559 [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948 ++ irq/29-iwlwifi-559 [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500 ++ . ++ . ++ . ++ ++ The following example demonstrates how multiple hist triggers can be ++ attached to a given event. This capability can be useful for ++ creating a set of different summaries derived from the same set of ++ events, or for comparing the effects of different filters, among ++ other things. ++ ++ # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ # echo 'hist:keys=skbaddr.hex:vals=len' >> \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ # echo 'hist:keys=len:vals=common_preempt_count' >> \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ ++ The above set of commands creates four triggers differing only in ++ their filters, along with a completely different though fairly ++ nonsensical trigger. Note that in order to append multiple hist ++ triggers to the same file, you should use the '>>' operator to ++ append them ('>' will also add the new hist trigger, but will remove ++ any existing hist triggers beforehand). ++ ++ Displaying the contents of the 'hist' file for the event shows the ++ contents of all five histograms: ++ ++ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist ++ ++ # event histogram ++ # ++ # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active] ++ # ++ ++ { len: 176 } hitcount: 1 common_preempt_count: 0 ++ { len: 223 } hitcount: 1 common_preempt_count: 0 ++ { len: 4854 } hitcount: 1 common_preempt_count: 0 ++ { len: 395 } hitcount: 1 common_preempt_count: 0 ++ { len: 177 } hitcount: 1 common_preempt_count: 0 ++ { len: 446 } hitcount: 1 common_preempt_count: 0 ++ { len: 1601 } hitcount: 1 common_preempt_count: 0 ++ . ++ . ++ .
++ { len: 1280 } hitcount: 66 common_preempt_count: 0 ++ { len: 116 } hitcount: 81 common_preempt_count: 40 ++ { len: 708 } hitcount: 112 common_preempt_count: 0 ++ { len: 46 } hitcount: 221 common_preempt_count: 0 ++ { len: 1264 } hitcount: 458 common_preempt_count: 0 ++ ++ Totals: ++ Hits: 1428 ++ Entries: 147 ++ Dropped: 0 ++ ++ ++ # event histogram ++ # ++ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] ++ # ++ ++ { skbaddr: ffff8800baee5e00 } hitcount: 1 len: 130 ++ { skbaddr: ffff88005f3d5600 } hitcount: 1 len: 1280 ++ { skbaddr: ffff88005f3d4900 } hitcount: 1 len: 1280 ++ { skbaddr: ffff88009fed6300 } hitcount: 1 len: 115 ++ { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 115 ++ { skbaddr: ffff88008cdb1900 } hitcount: 1 len: 46 ++ { skbaddr: ffff880064b5ef00 } hitcount: 1 len: 118 ++ { skbaddr: ffff880044e3c700 } hitcount: 1 len: 60 ++ { skbaddr: ffff880100065900 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d46bd500 } hitcount: 1 len: 116 ++ { skbaddr: ffff88005f3d5f00 } hitcount: 1 len: 1280 ++ { skbaddr: ffff880100064700 } hitcount: 1 len: 365 ++ { skbaddr: ffff8800badb6f00 } hitcount: 1 len: 60 ++ . ++ . ++ . ++ { skbaddr: ffff88009fe0be00 } hitcount: 27 len: 24677 ++ { skbaddr: ffff88009fe0a400 } hitcount: 27 len: 23052 ++ { skbaddr: ffff88009fe0b700 } hitcount: 31 len: 25589 ++ { skbaddr: ffff88009fe0b600 } hitcount: 32 len: 27326 ++ { skbaddr: ffff88006a462800 } hitcount: 68 len: 71678 ++ { skbaddr: ffff88006a463700 } hitcount: 70 len: 72678 ++ { skbaddr: ffff88006a462b00 } hitcount: 71 len: 77589 ++ { skbaddr: ffff88006a463600 } hitcount: 73 len: 71307 ++ { skbaddr: ffff88006a462200 } hitcount: 81 len: 81032 ++ ++ Totals: ++ Hits: 1451 ++ Entries: 318 ++ Dropped: 0 ++ ++ ++ # event histogram ++ # ++ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active] ++ # ++ ++ ++ Totals: ++ Hits: 0 ++ Entries: 0 ++ Dropped: 0 ++ ++ ++ # event histogram ++ # ++ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active] ++ # ++ ++ { skbaddr: ffff88009fd2c300 } hitcount: 1 len: 7212 ++ { skbaddr: ffff8800d2bcce00 } hitcount: 1 len: 7212 ++ { skbaddr: ffff8800d2bcd700 } hitcount: 1 len: 7212 ++ { skbaddr: ffff8800d2bcda00 } hitcount: 1 len: 21492 ++ { skbaddr: ffff8800ae2e2d00 } hitcount: 1 len: 7212 ++ { skbaddr: ffff8800d2bcdb00 } hitcount: 1 len: 7212 ++ { skbaddr: ffff88006a4df500 } hitcount: 1 len: 4854 ++ { skbaddr: ffff88008ce47b00 } hitcount: 1 len: 18636 ++ { skbaddr: ffff8800ae2e2200 } hitcount: 1 len: 12924 ++ { skbaddr: ffff88005f3e1000 } hitcount: 1 len: 4356 ++ { skbaddr: ffff8800d2bcdc00 } hitcount: 2 len: 24420 ++ { skbaddr: ffff8800d2bcc200 } hitcount: 2 len: 12996 ++ ++ Totals: ++ Hits: 14 ++ Entries: 12 ++ Dropped: 0 ++ ++ ++ # event histogram ++ # ++ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active] ++ # ++ ++ ++ Totals: ++ Hits: 0 ++ Entries: 0 ++ Dropped: 0 ++ ++ Named triggers can be used to have triggers share a common set of ++ histogram data. This capability is mostly useful for combining the ++ output of events generated by tracepoints contained inside inline ++ functions, but names can be used in a hist trigger on any event. 
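++ (To share a name, each trigger must be defined with the same keys ++ and values; a definition that doesn't match the existing named ++ trigger will be rejected.)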
++ For example, these two triggers when hit will update the same 'len' ++ field in the shared 'foo' histogram data: ++ ++ # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ ++ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger ++ # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ ++ /sys/kernel/debug/tracing/events/net/netif_rx/trigger ++ ++ You can see that they're updating common histogram data by reading ++ each event's hist files at the same time: ++ ++ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist; ++ cat /sys/kernel/debug/tracing/events/net/netif_rx/hist ++ ++ # event histogram ++ # ++ # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] ++ # ++ ++ { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 ++ { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 ++ { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 ++ { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 ++ { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 ++ { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 ++ { skbaddr: ffff880064505000 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 ++ { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 ++ { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 ++ { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 ++ { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 ++ { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 ++ { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 ++ { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 ++ { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 ++ { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 ++ { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 ++ { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 ++ { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 ++ { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 ++ { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 ++ { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 ++ { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 ++ { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 ++ { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 ++ { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 ++ { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 ++ { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 ++ { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 ++ { skbaddr: ffff880064504400 } hitcount: 4 len: 184 ++ { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 ++ { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 ++ { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 ++ { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 ++ { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 ++ ++ Totals: ++ Hits: 81 ++ Entries: 42 ++ Dropped: 0 ++ # event histogram ++ # ++ # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] ++ # ++ ++ { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 ++ { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 ++ { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 ++ { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 ++ { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 ++ 
{ skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 ++ { skbaddr: ffff880064505000 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 ++ { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 ++ { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 ++ { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 ++ { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 ++ { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 ++ { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 ++ { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 ++ { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 ++ { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 ++ { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 ++ { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 ++ { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 ++ { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 ++ { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 ++ { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 ++ { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 ++ { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 ++ { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 ++ { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 ++ { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 ++ { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 ++ { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 ++ { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 ++ { skbaddr: ffff880064504400 } hitcount: 4 len: 184 ++ { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 ++ { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 ++ { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 ++ { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 ++ { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 ++ ++ Totals: ++ Hits: 81 ++ Entries: 42 ++ Dropped: 0 ++ ++ And here's an example that shows how to combine histogram data from ++ any two events even if they don't share any 'compatible' fields ++ other than 'hitcount' and 'stacktrace'. 
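++ (This works because every event implicitly supports 'hitcount' and ++ the special 'stacktrace' key, so they are always available as ++ common ground for a named trigger.)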
These commands create a ++ couple of triggers named 'bar' using those fields: ++ ++ # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ ++ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger ++ # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ ++ /sys/kernel/debug/tracing/events/net/netif_rx/trigger ++ ++ And displaying the output of either shows some interesting if ++ somewhat confusing output: ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist ++ # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist ++ ++ # event histogram ++ # ++ # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active] ++ # ++ ++ { stacktrace: ++ _do_fork+0x18e/0x330 ++ kernel_thread+0x29/0x30 ++ kthreadd+0x154/0x1b0 ++ ret_from_fork+0x3f/0x70 ++ } hitcount: 1 ++ { stacktrace: ++ netif_rx_internal+0xb2/0xd0 ++ netif_rx_ni+0x20/0x70 ++ dev_loopback_xmit+0xaa/0xd0 ++ ip_mc_output+0x126/0x240 ++ ip_local_out_sk+0x31/0x40 ++ igmp_send_report+0x1e9/0x230 ++ igmp_timer_expire+0xe9/0x120 ++ call_timer_fn+0x39/0xf0 ++ run_timer_softirq+0x1e1/0x290 ++ __do_softirq+0xfd/0x290 ++ irq_exit+0x98/0xb0 ++ smp_apic_timer_interrupt+0x4a/0x60 ++ apic_timer_interrupt+0x6d/0x80 ++ cpuidle_enter+0x17/0x20 ++ call_cpuidle+0x3b/0x60 ++ cpu_startup_entry+0x22d/0x310 ++ } hitcount: 1 ++ { stacktrace: ++ netif_rx_internal+0xb2/0xd0 ++ netif_rx_ni+0x20/0x70 ++ dev_loopback_xmit+0xaa/0xd0 ++ ip_mc_output+0x17f/0x240 ++ ip_local_out_sk+0x31/0x40 ++ ip_send_skb+0x1a/0x50 ++ udp_send_skb+0x13e/0x270 ++ udp_sendmsg+0x2bf/0x980 ++ inet_sendmsg+0x67/0xa0 ++ sock_sendmsg+0x38/0x50 ++ SYSC_sendto+0xef/0x170 ++ SyS_sendto+0xe/0x10 ++ entry_SYSCALL_64_fastpath+0x12/0x6a ++ } hitcount: 2 ++ { stacktrace: ++ netif_rx_internal+0xb2/0xd0 ++ netif_rx+0x1c/0x60 ++ loopback_xmit+0x6c/0xb0 ++ dev_hard_start_xmit+0x219/0x3a0 ++ __dev_queue_xmit+0x415/0x4f0 ++ dev_queue_xmit_sk+0x13/0x20 ++ ip_finish_output2+0x237/0x340 ++ ip_finish_output+0x113/0x1d0 ++ ip_output+0x66/0xc0 ++ ip_local_out_sk+0x31/0x40 ++ ip_send_skb+0x1a/0x50 ++ udp_send_skb+0x16d/0x270 ++ udp_sendmsg+0x2bf/0x980 ++ inet_sendmsg+0x67/0xa0 ++ sock_sendmsg+0x38/0x50 ++ ___sys_sendmsg+0x14e/0x270 ++ } hitcount: 76 ++ { stacktrace: ++ netif_rx_internal+0xb2/0xd0 ++ netif_rx+0x1c/0x60 ++ loopback_xmit+0x6c/0xb0 ++ dev_hard_start_xmit+0x219/0x3a0 ++ __dev_queue_xmit+0x415/0x4f0 ++ dev_queue_xmit_sk+0x13/0x20 ++ ip_finish_output2+0x237/0x340 ++ ip_finish_output+0x113/0x1d0 ++ ip_output+0x66/0xc0 ++ ip_local_out_sk+0x31/0x40 ++ ip_send_skb+0x1a/0x50 ++ udp_send_skb+0x16d/0x270 ++ udp_sendmsg+0x2bf/0x980 ++ inet_sendmsg+0x67/0xa0 ++ sock_sendmsg+0x38/0x50 ++ ___sys_sendmsg+0x269/0x270 ++ } hitcount: 77 ++ { stacktrace: ++ netif_rx_internal+0xb2/0xd0 ++ netif_rx+0x1c/0x60 ++ loopback_xmit+0x6c/0xb0 ++ dev_hard_start_xmit+0x219/0x3a0 ++ __dev_queue_xmit+0x415/0x4f0 ++ dev_queue_xmit_sk+0x13/0x20 ++ ip_finish_output2+0x237/0x340 ++ ip_finish_output+0x113/0x1d0 ++ ip_output+0x66/0xc0 ++ ip_local_out_sk+0x31/0x40 ++ ip_send_skb+0x1a/0x50 ++ udp_send_skb+0x16d/0x270 ++ udp_sendmsg+0x2bf/0x980 ++ inet_sendmsg+0x67/0xa0 ++ sock_sendmsg+0x38/0x50 ++ SYSC_sendto+0xef/0x170 ++ } hitcount: 88 ++ { stacktrace: ++ _do_fork+0x18e/0x330 ++ SyS_clone+0x19/0x20 ++ entry_SYSCALL_64_fastpath+0x12/0x6a ++ } hitcount: 244 ++ ++ Totals: ++ Hits: 489 ++ Entries: 7 ++ Dropped: 0 diff --git a/debian/patches/features/all/rt/0010-iommu-amd-Return-proper-error-code-in-irq_remapping_.patch 
b/debian/patches/features/all/rt/0010-iommu-amd-Return-proper-error-code-in-irq_remapping_.patch new file mode 100644 index 000000000..3200a7e34 --- /dev/null +++ b/debian/patches/features/all/rt/0010-iommu-amd-Return-proper-error-code-in-irq_remapping_.patch @@ -0,0 +1,41 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 22 Mar 2018 16:22:42 +0100 +Subject: [PATCH 10/10] iommu/amd: Return proper error code in + irq_remapping_alloc() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 29d049be9438278c47253a74cf8d0ddf36bd5d68 + +In the unlikely case when alloc_irq_table() is not able to return a +remap table then "ret" will be assigned with an error code. Later, the +code checks `index' and if it is negative (which it is because it is +initialized with `-1') and then then function properly aborts but +returns `-1' instead `-ENOMEM' what was intended. +In order to correct this, I assign -ENOMEM to index. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Joerg Roedel +--- + drivers/iommu/amd_iommu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -4110,7 +4110,7 @@ static int irq_remapping_alloc(struct ir + struct amd_ir_data *data = NULL; + struct irq_cfg *cfg; + int i, ret, devid; +- int index = -1; ++ int index; + + if (!info) + return -EINVAL; +@@ -4152,7 +4152,7 @@ static int irq_remapping_alloc(struct ir + WARN_ON(table->min_index != 32); + index = info->ioapic_pin; + } else { +- ret = -ENOMEM; ++ index = -ENOMEM; + } + } else { + bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI); diff --git a/debian/patches/features/all/rt/0010-take-out-orphan-externs-empty_string-slash_string.patch b/debian/patches/features/all/rt/0010-take-out-orphan-externs-empty_string-slash_string.patch new file mode 100644 index 000000000..66c4b552c --- /dev/null +++ b/debian/patches/features/all/rt/0010-take-out-orphan-externs-empty_string-slash_string.patch @@ -0,0 +1,25 @@ +From: Al Viro +Date: Wed, 7 Mar 2018 12:47:04 -0500 +Subject: [PATCH 10/17] take out orphan externs (empty_string/slash_string) +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 903ddaf49329076862d65f7284d825759ff67bd6 + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/dcache.h | 2 -- + 1 file changed, 2 deletions(-) + +--- a/include/linux/dcache.h ++++ b/include/linux/dcache.h +@@ -56,9 +56,7 @@ struct qstr { + + #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } + +-extern const char empty_string[]; + extern const struct qstr empty_name; +-extern const char slash_string[]; + extern const struct qstr slash_name; + + struct dentry_stat_t { diff --git a/debian/patches/features/all/rt/0010-tracing-Add-Documentation-for-log2-modifier.patch b/debian/patches/features/all/rt/0010-tracing-Add-Documentation-for-log2-modifier.patch new file mode 100644 index 000000000..150aac7b7 --- /dev/null +++ b/debian/patches/features/all/rt/0010-tracing-Add-Documentation-for-log2-modifier.patch @@ -0,0 +1,28 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:36 -0600 +Subject: [PATCH 10/48] tracing: Add Documentation for log2 modifier +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add a line for the log2 modifier, to keep it aligned with +tracing/README. 
+ +Link: http://lkml.kernel.org/r/a419028bccab155749a4b8702d5b97af75f1578f.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit fcb5b95a2bb931f8e72e2dbd2def67382dd99d42) +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/trace/histogram.txt | 1 + + 1 file changed, 1 insertion(+) + +--- a/Documentation/trace/histogram.txt ++++ b/Documentation/trace/histogram.txt +@@ -73,6 +73,7 @@ + .sym-offset display an address as a symbol and offset + .syscall display a syscall id as a system call name + .execname display a common_pid as a program name ++ .log2 display log2 value rather than raw number + + Note that in general the semantics of a given field aren't + interpreted when applying a modifier to it, but there are some diff --git a/debian/patches/features/all/rt/0011-fold-lookup_real-into-__lookup_hash.patch b/debian/patches/features/all/rt/0011-fold-lookup_real-into-__lookup_hash.patch new file mode 100644 index 000000000..87e20d5ef --- /dev/null +++ b/debian/patches/features/all/rt/0011-fold-lookup_real-into-__lookup_hash.patch @@ -0,0 +1,76 @@ +From: Al Viro +Date: Thu, 8 Mar 2018 11:00:45 -0500 +Subject: [PATCH 11/17] fold lookup_real() into __lookup_hash() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit a03ece5ff2bd7a9abaa0e8ddfe5f79d79e5984c8 + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/namei.c | 41 +++++++++++++++++------------------------ + 1 file changed, 17 insertions(+), 24 deletions(-) + +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1475,43 +1475,36 @@ static struct dentry *lookup_dcache(cons + } + + /* +- * Call i_op->lookup on the dentry. The dentry must be negative and +- * unhashed. +- * +- * dir->d_inode->i_mutex must be held ++ * Parent directory has inode locked exclusive. This is one ++ * and only case when ->lookup() gets called on non in-lookup ++ * dentries - as the matter of fact, this only gets called ++ * when directory is guaranteed to have no in-lookup children ++ * at all. + */ +-static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, +- unsigned int flags) +-{ +- struct dentry *old; +- +- /* Don't create child dentry for a dead directory. */ +- if (unlikely(IS_DEADDIR(dir))) { +- dput(dentry); +- return ERR_PTR(-ENOENT); +- } +- +- old = dir->i_op->lookup(dir, dentry, flags); +- if (unlikely(old)) { +- dput(dentry); +- dentry = old; +- } +- return dentry; +-} +- + static struct dentry *__lookup_hash(const struct qstr *name, + struct dentry *base, unsigned int flags) + { + struct dentry *dentry = lookup_dcache(name, base, flags); ++ struct dentry *old; ++ struct inode *dir = base->d_inode; + + if (dentry) + return dentry; + ++ /* Don't create child dentry for a dead directory. 
*/ ++ if (unlikely(IS_DEADDIR(dir))) ++ return ERR_PTR(-ENOENT); ++ + dentry = d_alloc(base, name); + if (unlikely(!dentry)) + return ERR_PTR(-ENOMEM); + +- return lookup_real(base->d_inode, dentry, flags); ++ old = dir->i_op->lookup(dir, dentry, flags); ++ if (unlikely(old)) { ++ dput(dentry); ++ dentry = old; ++ } ++ return dentry; + } + + static int lookup_fast(struct nameidata *nd, diff --git a/debian/patches/features/all/rt/0011-tracing-Add-support-to-detect-and-avoid-duplicates.patch b/debian/patches/features/all/rt/0011-tracing-Add-support-to-detect-and-avoid-duplicates.patch new file mode 100644 index 000000000..79701425c --- /dev/null +++ b/debian/patches/features/all/rt/0011-tracing-Add-support-to-detect-and-avoid-duplicates.patch @@ -0,0 +1,119 @@ +From: Vedang Patel +Date: Mon, 15 Jan 2018 20:51:37 -0600 +Subject: [PATCH 11/48] tracing: Add support to detect and avoid duplicates +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +A duplicate in the tracing_map hash table is when 2 different entries +have the same key and, as a result, the key_hash. This is possible due +to a race condition in the algorithm. This race condition is inherent to +the algorithm and not a bug. This was fine because, until now, we were +only interested in the sum of all the values related to a particular +key (the duplicates are dealt with in tracing_map_sort_entries()). But, +with the inclusion of variables[1], we are interested in individual +values. So, it will not be clear what value to choose when +there are duplicates. So, the duplicates need to be removed. + +The duplicates can occur in the code in the following scenarios: + +- A thread is in the process of adding a new element. It has +successfully executed cmpxchg() and inserted the key. But, it is still +not done acquiring the trace_map_elt struct, populating it and storing +the pointer to the struct in the value field of tracing_map hash table. +If another thread comes in at this time and wants to add an element with +the same key, it will not see the current element and add a new one. + +- There are multiple threads trying to execute cmpxchg at the same time, +one of the threads will succeed and the others will fail. The ones which +fail will go ahead increment 'idx' and add a new element there creating +a duplicate. + +This patch detects and avoids the first condition by asking the thread +which detects the duplicate to loop one more time. There is also a +possibility of infinite loop if the thread which is trying to insert +goes to sleep indefinitely and the one which is trying to insert a new +element detects a duplicate. Which is why, the thread loops for +map_size iterations before returning NULL. + +The second scenario is avoided by preventing the threads which failed +cmpxchg() from incrementing idx. This way, they will loop +around and check if the thread which succeeded in executing cmpxchg() +had the same key. 
+ +[1] http://lkml.kernel.org/r/cover.1498510759.git.tom.zanussi@linux.intel.com + +Link: http://lkml.kernel.org/r/e178e89ec399240331d383bd5913d649713110f4.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Vedang Patel +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit bd0a7ab135d0d0872296c3ae3c4f816a9a4c3dee) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/tracing_map.c | 41 ++++++++++++++++++++++++++++++++++++----- + 1 file changed, 36 insertions(+), 5 deletions(-) + +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -414,7 +414,9 @@ static inline struct tracing_map_elt * + __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) + { + u32 idx, key_hash, test_key; ++ int dup_try = 0; + struct tracing_map_entry *entry; ++ struct tracing_map_elt *val; + + key_hash = jhash(key, map->key_size, 0); + if (key_hash == 0) +@@ -426,11 +428,33 @@ static inline struct tracing_map_elt * + entry = TRACING_MAP_ENTRY(map->map, idx); + test_key = entry->key; + +- if (test_key && test_key == key_hash && entry->val && +- keys_match(key, entry->val->key, map->key_size)) { +- if (!lookup_only) +- atomic64_inc(&map->hits); +- return entry->val; ++ if (test_key && test_key == key_hash) { ++ val = READ_ONCE(entry->val); ++ if (val && ++ keys_match(key, val->key, map->key_size)) { ++ if (!lookup_only) ++ atomic64_inc(&map->hits); ++ return val; ++ } else if (unlikely(!val)) { ++ /* ++ * The key is present. But, val (pointer to elt ++ * struct) is still NULL. which means some other ++ * thread is in the process of inserting an ++ * element. ++ * ++ * On top of that, it's key_hash is same as the ++ * one being inserted right now. So, it's ++ * possible that the element has the same ++ * key as well. ++ */ ++ ++ dup_try++; ++ if (dup_try > map->map_size) { ++ atomic64_inc(&map->drops); ++ break; ++ } ++ continue; ++ } + } + + if (!test_key) { +@@ -452,6 +476,13 @@ static inline struct tracing_map_elt * + atomic64_inc(&map->hits); + + return entry->val; ++ } else { ++ /* ++ * cmpxchg() failed. Loop around once ++ * more to check what key was inserted. 
++ */ ++ dup_try++; ++ continue; + } + } + diff --git a/debian/patches/features/all/rt/0012-debugfs_lookup-switch-to-lookup_one_len_unlocked.patch b/debian/patches/features/all/rt/0012-debugfs_lookup-switch-to-lookup_one_len_unlocked.patch new file mode 100644 index 000000000..917a30a05 --- /dev/null +++ b/debian/patches/features/all/rt/0012-debugfs_lookup-switch-to-lookup_one_len_unlocked.patch @@ -0,0 +1,27 @@ +From: Al Viro +Date: Thu, 8 Mar 2018 11:01:22 -0500 +Subject: [PATCH 12/17] debugfs_lookup(): switch to lookup_one_len_unlocked() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit cd1c0c9321999737073dcfc3364e194e02604bce + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/debugfs/inode.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/fs/debugfs/inode.c ++++ b/fs/debugfs/inode.c +@@ -270,10 +270,7 @@ struct dentry *debugfs_lookup(const char + if (!parent) + parent = debugfs_mount->mnt_root; + +- inode_lock(d_inode(parent)); +- dentry = lookup_one_len(name, parent, strlen(name)); +- inode_unlock(d_inode(parent)); +- ++ dentry = lookup_one_len_unlocked(name, parent, strlen(name)); + if (IS_ERR(dentry)) + return NULL; + if (!d_really_is_positive(dentry)) { diff --git a/debian/patches/features/all/rt/0012-tracing-Remove-code-which-merges-duplicates.patch b/debian/patches/features/all/rt/0012-tracing-Remove-code-which-merges-duplicates.patch new file mode 100644 index 000000000..cc4f0d22b --- /dev/null +++ b/debian/patches/features/all/rt/0012-tracing-Remove-code-which-merges-duplicates.patch @@ -0,0 +1,193 @@ +From: Vedang Patel +Date: Mon, 15 Jan 2018 20:51:38 -0600 +Subject: [PATCH 12/48] tracing: Remove code which merges duplicates +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +We now have the logic to detect and remove duplicates in the +tracing_map hash table. The code which merges duplicates in the +histogram is redundant now. So, modify this code just to detect +duplicates. The duplication detection code is still kept to ensure +that any rare race condition which might cause duplicates does not go +unnoticed. 
+ +Link: http://lkml.kernel.org/r/55215cf59e2674391bdaf772fdafc4c393352b03.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Vedang Patel +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 3f7f4cc21fc62ff7da7d34b5ca95a69d73a1f764) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 11 ----- + kernel/trace/tracing_map.c | 83 ++------------------------------------- + kernel/trace/tracing_map.h | 7 --- + 3 files changed, 6 insertions(+), 95 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -340,16 +340,6 @@ static int hist_trigger_elt_comm_alloc(s + return 0; + } + +-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to, +- struct tracing_map_elt *from) +-{ +- char *comm_from = from->private_data; +- char *comm_to = to->private_data; +- +- if (comm_from) +- memcpy(comm_to, comm_from, TASK_COMM_LEN + 1); +-} +- + static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt) + { + char *comm = elt->private_data; +@@ -360,7 +350,6 @@ static void hist_trigger_elt_comm_init(s + + static const struct tracing_map_ops hist_trigger_elt_comm_ops = { + .elt_alloc = hist_trigger_elt_comm_alloc, +- .elt_copy = hist_trigger_elt_comm_copy, + .elt_free = hist_trigger_elt_comm_free, + .elt_init = hist_trigger_elt_comm_init, + }; +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -847,67 +847,15 @@ create_sort_entry(void *key, struct trac + return sort_entry; + } + +-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt) +-{ +- struct tracing_map_elt *dup_elt; +- unsigned int i; +- +- dup_elt = tracing_map_elt_alloc(elt->map); +- if (IS_ERR(dup_elt)) +- return NULL; +- +- if (elt->map->ops && elt->map->ops->elt_copy) +- elt->map->ops->elt_copy(dup_elt, elt); +- +- dup_elt->private_data = elt->private_data; +- memcpy(dup_elt->key, elt->key, elt->map->key_size); +- +- for (i = 0; i < elt->map->n_fields; i++) { +- atomic64_set(&dup_elt->fields[i].sum, +- atomic64_read(&elt->fields[i].sum)); +- dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn; +- } +- +- return dup_elt; +-} +- +-static int merge_dup(struct tracing_map_sort_entry **sort_entries, +- unsigned int target, unsigned int dup) +-{ +- struct tracing_map_elt *target_elt, *elt; +- bool first_dup = (target - dup) == 1; +- int i; +- +- if (first_dup) { +- elt = sort_entries[target]->elt; +- target_elt = copy_elt(elt); +- if (!target_elt) +- return -ENOMEM; +- sort_entries[target]->elt = target_elt; +- sort_entries[target]->elt_copied = true; +- } else +- target_elt = sort_entries[target]->elt; +- +- elt = sort_entries[dup]->elt; +- +- for (i = 0; i < elt->map->n_fields; i++) +- atomic64_add(atomic64_read(&elt->fields[i].sum), +- &target_elt->fields[i].sum); +- +- sort_entries[dup]->dup = true; +- +- return 0; +-} +- +-static int merge_dups(struct tracing_map_sort_entry **sort_entries, ++static void detect_dups(struct tracing_map_sort_entry **sort_entries, + int n_entries, unsigned int key_size) + { + unsigned int dups = 0, total_dups = 0; +- int err, i, j; ++ int i; + void *key; + + if (n_entries < 2) +- return total_dups; ++ return; + + sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), + (int (*)(const void *, const void *))cmp_entries_dup, NULL); +@@ -916,30 +864,14 @@ static int merge_dups(struct tracing_map + for (i = 1; i < n_entries; i++) { + if (!memcmp(sort_entries[i]->key, key, key_size)) { + dups++; total_dups++; +- err = 
merge_dup(sort_entries, i - dups, i); +- if (err) +- return err; + continue; + } + key = sort_entries[i]->key; + dups = 0; + } + +- if (!total_dups) +- return total_dups; +- +- for (i = 0, j = 0; i < n_entries; i++) { +- if (!sort_entries[i]->dup) { +- sort_entries[j] = sort_entries[i]; +- if (j++ != i) +- sort_entries[i] = NULL; +- } else { +- destroy_sort_entry(sort_entries[i]); +- sort_entries[i] = NULL; +- } +- } +- +- return total_dups; ++ WARN_ONCE(total_dups > 0, ++ "Duplicates detected: %d\n", total_dups); + } + + static bool is_key(struct tracing_map *map, unsigned int field_idx) +@@ -1065,10 +997,7 @@ int tracing_map_sort_entries(struct trac + return 1; + } + +- ret = merge_dups(entries, n_entries, map->key_size); +- if (ret < 0) +- goto free; +- n_entries -= ret; ++ detect_dups(entries, n_entries, map->key_size); + + if (is_key(map, sort_keys[0].field_idx)) + cmp_entries_fn = cmp_entries_key; +--- a/kernel/trace/tracing_map.h ++++ b/kernel/trace/tracing_map.h +@@ -215,11 +215,6 @@ struct tracing_map { + * Element allocation occurs before tracing begins, when the + * tracing_map_init() call is made by client code. + * +- * @elt_copy: At certain points in the lifetime of an element, it may +- * need to be copied. The copy should include a copy of the +- * client-allocated data, which can be copied into the 'to' +- * element from the 'from' element. +- * + * @elt_free: When a tracing_map_elt is freed, this function is called + * and allows client-allocated per-element data to be freed. + * +@@ -233,8 +228,6 @@ struct tracing_map { + */ + struct tracing_map_ops { + int (*elt_alloc)(struct tracing_map_elt *elt); +- void (*elt_copy)(struct tracing_map_elt *to, +- struct tracing_map_elt *from); + void (*elt_free)(struct tracing_map_elt *elt); + void (*elt_clear)(struct tracing_map_elt *elt); + void (*elt_init)(struct tracing_map_elt *elt); diff --git a/debian/patches/features/all/rt/0013-lustre-get-rid-of-pointless-casts-to-struct-dentry.patch b/debian/patches/features/all/rt/0013-lustre-get-rid-of-pointless-casts-to-struct-dentry.patch new file mode 100644 index 000000000..73f66a86c --- /dev/null +++ b/debian/patches/features/all/rt/0013-lustre-get-rid-of-pointless-casts-to-struct-dentry.patch @@ -0,0 +1,45 @@ +From: Al Viro +Date: Fri, 9 Mar 2018 18:06:03 -0500 +Subject: [PATCH 13/17] lustre: get rid of pointless casts to struct dentry * +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 5bf1ddf7ee0e23598a620ef9ea2b0f00e804859d + +... when feeding const struct dentry * to primitives taking +exactly that. + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/staging/lustre/lustre/llite/dcache.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/staging/lustre/lustre/llite/dcache.c ++++ b/drivers/staging/lustre/lustre/llite/dcache.c +@@ -90,7 +90,7 @@ static int ll_dcompare(const struct dent + d_count(dentry)); + + /* mountpoint is always valid */ +- if (d_mountpoint((struct dentry *)dentry)) ++ if (d_mountpoint(dentry)) + return 0; + + if (d_lustre_invalid(dentry)) +@@ -111,7 +111,7 @@ static int ll_ddelete(const struct dentr + LASSERT(de); + + CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n", +- d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping", ++ d_lustre_invalid(de) ? "deleting" : "keeping", + de, de, de->d_parent, d_inode(de), + d_unhashed(de) ? "" : "hashed,", + list_empty(&de->d_subdirs) ? 
"" : "subdirs"); +@@ -119,7 +119,7 @@ static int ll_ddelete(const struct dentr + /* kernel >= 2.6.38 last refcount is decreased after this function. */ + LASSERT(d_count(de) == 1); + +- if (d_lustre_invalid((struct dentry *)de)) ++ if (d_lustre_invalid(de)) + return 1; + return 0; + } diff --git a/debian/patches/features/all/rt/0013-ring-buffer-Add-interface-for-setting-absolute-time-.patch b/debian/patches/features/all/rt/0013-ring-buffer-Add-interface-for-setting-absolute-time-.patch new file mode 100644 index 000000000..391834c7b --- /dev/null +++ b/debian/patches/features/all/rt/0013-ring-buffer-Add-interface-for-setting-absolute-time-.patch @@ -0,0 +1,133 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:39 -0600 +Subject: [PATCH 13/48] ring-buffer: Add interface for setting absolute time + stamps +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Define a new function, tracing_set_time_stamp_abs(), which can be used +to enable or disable the use of absolute timestamps rather than time +deltas for a trace array. + +Only the interface is added here; a subsequent patch will add the +underlying implementation. + +Link: http://lkml.kernel.org/r/ce96119de44c7fe0ee44786d15254e9b493040d3.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 22753475c5232cd6f024746d6a6696a4dd2683ab) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/ring_buffer.h | 2 ++ + kernel/trace/ring_buffer.c | 11 +++++++++++ + kernel/trace/trace.c | 33 ++++++++++++++++++++++++++++++++- + kernel/trace/trace.h | 3 +++ + 4 files changed, 48 insertions(+), 1 deletion(-) + +--- a/include/linux/ring_buffer.h ++++ b/include/linux/ring_buffer.h +@@ -178,6 +178,8 @@ void ring_buffer_normalize_time_stamp(st + int cpu, u64 *ts); + void ring_buffer_set_clock(struct ring_buffer *buffer, + u64 (*clock)(void)); ++void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); ++bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); + + size_t ring_buffer_page_len(void *page); + +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -488,6 +488,7 @@ struct ring_buffer { + u64 (*clock)(void); + + struct rb_irq_work irq_work; ++ bool time_stamp_abs; + }; + + struct ring_buffer_iter { +@@ -1387,6 +1388,16 @@ void ring_buffer_set_clock(struct ring_b + buffer->clock = clock; + } + ++void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs) ++{ ++ buffer->time_stamp_abs = abs; ++} ++ ++bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer) ++{ ++ return buffer->time_stamp_abs; ++} ++ + static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); + + static inline unsigned long rb_page_entries(struct buffer_page *bpage) +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2269,7 +2269,7 @@ trace_event_buffer_lock_reserve(struct r + + *current_rb = trace_file->tr->trace_buffer.buffer; + +- if ((trace_file->flags & ++ if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & + (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && + (entry = this_cpu_read(trace_buffered_event))) { + /* Try to use the per cpu buffer first */ +@@ -6281,6 +6281,37 @@ static int tracing_clock_open(struct ino + + return ret; + } ++ ++int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) ++{ ++ int ret = 0; ++ ++ mutex_lock(&trace_types_lock); ++ ++ if (abs && tr->time_stamp_abs_ref++) ++ goto out; 
++ ++ if (!abs) { ++ if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (--tr->time_stamp_abs_ref) ++ goto out; ++ } ++ ++ ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs); ++ ++#ifdef CONFIG_TRACER_MAX_TRACE ++ if (tr->max_buffer.buffer) ++ ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs); ++#endif ++ out: ++ mutex_unlock(&trace_types_lock); ++ ++ return ret; ++} + + struct ftrace_buffer_info { + struct trace_iterator iter; +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -273,6 +273,7 @@ struct trace_array { + /* function tracing enabled */ + int function_enabled; + #endif ++ int time_stamp_abs_ref; + }; + + enum { +@@ -286,6 +287,8 @@ extern struct mutex trace_types_lock; + extern int trace_array_get(struct trace_array *tr); + extern void trace_array_put(struct trace_array *tr); + ++extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); ++ + /* + * The global tracer (top) should be the first trace array added, + * but we check the flag anyway. diff --git a/debian/patches/features/all/rt/0014-oprofilefs-don-t-oops-on-allocation-failure.patch b/debian/patches/features/all/rt/0014-oprofilefs-don-t-oops-on-allocation-failure.patch new file mode 100644 index 000000000..8678c67da --- /dev/null +++ b/debian/patches/features/all/rt/0014-oprofilefs-don-t-oops-on-allocation-failure.patch @@ -0,0 +1,27 @@ +From: Al Viro +Date: Sat, 10 Mar 2018 16:40:33 -0500 +Subject: [PATCH 14/17] oprofilefs: don't oops on allocation failure +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit a7498968338da9b928f5d8054acc8be6ed2bc14c + +... just short-circuit the creation of potential children + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/oprofile/oprofilefs.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/oprofile/oprofilefs.c ++++ b/drivers/oprofile/oprofilefs.c +@@ -138,6 +138,9 @@ static int __oprofilefs_create_file(stru + struct dentry *dentry; + struct inode *inode; + ++ if (!root) ++ return -ENOMEM; ++ + inode_lock(d_inode(root)); + dentry = d_alloc_name(root, name); + if (!dentry) { diff --git a/debian/patches/features/all/rt/0014-ring-buffer-Redefine-the-unimplemented-RINGBUF_TYPE_.patch b/debian/patches/features/all/rt/0014-ring-buffer-Redefine-the-unimplemented-RINGBUF_TYPE_.patch new file mode 100644 index 000000000..4af3636b0 --- /dev/null +++ b/debian/patches/features/all/rt/0014-ring-buffer-Redefine-the-unimplemented-RINGBUF_TYPE_.patch @@ -0,0 +1,323 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:40 -0600 +Subject: [PATCH 14/48] ring-buffer: Redefine the unimplemented + RINGBUF_TYPE_TIME_STAMP +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +RINGBUF_TYPE_TIME_STAMP is defined but not used, and from what I can +gather was reserved for something like an absolute timestamp feature +for the ring buffer, if not a complete replacement of the current +time_delta scheme. + +This code redefines RINGBUF_TYPE_TIME_STAMP to implement absolute time +stamps. Another way to look at it is that it essentially forces +extended time_deltas for all events. + +The motivation for doing this is to enable time_deltas that aren't +dependent on previous events in the ring buffer, making it feasible to +use the ring_buffer_event timetamps in a more random-access way, for +purposes other than serial event printing. 
+ +To set/reset this mode, use tracing_set_timestamp_abs() from the +previous interface patch. + +Link: http://lkml.kernel.org/r/477b362dba1ce7fab9889a1a8e885a62c472f041.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 58c0bd803060b0c0c9de8751382a7af5f507d74d) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/ring_buffer.h | 12 ++--- + kernel/trace/ring_buffer.c | 104 ++++++++++++++++++++++++++++++++------------ + 2 files changed, 83 insertions(+), 33 deletions(-) + +--- a/include/linux/ring_buffer.h ++++ b/include/linux/ring_buffer.h +@@ -34,10 +34,12 @@ struct ring_buffer_event { + * array[0] = time delta (28 .. 59) + * size = 8 bytes + * +- * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock +- * array[0] = tv_nsec +- * array[1..2] = tv_sec +- * size = 16 bytes ++ * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp ++ * Same format as TIME_EXTEND except that the ++ * value is an absolute timestamp, not a delta ++ * event.time_delta contains bottom 27 bits ++ * array[0] = top (28 .. 59) bits ++ * size = 8 bytes + * + * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: + * Data record +@@ -54,12 +56,12 @@ enum ring_buffer_type { + RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, + RINGBUF_TYPE_PADDING, + RINGBUF_TYPE_TIME_EXTEND, +- /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ + RINGBUF_TYPE_TIME_STAMP, + }; + + unsigned ring_buffer_event_length(struct ring_buffer_event *event); + void *ring_buffer_event_data(struct ring_buffer_event *event); ++u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); + + /* + * ring_buffer_discard_commit will remove an event that has not +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -41,6 +41,8 @@ int ring_buffer_print_entry_header(struc + RINGBUF_TYPE_PADDING); + trace_seq_printf(s, "\ttime_extend : type == %d\n", + RINGBUF_TYPE_TIME_EXTEND); ++ trace_seq_printf(s, "\ttime_stamp : type == %d\n", ++ RINGBUF_TYPE_TIME_STAMP); + trace_seq_printf(s, "\tdata max type_len == %d\n", + RINGBUF_TYPE_DATA_TYPE_LEN_MAX); + +@@ -140,12 +142,15 @@ int ring_buffer_print_entry_header(struc + + enum { + RB_LEN_TIME_EXTEND = 8, +- RB_LEN_TIME_STAMP = 16, ++ RB_LEN_TIME_STAMP = 8, + }; + + #define skip_time_extend(event) \ + ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) + ++#define extended_time(event) \ ++ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND) ++ + static inline int rb_null_event(struct ring_buffer_event *event) + { + return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; +@@ -209,7 +214,7 @@ rb_event_ts_length(struct ring_buffer_ev + { + unsigned len = 0; + +- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { ++ if (extended_time(event)) { + /* time extends include the data event after it */ + len = RB_LEN_TIME_EXTEND; + event = skip_time_extend(event); +@@ -231,7 +236,7 @@ unsigned ring_buffer_event_length(struct + { + unsigned length; + +- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) ++ if (extended_time(event)) + event = skip_time_extend(event); + + length = rb_event_length(event); +@@ -248,7 +253,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_leng + static __always_inline void * + rb_event_data(struct ring_buffer_event *event) + { +- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) ++ if (extended_time(event)) + event = skip_time_extend(event); + BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); + /* If length is in len field, then array[0] has the data */ +@@ -275,6 +280,27 @@ 
EXPORT_SYMBOL_GPL(ring_buffer_event_data + #define TS_MASK ((1ULL << TS_SHIFT) - 1) + #define TS_DELTA_TEST (~TS_MASK) + ++/** ++ * ring_buffer_event_time_stamp - return the event's extended timestamp ++ * @event: the event to get the timestamp of ++ * ++ * Returns the extended timestamp associated with a data event. ++ * An extended time_stamp is a 64-bit timestamp represented ++ * internally in a special way that makes the best use of space ++ * contained within a ring buffer event. This function decodes ++ * it and maps it to a straight u64 value. ++ */ ++u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) ++{ ++ u64 ts; ++ ++ ts = event->array[0]; ++ ts <<= TS_SHIFT; ++ ts += event->time_delta; ++ ++ return ts; ++} ++ + /* Flag when events were overwritten */ + #define RB_MISSED_EVENTS (1 << 31) + /* Missed count stored at end */ +@@ -2222,12 +2248,15 @@ rb_move_tail(struct ring_buffer_per_cpu + + /* Slow path, do not inline */ + static noinline struct ring_buffer_event * +-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) ++rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) + { +- event->type_len = RINGBUF_TYPE_TIME_EXTEND; ++ if (abs) ++ event->type_len = RINGBUF_TYPE_TIME_STAMP; ++ else ++ event->type_len = RINGBUF_TYPE_TIME_EXTEND; + +- /* Not the first event on the page? */ +- if (rb_event_index(event)) { ++ /* Not the first event on the page, or not delta? */ ++ if (abs || rb_event_index(event)) { + event->time_delta = delta & TS_MASK; + event->array[0] = delta >> TS_SHIFT; + } else { +@@ -2270,7 +2299,9 @@ rb_update_event(struct ring_buffer_per_c + * add it to the start of the resevered space. + */ + if (unlikely(info->add_timestamp)) { +- event = rb_add_time_stamp(event, delta); ++ bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer); ++ ++ event = rb_add_time_stamp(event, info->delta, abs); + length -= RB_LEN_TIME_EXTEND; + delta = 0; + } +@@ -2458,7 +2489,7 @@ static __always_inline void rb_end_commi + + static inline void rb_event_discard(struct ring_buffer_event *event) + { +- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) ++ if (extended_time(event)) + event = skip_time_extend(event); + + /* array[0] holds the actual length for the discarded event */ +@@ -2502,10 +2533,11 @@ rb_update_write_stamp(struct ring_buffer + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; + else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { +- delta = event->array[0]; +- delta <<= TS_SHIFT; +- delta += event->time_delta; ++ delta = ring_buffer_event_time_stamp(event); + cpu_buffer->write_stamp += delta; ++ } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) { ++ delta = ring_buffer_event_time_stamp(event); ++ cpu_buffer->write_stamp = delta; + } else + cpu_buffer->write_stamp += event->time_delta; + } +@@ -2685,7 +2717,7 @@ static struct ring_buffer_event * + * If this is the first commit on the page, then it has the same + * timestamp as the page itself. + */ +- if (!tail) ++ if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer)) + info->delta = 0; + + /* See if we shot pass the end of this buffer page */ +@@ -2762,8 +2794,11 @@ rb_reserve_next_event(struct ring_buffer + /* make sure this diff is calculated here */ + barrier(); + +- /* Did the write stamp get updated already? */ +- if (likely(info.ts >= cpu_buffer->write_stamp)) { ++ if (ring_buffer_time_stamp_abs(buffer)) { ++ info.delta = info.ts; ++ rb_handle_timestamp(cpu_buffer, &info); ++ } else /* Did the write stamp get updated already? 
*/ ++ if (likely(info.ts >= cpu_buffer->write_stamp)) { + info.delta = diff; + if (unlikely(test_time_stamp(info.delta))) + rb_handle_timestamp(cpu_buffer, &info); +@@ -3445,14 +3480,13 @@ rb_update_read_stamp(struct ring_buffer_ + return; + + case RINGBUF_TYPE_TIME_EXTEND: +- delta = event->array[0]; +- delta <<= TS_SHIFT; +- delta += event->time_delta; ++ delta = ring_buffer_event_time_stamp(event); + cpu_buffer->read_stamp += delta; + return; + + case RINGBUF_TYPE_TIME_STAMP: +- /* FIXME: not implemented */ ++ delta = ring_buffer_event_time_stamp(event); ++ cpu_buffer->read_stamp = delta; + return; + + case RINGBUF_TYPE_DATA: +@@ -3476,14 +3510,13 @@ rb_update_iter_read_stamp(struct ring_bu + return; + + case RINGBUF_TYPE_TIME_EXTEND: +- delta = event->array[0]; +- delta <<= TS_SHIFT; +- delta += event->time_delta; ++ delta = ring_buffer_event_time_stamp(event); + iter->read_stamp += delta; + return; + + case RINGBUF_TYPE_TIME_STAMP: +- /* FIXME: not implemented */ ++ delta = ring_buffer_event_time_stamp(event); ++ iter->read_stamp = delta; + return; + + case RINGBUF_TYPE_DATA: +@@ -3707,6 +3740,8 @@ rb_buffer_peek(struct ring_buffer_per_cp + struct buffer_page *reader; + int nr_loops = 0; + ++ if (ts) ++ *ts = 0; + again: + /* + * We repeat when a time extend is encountered. +@@ -3743,12 +3778,17 @@ rb_buffer_peek(struct ring_buffer_per_cp + goto again; + + case RINGBUF_TYPE_TIME_STAMP: +- /* FIXME: not implemented */ ++ if (ts) { ++ *ts = ring_buffer_event_time_stamp(event); ++ ring_buffer_normalize_time_stamp(cpu_buffer->buffer, ++ cpu_buffer->cpu, ts); ++ } ++ /* Internal data, OK to advance */ + rb_advance_reader(cpu_buffer); + goto again; + + case RINGBUF_TYPE_DATA: +- if (ts) { ++ if (ts && !(*ts)) { + *ts = cpu_buffer->read_stamp + event->time_delta; + ring_buffer_normalize_time_stamp(cpu_buffer->buffer, + cpu_buffer->cpu, ts); +@@ -3773,6 +3813,9 @@ rb_iter_peek(struct ring_buffer_iter *it + struct ring_buffer_event *event; + int nr_loops = 0; + ++ if (ts) ++ *ts = 0; ++ + cpu_buffer = iter->cpu_buffer; + buffer = cpu_buffer->buffer; + +@@ -3825,12 +3868,17 @@ rb_iter_peek(struct ring_buffer_iter *it + goto again; + + case RINGBUF_TYPE_TIME_STAMP: +- /* FIXME: not implemented */ ++ if (ts) { ++ *ts = ring_buffer_event_time_stamp(event); ++ ring_buffer_normalize_time_stamp(cpu_buffer->buffer, ++ cpu_buffer->cpu, ts); ++ } ++ /* Internal data, OK to advance */ + rb_advance_iter(iter); + goto again; + + case RINGBUF_TYPE_DATA: +- if (ts) { ++ if (ts && !(*ts)) { + *ts = iter->read_stamp + event->time_delta; + ring_buffer_normalize_time_stamp(buffer, + cpu_buffer->cpu, ts); diff --git a/debian/patches/features/all/rt/0015-make-non-exchanging-__d_move-copy-d_parent-rather-th.patch b/debian/patches/features/all/rt/0015-make-non-exchanging-__d_move-copy-d_parent-rather-th.patch new file mode 100644 index 000000000..4f9935510 --- /dev/null +++ b/debian/patches/features/all/rt/0015-make-non-exchanging-__d_move-copy-d_parent-rather-th.patch @@ -0,0 +1,212 @@ +From: Al Viro +Date: Sat, 10 Mar 2018 23:15:52 -0500 +Subject: [PATCH 15/17] make non-exchanging __d_move() copy ->d_parent rather + than swap them +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 076515fc926793e162fc6525bed1679ef2bbf269 + +Currently d_move(from, to) does the following: + * name/parent of from <- old name/parent of to, from hashed there + * to is unhashed + * name of to is preserved + * if from used to be detached, to gets detached + * if from used 
to be attached, parent of to <- old parent of from. + +That's both user-visibly bogus and complicates reasoning a lot. +Much saner semantics would be + * name/parent of from <- name/parent of to, from hashed there. + * to is unhashed + * name/parent of to is unchanged. + +The price, of course, is that old parent of from might lose a reference. +However, + * all potentially cross-directory callers of d_move() have both +parents pinned directly; typically, dentries themselves are grabbed +only after we have grabbed and locked both parents. IOW, the decrement +of old parent's refcount in case of d_move() won't reach zero. + * __d_move() from d_splice_alias() is done to detached alias. +No refcount decrements in that case + * __d_move() from __d_unalias() *can* get the refcount to zero. +So let's grab a reference to alias' old parent before calling __d_unalias() +and dput() it after we'd dropped rename_lock. + +That does make d_splice_alias() potentially blocking. However, it has +no callers in non-sleepable contexts (and the case where we'd grown +that dget/dput pair is _very_ rare, so performance is not an issue). + +Another thing that needs adjustment is unlocking in the end of __d_move(); +folded it in. And cleaned the remnants of bogus ordering from the +"lock them in the beginning" counterpart - it's never been right and +now (well, for 7 years now) we have that thing always serialized on +rename_lock anyway. + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 93 +++++++++++++++++++----------------------------------------- + 1 file changed, 30 insertions(+), 63 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -67,9 +67,7 @@ + * dentry->d_lock + * + * If no ancestor relationship: +- * if (dentry1 < dentry2) +- * dentry1->d_lock +- * dentry2->d_lock ++ * arbitrary, since it's serialized on rename_lock + */ + int sysctl_vfs_cache_pressure __read_mostly = 100; + EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); +@@ -2777,9 +2775,6 @@ static void copy_name(struct dentry *den + + static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) + { +- /* +- * XXXX: do we really need to take target->d_lock? +- */ + if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) + spin_lock(&target->d_parent->d_lock); + else { +@@ -2793,40 +2788,11 @@ static void dentry_lock_for_move(struct + DENTRY_D_LOCK_NESTED); + } + } +- if (target < dentry) { +- spin_lock_nested(&target->d_lock, 2); +- spin_lock_nested(&dentry->d_lock, 3); +- } else { +- spin_lock_nested(&dentry->d_lock, 2); +- spin_lock_nested(&target->d_lock, 3); +- } +-} +- +-static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target) +-{ +- if (target->d_parent != dentry->d_parent) +- spin_unlock(&dentry->d_parent->d_lock); +- if (target->d_parent != target) +- spin_unlock(&target->d_parent->d_lock); +- spin_unlock(&target->d_lock); +- spin_unlock(&dentry->d_lock); ++ spin_lock_nested(&dentry->d_lock, 2); ++ spin_lock_nested(&target->d_lock, 3); + } + + /* +- * When switching names, the actual string doesn't strictly have to +- * be preserved in the target - because we're dropping the target +- * anyway. As such, we can just do a simple memcpy() to copy over +- * the new name before we switch, unless we are going to rehash +- * it. 
Note that if we *do* unhash the target, we are not allowed +- * to rehash it without giving it a new name/hash key - whether +- * we swap or overwrite the names here, resulting name won't match +- * the reality in filesystem; it's only there for d_path() purposes. +- * Note that all of this is happening under rename_lock, so the +- * any hash lookup seeing it in the middle of manipulations will +- * be discarded anyway. So we do not care what happens to the hash +- * key in that case. +- */ +-/* + * __d_move - move a dentry + * @dentry: entry to move + * @target: new dentry +@@ -2840,6 +2806,7 @@ static void dentry_unlock_for_move(struc + static void __d_move(struct dentry *dentry, struct dentry *target, + bool exchange) + { ++ struct dentry *old_parent; + struct inode *dir = NULL; + unsigned n; + if (!dentry->d_inode) +@@ -2858,49 +2825,47 @@ static void __d_move(struct dentry *dent + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); + ++ old_parent = dentry->d_parent; ++ + /* unhash both */ + if (!d_unhashed(dentry)) + ___d_drop(dentry); + if (!d_unhashed(target)) + ___d_drop(target); + +- /* Switch the names.. */ +- if (exchange) +- swap_names(dentry, target); +- else ++ /* ... and switch them in the tree */ ++ dentry->d_parent = target->d_parent; ++ if (!exchange) { + copy_name(dentry, target); +- +- /* rehash in new place(s) */ +- __d_rehash(dentry); +- if (exchange) +- __d_rehash(target); +- else + target->d_hash.pprev = NULL; +- +- /* ... and switch them in the tree */ +- if (IS_ROOT(dentry)) { +- /* splicing a tree */ +- dentry->d_flags |= DCACHE_RCUACCESS; +- dentry->d_parent = target->d_parent; +- target->d_parent = target; +- list_del_init(&target->d_child); +- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); ++ dentry->d_parent->d_lockref.count++; ++ if (dentry == old_parent) ++ dentry->d_flags |= DCACHE_RCUACCESS; ++ else ++ WARN_ON(!--old_parent->d_lockref.count); + } else { +- /* swapping two dentries */ +- swap(dentry->d_parent, target->d_parent); ++ target->d_parent = old_parent; ++ swap_names(dentry, target); + list_move(&target->d_child, &target->d_parent->d_subdirs); +- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); +- if (exchange) +- fsnotify_update_flags(target); +- fsnotify_update_flags(dentry); ++ __d_rehash(target); ++ fsnotify_update_flags(target); + } ++ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); ++ __d_rehash(dentry); ++ fsnotify_update_flags(dentry); + + write_seqcount_end(&target->d_seq); + write_seqcount_end(&dentry->d_seq); + + if (dir) + end_dir_add(dir, n); +- dentry_unlock_for_move(dentry, target); ++ ++ if (dentry->d_parent != old_parent) ++ spin_unlock(&dentry->d_parent->d_lock); ++ if (dentry != old_parent) ++ spin_unlock(&old_parent->d_lock); ++ spin_unlock(&target->d_lock); ++ spin_unlock(&dentry->d_lock); + } + + /* +@@ -3048,12 +3013,14 @@ struct dentry *d_splice_alias(struct ino + inode->i_sb->s_type->name, + inode->i_sb->s_id); + } else if (!IS_ROOT(new)) { ++ struct dentry *old_parent = dget(new->d_parent); + int err = __d_unalias(inode, dentry, new); + write_sequnlock(&rename_lock); + if (err) { + dput(new); + new = ERR_PTR(err); + } ++ dput(old_parent); + } else { + __d_move(new, dentry, false); + write_sequnlock(&rename_lock); diff --git a/debian/patches/features/all/rt/0015-tracing-Add-timestamp_mode-trace-file.patch b/debian/patches/features/all/rt/0015-tracing-Add-timestamp_mode-trace-file.patch new file mode 100644 index 000000000..46d33434a 
--- /dev/null +++ b/debian/patches/features/all/rt/0015-tracing-Add-timestamp_mode-trace-file.patch @@ -0,0 +1,138 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:41 -0600 +Subject: [PATCH 15/48] tracing: Add timestamp_mode trace file +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add a new option flag indicating whether or not the ring buffer is in +'absolute timestamp' mode. + +Currently this is only set/unset by hist triggers that make use of a +common_timestamp. As such, there's no reason to make this writeable +for users - its purpose is only to allow users to determine +unequivocally whether or not the ring buffer is in that mode (although +absolute timestamps can coexist with the normal delta timestamps, when +the ring buffer is in absolute mode, timestamps written while absolute +mode is in effect take up more space in the buffer, and are not as +efficient). + +Link: http://lkml.kernel.org/r/e8aa7b1cde1cf15014e66545d06ac6ef2ebba456.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 0eba34f9bf5b66217355a6a66054b3194aca123d) +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/trace/ftrace.txt | 24 ++++++++++++++++++++ + kernel/trace/trace.c | 47 +++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 71 insertions(+) + +--- a/Documentation/trace/ftrace.txt ++++ b/Documentation/trace/ftrace.txt +@@ -539,6 +539,30 @@ After mounting tracefs you will have acc + + See events.txt for more information. + ++ timestamp_mode: ++ ++ Certain tracers may change the timestamp mode used when ++ logging trace events into the event buffer. Events with ++ different modes can coexist within a buffer but the mode in ++ effect when an event is logged determines which timestamp mode ++ is used for that event. The default timestamp mode is ++ 'delta'. ++ ++ Usual timestamp modes for tracing: ++ ++ # cat timestamp_mode ++ [delta] absolute ++ ++ The timestamp mode with the square brackets around it is the ++ one in effect. ++ ++ delta: Default timestamp mode - timestamp is a delta against ++ a per-buffer timestamp. ++ ++ absolute: The timestamp is a full timestamp, not a delta ++ against some other value. As such it takes up more ++ space and is less efficient. ++ + hwlat_detector: + + Directory for the Hardware Latency Detector. 
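+
+A minimal userspace check of the new file might look like this (sketch
+only; the path assumes tracefs is mounted at /sys/kernel/debug/tracing,
+adjust if it is mounted elsewhere):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		char buf[64];
+		FILE *f = fopen("/sys/kernel/debug/tracing/timestamp_mode", "r");
+
+		if (!f)
+			return 1;
+		/* prints e.g. "[delta] absolute"; brackets mark the active mode */
+		if (fgets(buf, sizeof(buf), f))
+			fputs(buf, stdout);
+		fclose(f);
+		return 0;
+	}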
+--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4515,6 +4515,9 @@ static const char readme_msg[] = + #ifdef CONFIG_X86_64 + " x86-tsc: TSC cycle counter\n" + #endif ++ "\n timestamp_mode\t-view the mode used to timestamp events\n" ++ " delta: Delta difference against a buffer-wide timestamp\n" ++ " absolute: Absolute (standalone) timestamp\n" + "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" + "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" + " tracing_cpumask\t- Limit which CPUs to trace\n" +@@ -6282,6 +6285,40 @@ static int tracing_clock_open(struct ino + return ret; + } + ++static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) ++{ ++ struct trace_array *tr = m->private; ++ ++ mutex_lock(&trace_types_lock); ++ ++ if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer)) ++ seq_puts(m, "delta [absolute]\n"); ++ else ++ seq_puts(m, "[delta] absolute\n"); ++ ++ mutex_unlock(&trace_types_lock); ++ ++ return 0; ++} ++ ++static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) ++{ ++ struct trace_array *tr = inode->i_private; ++ int ret; ++ ++ if (tracing_disabled) ++ return -ENODEV; ++ ++ if (trace_array_get(tr)) ++ return -ENODEV; ++ ++ ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); ++ if (ret < 0) ++ trace_array_put(tr); ++ ++ return ret; ++} ++ + int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) + { + int ret = 0; +@@ -6560,6 +6597,13 @@ static const struct file_operations trac + .write = tracing_clock_write, + }; + ++static const struct file_operations trace_time_stamp_mode_fops = { ++ .open = tracing_time_stamp_mode_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = tracing_single_release_tr, ++}; ++ + #ifdef CONFIG_TRACER_SNAPSHOT + static const struct file_operations snapshot_fops = { + .open = tracing_snapshot_open, +@@ -7882,6 +7926,9 @@ init_tracer_tracefs(struct trace_array * + trace_create_file("tracing_on", 0644, d_tracer, + tr, &rb_simple_fops); + ++ trace_create_file("timestamp_mode", 0444, d_tracer, tr, ++ &trace_time_stamp_mode_fops); ++ + create_trace_options_dir(tr); + + #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) diff --git a/debian/patches/features/all/rt/0016-fold-dentry_lock_for_move-into-its-sole-caller-and-c.patch b/debian/patches/features/all/rt/0016-fold-dentry_lock_for_move-into-its-sole-caller-and-c.patch new file mode 100644 index 000000000..5a385d7f3 --- /dev/null +++ b/debian/patches/features/all/rt/0016-fold-dentry_lock_for_move-into-its-sole-caller-and-c.patch @@ -0,0 +1,91 @@ +From: Al Viro +Date: Sun, 11 Mar 2018 15:15:46 -0400 +Subject: [PATCH 16/17] fold dentry_lock_for_move() into its sole caller and + clean it up +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 42177007aa277af3e37bf2ae3efdfe795c81d700 + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 49 +++++++++++++++++++++++-------------------------- + 1 file changed, 23 insertions(+), 26 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2773,25 +2773,6 @@ static void copy_name(struct dentry *den + kfree_rcu(old_name, u.head); + } + +-static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) +-{ +- if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent) +- spin_lock(&target->d_parent->d_lock); +- else { +- if (d_ancestor(dentry->d_parent, target->d_parent)) { +- 
spin_lock(&dentry->d_parent->d_lock); +- spin_lock_nested(&target->d_parent->d_lock, +- DENTRY_D_LOCK_NESTED); +- } else { +- spin_lock(&target->d_parent->d_lock); +- spin_lock_nested(&dentry->d_parent->d_lock, +- DENTRY_D_LOCK_NESTED); +- } +- } +- spin_lock_nested(&dentry->d_lock, 2); +- spin_lock_nested(&target->d_lock, 3); +-} +- + /* + * __d_move - move a dentry + * @dentry: entry to move +@@ -2806,16 +2787,34 @@ static void dentry_lock_for_move(struct + static void __d_move(struct dentry *dentry, struct dentry *target, + bool exchange) + { +- struct dentry *old_parent; ++ struct dentry *old_parent, *p; + struct inode *dir = NULL; + unsigned n; +- if (!dentry->d_inode) +- printk(KERN_WARNING "VFS: moving negative dcache entry\n"); + +- BUG_ON(d_ancestor(dentry, target)); ++ WARN_ON(!dentry->d_inode); ++ if (WARN_ON(dentry == target)) ++ return; ++ + BUG_ON(d_ancestor(target, dentry)); ++ old_parent = dentry->d_parent; ++ p = d_ancestor(old_parent, target); ++ if (IS_ROOT(dentry)) { ++ BUG_ON(p); ++ spin_lock(&target->d_parent->d_lock); ++ } else if (!p) { ++ /* target is not a descendent of dentry->d_parent */ ++ spin_lock(&target->d_parent->d_lock); ++ spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED); ++ } else { ++ BUG_ON(p == dentry); ++ spin_lock(&old_parent->d_lock); ++ if (p != target) ++ spin_lock_nested(&target->d_parent->d_lock, ++ DENTRY_D_LOCK_NESTED); ++ } ++ spin_lock_nested(&dentry->d_lock, 2); ++ spin_lock_nested(&target->d_lock, 3); + +- dentry_lock_for_move(dentry, target); + if (unlikely(d_in_lookup(target))) { + dir = target->d_parent->d_inode; + n = start_dir_add(dir); +@@ -2825,8 +2824,6 @@ static void __d_move(struct dentry *dent + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); + +- old_parent = dentry->d_parent; +- + /* unhash both */ + if (!d_unhashed(dentry)) + ___d_drop(dentry); diff --git a/debian/patches/features/all/rt/0016-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch b/debian/patches/features/all/rt/0016-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch new file mode 100644 index 000000000..69e083d97 --- /dev/null +++ b/debian/patches/features/all/rt/0016-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch @@ -0,0 +1,303 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:42 -0600 +Subject: [PATCH 16/48] tracing: Give event triggers access to + ring_buffer_event +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The ring_buffer event can provide a timestamp that may be useful to +various triggers - pass it into the handlers for that purpose. 
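+
+The resulting handler shape, as a sketch (hypothetical handler name; the
+real conversions are in the hunks below): a handler interested in timing
+can now take the timestamp straight from the ring buffer event, as the
+later hist-trigger patches in this series do.
+
+	static void example_trigger(struct event_trigger_data *data, void *rec,
+				    struct ring_buffer_event *rbe)
+	{
+		u64 ts = 0;
+
+		/* rbe (and rec) may be NULL, e.g. for soft-disabled events */
+		if (rbe)
+			ts = ring_buffer_event_time_stamp(rbe);
+
+		/* ... act on the trigger, optionally using ts ... */
+	}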
+ +Link: http://lkml.kernel.org/r/6de592683b59fa70ffa5d43d0109896623fc1367.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 373514437a6f75b5cfe890742b590f2c12f6c335) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/trace_events.h | 14 ++++++---- + kernel/trace/trace.h | 9 +++--- + kernel/trace/trace_events_hist.c | 11 +++++--- + kernel/trace/trace_events_trigger.c | 47 ++++++++++++++++++++++-------------- + 4 files changed, 49 insertions(+), 32 deletions(-) + +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -430,11 +430,13 @@ enum event_trigger_type { + + extern int filter_match_preds(struct event_filter *filter, void *rec); + +-extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, +- void *rec); +-extern void event_triggers_post_call(struct trace_event_file *file, +- enum event_trigger_type tt, +- void *rec); ++extern enum event_trigger_type ++event_triggers_call(struct trace_event_file *file, void *rec, ++ struct ring_buffer_event *event); ++extern void ++event_triggers_post_call(struct trace_event_file *file, ++ enum event_trigger_type tt, ++ void *rec, struct ring_buffer_event *event); + + bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); + +@@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace + + if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { + if (eflags & EVENT_FILE_FL_TRIGGER_MODE) +- event_triggers_call(file, NULL); ++ event_triggers_call(file, NULL, NULL); + if (eflags & EVENT_FILE_FL_SOFT_DISABLED) + return true; + if (eflags & EVENT_FILE_FL_PID_FILTER) +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -1294,7 +1294,7 @@ static inline bool + unsigned long eflags = file->flags; + + if (eflags & EVENT_FILE_FL_TRIGGER_COND) +- *tt = event_triggers_call(file, entry); ++ *tt = event_triggers_call(file, entry, event); + + if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || + (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && +@@ -1331,7 +1331,7 @@ event_trigger_unlock_commit(struct trace + trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); + + if (tt) +- event_triggers_post_call(file, tt, entry); ++ event_triggers_post_call(file, tt, entry, event); + } + + /** +@@ -1364,7 +1364,7 @@ event_trigger_unlock_commit_regs(struct + irq_flags, pc, regs); + + if (tt) +- event_triggers_post_call(file, tt, entry); ++ event_triggers_post_call(file, tt, entry, event); + } + + #define FILTER_PRED_INVALID ((unsigned short)-1) +@@ -1589,7 +1589,8 @@ extern int register_trigger_hist_enable_ + */ + struct event_trigger_ops { + void (*func)(struct event_trigger_data *data, +- void *rec); ++ void *rec, ++ struct ring_buffer_event *rbe); + int (*init)(struct event_trigger_ops *ops, + struct event_trigger_data *data); + void (*free)(struct event_trigger_ops *ops, +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -909,7 +909,8 @@ static inline void add_to_key(char *comp + memcpy(compound_key + key_field->offset, key, size); + } + +-static void event_hist_trigger(struct event_trigger_data *data, void *rec) ++static void event_hist_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + struct hist_trigger_data *hist_data = data->private_data; + bool use_compound_key = (hist_data->n_keys > 1); +@@ -1658,7 +1659,8 @@ static struct event_command trigger_hist + } + + static void +-hist_enable_trigger(struct 
event_trigger_data *data, void *rec) ++hist_enable_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + struct enable_trigger_data *enable_data = data->private_data; + struct event_trigger_data *test; +@@ -1674,7 +1676,8 @@ hist_enable_trigger(struct event_trigger + } + + static void +-hist_enable_count_trigger(struct event_trigger_data *data, void *rec) ++hist_enable_count_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + if (!data->count) + return; +@@ -1682,7 +1685,7 @@ hist_enable_count_trigger(struct event_t + if (data->count != -1) + (data->count)--; + +- hist_enable_trigger(data, rec); ++ hist_enable_trigger(data, rec, event); + } + + static struct event_trigger_ops hist_enable_trigger_ops = { +--- a/kernel/trace/trace_events_trigger.c ++++ b/kernel/trace/trace_events_trigger.c +@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trig + * any trigger that should be deferred, ETT_NONE if nothing to defer. + */ + enum event_trigger_type +-event_triggers_call(struct trace_event_file *file, void *rec) ++event_triggers_call(struct trace_event_file *file, void *rec, ++ struct ring_buffer_event *event) + { + struct event_trigger_data *data; + enum event_trigger_type tt = ETT_NONE; +@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_f + if (data->paused) + continue; + if (!rec) { +- data->ops->func(data, rec); ++ data->ops->func(data, rec, event); + continue; + } + filter = rcu_dereference_sched(data->filter); +@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_f + tt |= data->cmd_ops->trigger_type; + continue; + } +- data->ops->func(data, rec); ++ data->ops->func(data, rec, event); + } + return tt; + } +@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); + void + event_triggers_post_call(struct trace_event_file *file, + enum event_trigger_type tt, +- void *rec) ++ void *rec, struct ring_buffer_event *event) + { + struct event_trigger_data *data; + +@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_ev + if (data->paused) + continue; + if (data->cmd_ops->trigger_type & tt) +- data->ops->func(data, rec); ++ data->ops->func(data, rec, event); + } + } + EXPORT_SYMBOL_GPL(event_triggers_post_call); +@@ -909,7 +910,8 @@ void set_named_trigger_data(struct event + } + + static void +-traceon_trigger(struct event_trigger_data *data, void *rec) ++traceon_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + if (tracing_is_on()) + return; +@@ -918,7 +920,8 @@ traceon_trigger(struct event_trigger_dat + } + + static void +-traceon_count_trigger(struct event_trigger_data *data, void *rec) ++traceon_count_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + if (tracing_is_on()) + return; +@@ -933,7 +936,8 @@ traceon_count_trigger(struct event_trigg + } + + static void +-traceoff_trigger(struct event_trigger_data *data, void *rec) ++traceoff_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + if (!tracing_is_on()) + return; +@@ -942,7 +946,8 @@ traceoff_trigger(struct event_trigger_da + } + + static void +-traceoff_count_trigger(struct event_trigger_data *data, void *rec) ++traceoff_count_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + if (!tracing_is_on()) + return; +@@ -1039,13 +1044,15 @@ static struct event_command trigger_trac + + #ifdef CONFIG_TRACER_SNAPSHOT + static void +-snapshot_trigger(struct event_trigger_data 
*data, void *rec) ++snapshot_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + tracing_snapshot(); + } + + static void +-snapshot_count_trigger(struct event_trigger_data *data, void *rec) ++snapshot_count_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + if (!data->count) + return; +@@ -1053,7 +1060,7 @@ snapshot_count_trigger(struct event_trig + if (data->count != -1) + (data->count)--; + +- snapshot_trigger(data, rec); ++ snapshot_trigger(data, rec, event); + } + + static int +@@ -1141,13 +1148,15 @@ static __init int register_trigger_snaps + #endif + + static void +-stacktrace_trigger(struct event_trigger_data *data, void *rec) ++stacktrace_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + trace_dump_stack(STACK_SKIP); + } + + static void +-stacktrace_count_trigger(struct event_trigger_data *data, void *rec) ++stacktrace_count_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + if (!data->count) + return; +@@ -1155,7 +1164,7 @@ stacktrace_count_trigger(struct event_tr + if (data->count != -1) + (data->count)--; + +- stacktrace_trigger(data, rec); ++ stacktrace_trigger(data, rec, event); + } + + static int +@@ -1217,7 +1226,8 @@ static __init void unregister_trigger_tr + } + + static void +-event_enable_trigger(struct event_trigger_data *data, void *rec) ++event_enable_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + struct enable_trigger_data *enable_data = data->private_data; + +@@ -1228,7 +1238,8 @@ event_enable_trigger(struct event_trigge + } + + static void +-event_enable_count_trigger(struct event_trigger_data *data, void *rec) ++event_enable_count_trigger(struct event_trigger_data *data, void *rec, ++ struct ring_buffer_event *event) + { + struct enable_trigger_data *enable_data = data->private_data; + +@@ -1242,7 +1253,7 @@ event_enable_count_trigger(struct event_ + if (data->count != -1) + (data->count)--; + +- event_enable_trigger(data, rec); ++ event_enable_trigger(data, rec, event); + } + + int event_enable_trigger_print(struct seq_file *m, diff --git a/debian/patches/features/all/rt/0017-d_genocide-move-export-to-definition.patch b/debian/patches/features/all/rt/0017-d_genocide-move-export-to-definition.patch new file mode 100644 index 000000000..555f6f1b2 --- /dev/null +++ b/debian/patches/features/all/rt/0017-d_genocide-move-export-to-definition.patch @@ -0,0 +1,33 @@ +From: Al Viro +Date: Thu, 29 Mar 2018 15:08:21 -0400 +Subject: [PATCH 17/17] d_genocide: move export to definition +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit cbd4a5bcb25b5ed0c1c64bc969b893cad9b78acc + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -3095,6 +3095,8 @@ void d_genocide(struct dentry *parent) + d_walk(parent, parent, d_genocide_kill, NULL); + } + ++EXPORT_SYMBOL(d_genocide); ++ + void d_tmpfile(struct dentry *dentry, struct inode *inode) + { + inode_dec_link_count(inode); +@@ -3174,8 +3176,6 @@ static void __init dcache_init(void) + struct kmem_cache *names_cachep __read_mostly; + EXPORT_SYMBOL(names_cachep); + +-EXPORT_SYMBOL(d_genocide); +- + void __init vfs_caches_init_early(void) + { + int i; diff --git 
a/debian/patches/features/all/rt/0017-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch b/debian/patches/features/all/rt/0017-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch new file mode 100644 index 000000000..a1ec9d415 --- /dev/null +++ b/debian/patches/features/all/rt/0017-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch @@ -0,0 +1,144 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:43 -0600 +Subject: [PATCH 17/48] tracing: Add ring buffer event param to hist field + functions +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Some events such as timestamps require access to a ring_buffer_event +struct; add a param so that hist field functions can access that. + +Link: http://lkml.kernel.org/r/2ff4af18e72b6002eb86b26b2a7f39cef7d1dfe4.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit df7253a730d0aaef760d45ea234dc087ba7cac88) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 39 ++++++++++++++++++++++++--------------- + 1 file changed, 24 insertions(+), 15 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -26,7 +26,8 @@ + + struct hist_field; + +-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event); ++typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event, ++ struct ring_buffer_event *rbe); + + #define HIST_FIELD_OPERANDS_MAX 2 + +@@ -40,24 +41,28 @@ struct hist_field { + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; + }; + +-static u64 hist_field_none(struct hist_field *field, void *event) ++static u64 hist_field_none(struct hist_field *field, void *event, ++ struct ring_buffer_event *rbe) + { + return 0; + } + +-static u64 hist_field_counter(struct hist_field *field, void *event) ++static u64 hist_field_counter(struct hist_field *field, void *event, ++ struct ring_buffer_event *rbe) + { + return 1; + } + +-static u64 hist_field_string(struct hist_field *hist_field, void *event) ++static u64 hist_field_string(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) + { + char *addr = (char *)(event + hist_field->field->offset); + + return (u64)(unsigned long)addr; + } + +-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event) ++static u64 hist_field_dynstring(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) + { + u32 str_item = *(u32 *)(event + hist_field->field->offset); + int str_loc = str_item & 0xffff; +@@ -66,24 +71,28 @@ static u64 hist_field_dynstring(struct h + return (u64)(unsigned long)addr; + } + +-static u64 hist_field_pstring(struct hist_field *hist_field, void *event) ++static u64 hist_field_pstring(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) + { + char **addr = (char **)(event + hist_field->field->offset); + + return (u64)(unsigned long)*addr; + } + +-static u64 hist_field_log2(struct hist_field *hist_field, void *event) ++static u64 hist_field_log2(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) + { + struct hist_field *operand = hist_field->operands[0]; + +- u64 val = operand->fn(operand, event); ++ u64 val = operand->fn(operand, event, rbe); + + return (u64) ilog2(roundup_pow_of_two(val)); + } + + #define DEFINE_HIST_FIELD_FN(type) \ +-static u64 hist_field_##type(struct hist_field *hist_field, void *event)\ ++ static u64 
hist_field_##type(struct hist_field *hist_field, \ ++ void *event, \ ++ struct ring_buffer_event *rbe) \ + { \ + type *addr = (type *)(event + hist_field->field->offset); \ + \ +@@ -871,8 +880,8 @@ create_hist_data(unsigned int map_bits, + } + + static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, +- struct tracing_map_elt *elt, +- void *rec) ++ struct tracing_map_elt *elt, void *rec, ++ struct ring_buffer_event *rbe) + { + struct hist_field *hist_field; + unsigned int i; +@@ -880,7 +889,7 @@ static void hist_trigger_elt_update(stru + + for_each_hist_val_field(i, hist_data) { + hist_field = hist_data->fields[i]; +- hist_val = hist_field->fn(hist_field, rec); ++ hist_val = hist_field->fn(hist_field, rec, rbe); + tracing_map_update_sum(elt, i, hist_val); + } + } +@@ -910,7 +919,7 @@ static inline void add_to_key(char *comp + } + + static void event_hist_trigger(struct event_trigger_data *data, void *rec, +- struct ring_buffer_event *event) ++ struct ring_buffer_event *rbe) + { + struct hist_trigger_data *hist_data = data->private_data; + bool use_compound_key = (hist_data->n_keys > 1); +@@ -939,7 +948,7 @@ static void event_hist_trigger(struct ev + + key = entries; + } else { +- field_contents = key_field->fn(key_field, rec); ++ field_contents = key_field->fn(key_field, rec, rbe); + if (key_field->flags & HIST_FIELD_FL_STRING) { + key = (void *)(unsigned long)field_contents; + use_compound_key = true; +@@ -956,7 +965,7 @@ static void event_hist_trigger(struct ev + + elt = tracing_map_insert(hist_data->map, key); + if (elt) +- hist_trigger_elt_update(hist_data, elt, rec); ++ hist_trigger_elt_update(hist_data, elt, rec, rbe); + } + + static void hist_trigger_stacktrace_print(struct seq_file *m, diff --git a/debian/patches/features/all/rt/0018-tracing-Break-out-hist-trigger-assignment-parsing.patch b/debian/patches/features/all/rt/0018-tracing-Break-out-hist-trigger-assignment-parsing.patch new file mode 100644 index 000000000..fabdcf7b3 --- /dev/null +++ b/debian/patches/features/all/rt/0018-tracing-Break-out-hist-trigger-assignment-parsing.patch @@ -0,0 +1,113 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:44 -0600 +Subject: [PATCH 18/48] tracing: Break out hist trigger assignment parsing +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +This will make it easier to add variables, and makes the parsing code +cleaner regardless. 
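+
+The resulting parse loop has roughly this shape (condensed from the hunk
+below; error handling elided): any ':'-separated token containing an '='
+is routed through the new helper, and the bare flag keywords stay inline.
+
+	while (trigger_str) {
+		char *str = strsep(&trigger_str, ":");
+
+		if (strchr(str, '='))	/* key=, vals=, sort=, name=, size= */
+			ret = parse_assignment(str, attrs);
+		else if (strcmp(str, "pause") == 0)
+			attrs->pause = true;
+		/* ... "cont"/"continue" and "clear" handled likewise ... */
+	}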
+ +Link: http://lkml.kernel.org/r/e574b3291bbe15e35a4dfc87e5395aa715701c98.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Rajvi Jingar +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 3c1e23def1291b21a2057f883ccc0456418dc5ad) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 72 +++++++++++++++++++++++++++------------ + 1 file changed, 51 insertions(+), 21 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -251,6 +251,51 @@ static void destroy_hist_trigger_attrs(s + kfree(attrs); + } + ++static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) ++{ ++ int ret = 0; ++ ++ if ((strncmp(str, "key=", strlen("key=")) == 0) || ++ (strncmp(str, "keys=", strlen("keys=")) == 0)) { ++ attrs->keys_str = kstrdup(str, GFP_KERNEL); ++ if (!attrs->keys_str) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if ((strncmp(str, "val=", strlen("val=")) == 0) || ++ (strncmp(str, "vals=", strlen("vals=")) == 0) || ++ (strncmp(str, "values=", strlen("values=")) == 0)) { ++ attrs->vals_str = kstrdup(str, GFP_KERNEL); ++ if (!attrs->vals_str) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if (strncmp(str, "sort=", strlen("sort=")) == 0) { ++ attrs->sort_key_str = kstrdup(str, GFP_KERNEL); ++ if (!attrs->sort_key_str) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if (strncmp(str, "name=", strlen("name=")) == 0) { ++ attrs->name = kstrdup(str, GFP_KERNEL); ++ if (!attrs->name) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else if (strncmp(str, "size=", strlen("size=")) == 0) { ++ int map_bits = parse_map_size(str); ++ ++ if (map_bits < 0) { ++ ret = map_bits; ++ goto out; ++ } ++ attrs->map_bits = map_bits; ++ } else ++ ret = -EINVAL; ++ out: ++ return ret; ++} ++ + static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) + { + struct hist_trigger_attrs *attrs; +@@ -263,33 +308,18 @@ static struct hist_trigger_attrs *parse_ + while (trigger_str) { + char *str = strsep(&trigger_str, ":"); + +- if ((strncmp(str, "key=", strlen("key=")) == 0) || +- (strncmp(str, "keys=", strlen("keys=")) == 0)) +- attrs->keys_str = kstrdup(str, GFP_KERNEL); +- else if ((strncmp(str, "val=", strlen("val=")) == 0) || +- (strncmp(str, "vals=", strlen("vals=")) == 0) || +- (strncmp(str, "values=", strlen("values=")) == 0)) +- attrs->vals_str = kstrdup(str, GFP_KERNEL); +- else if (strncmp(str, "sort=", strlen("sort=")) == 0) +- attrs->sort_key_str = kstrdup(str, GFP_KERNEL); +- else if (strncmp(str, "name=", strlen("name=")) == 0) +- attrs->name = kstrdup(str, GFP_KERNEL); +- else if (strcmp(str, "pause") == 0) ++ if (strchr(str, '=')) { ++ ret = parse_assignment(str, attrs); ++ if (ret) ++ goto free; ++ } else if (strcmp(str, "pause") == 0) + attrs->pause = true; + else if ((strcmp(str, "cont") == 0) || + (strcmp(str, "continue") == 0)) + attrs->cont = true; + else if (strcmp(str, "clear") == 0) + attrs->clear = true; +- else if (strncmp(str, "size=", strlen("size=")) == 0) { +- int map_bits = parse_map_size(str); +- +- if (map_bits < 0) { +- ret = map_bits; +- goto free; +- } +- attrs->map_bits = map_bits; +- } else { ++ else { + ret = -EINVAL; + goto free; + } diff --git a/debian/patches/features/all/rt/0019-tracing-Add-hist-trigger-timestamp-support.patch b/debian/patches/features/all/rt/0019-tracing-Add-hist-trigger-timestamp-support.patch new file mode 100644 index 000000000..c505f0f53 --- /dev/null +++ 
b/debian/patches/features/all/rt/0019-tracing-Add-hist-trigger-timestamp-support.patch @@ -0,0 +1,247 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:45 -0600 +Subject: [PATCH 19/48] tracing: Add hist trigger timestamp support +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add support for a timestamp event field. This is actually a 'pseudo-' +event field in that it behaves like it's part of the event record, but +is really part of the corresponding ring buffer event. + +To make use of the timestamp field, users can specify +"common_timestamp" as a field name for any histogram. Note that this +doesn't make much sense on its own either as either a key or value, +but needs to be supported even so, since follow-on patches will add +support for making use of this field in time deltas. The +common_timestamp 'field' is not a bona fide event field - so you won't +find it in the event description - but rather it's a synthetic field +that can be used like a real field. + +Note that the use of this field requires the ring buffer be put into +'absolute timestamp' mode, which saves the complete timestamp for each +event rather than an offset. This mode will be enabled if and only if +a histogram makes use of the "common_timestamp" field. + +Link: http://lkml.kernel.org/r/97afbd646ed146e26271f3458b4b33e16d7817c2.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu +[kasan use-after-free fix] +Signed-off-by: Vedang Patel +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 5d9d58b00ff82078deac8557c91359cd13c8959d) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 94 +++++++++++++++++++++++++++++---------- + 1 file changed, 71 insertions(+), 23 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -89,6 +89,12 @@ static u64 hist_field_log2(struct hist_f + return (u64) ilog2(roundup_pow_of_two(val)); + } + ++static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) ++{ ++ return ring_buffer_event_time_stamp(rbe); ++} ++ + #define DEFINE_HIST_FIELD_FN(type) \ + static u64 hist_field_##type(struct hist_field *hist_field, \ + void *event, \ +@@ -135,6 +141,7 @@ enum hist_field_flags { + HIST_FIELD_FL_SYSCALL = 1 << 7, + HIST_FIELD_FL_STACKTRACE = 1 << 8, + HIST_FIELD_FL_LOG2 = 1 << 9, ++ HIST_FIELD_FL_TIMESTAMP = 1 << 10, + }; + + struct hist_trigger_attrs { +@@ -159,6 +166,7 @@ struct hist_trigger_data { + struct trace_event_file *event_file; + struct hist_trigger_attrs *attrs; + struct tracing_map *map; ++ bool enable_timestamps; + }; + + static const char *hist_field_name(struct hist_field *field, +@@ -173,6 +181,8 @@ static const char *hist_field_name(struc + field_name = field->field->name; + else if (field->flags & HIST_FIELD_FL_LOG2) + field_name = hist_field_name(field->operands[0], ++level); ++ else if (field->flags & HIST_FIELD_FL_TIMESTAMP) ++ field_name = "common_timestamp"; + + if (field_name == NULL) + field_name = ""; +@@ -440,6 +450,12 @@ static struct hist_field *create_hist_fi + goto out; + } + ++ if (flags & HIST_FIELD_FL_TIMESTAMP) { ++ hist_field->fn = hist_field_timestamp; ++ hist_field->size = sizeof(u64); ++ goto out; ++ } ++ + if (WARN_ON_ONCE(!field)) + goto out; + +@@ -517,10 +533,15 @@ static int create_val_field(struct hist_ + } + } + +- field = trace_find_event_field(file->event_call, field_name); +- if (!field || !field->size) { +- 
ret = -EINVAL; +- goto out; ++ if (strcmp(field_name, "common_timestamp") == 0) { ++ flags |= HIST_FIELD_FL_TIMESTAMP; ++ hist_data->enable_timestamps = true; ++ } else { ++ field = trace_find_event_field(file->event_call, field_name); ++ if (!field || !field->size) { ++ ret = -EINVAL; ++ goto out; ++ } + } + + hist_data->fields[val_idx] = create_hist_field(field, flags); +@@ -615,16 +636,22 @@ static int create_key_field(struct hist_ + } + } + +- field = trace_find_event_field(file->event_call, field_name); +- if (!field || !field->size) { +- ret = -EINVAL; +- goto out; +- } ++ if (strcmp(field_name, "common_timestamp") == 0) { ++ flags |= HIST_FIELD_FL_TIMESTAMP; ++ hist_data->enable_timestamps = true; ++ key_size = sizeof(u64); ++ } else { ++ field = trace_find_event_field(file->event_call, field_name); ++ if (!field || !field->size) { ++ ret = -EINVAL; ++ goto out; ++ } + +- if (is_string_field(field)) +- key_size = MAX_FILTER_STR_VAL; +- else +- key_size = field->size; ++ if (is_string_field(field)) ++ key_size = MAX_FILTER_STR_VAL; ++ else ++ key_size = field->size; ++ } + } + + hist_data->fields[key_idx] = create_hist_field(field, flags); +@@ -820,6 +847,9 @@ static int create_tracing_map_fields(str + + if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) + cmp_fn = tracing_map_cmp_none; ++ else if (!field) ++ cmp_fn = tracing_map_cmp_num(hist_field->size, ++ hist_field->is_signed); + else if (is_string_field(field)) + cmp_fn = tracing_map_cmp_string; + else +@@ -1215,7 +1245,11 @@ static void hist_field_print(struct seq_ + { + const char *field_name = hist_field_name(hist_field, 0); + +- seq_printf(m, "%s", field_name); ++ if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) ++ seq_puts(m, "common_timestamp"); ++ else if (field_name) ++ seq_printf(m, "%s", field_name); ++ + if (hist_field->flags) { + const char *flags_str = get_hist_field_flags(hist_field); + +@@ -1266,27 +1300,25 @@ static int event_hist_trigger_print(stru + + for (i = 0; i < hist_data->n_sort_keys; i++) { + struct tracing_map_sort_key *sort_key; ++ unsigned int idx; + + sort_key = &hist_data->sort_keys[i]; ++ idx = sort_key->field_idx; ++ ++ if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX)) ++ return -EINVAL; + + if (i > 0) + seq_puts(m, ","); + +- if (sort_key->field_idx == HITCOUNT_IDX) ++ if (idx == HITCOUNT_IDX) + seq_puts(m, "hitcount"); +- else { +- unsigned int idx = sort_key->field_idx; +- +- if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX)) +- return -EINVAL; +- ++ else + hist_field_print(m, hist_data->fields[idx]); +- } + + if (sort_key->descending) + seq_puts(m, ".descending"); + } +- + seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); + + if (data->filter_str) +@@ -1454,6 +1486,10 @@ static bool hist_trigger_match(struct ev + return false; + if (key_field->offset != key_field_test->offset) + return false; ++ if (key_field->size != key_field_test->size) ++ return false; ++ if (key_field->is_signed != key_field_test->is_signed) ++ return false; + } + + for (i = 0; i < hist_data->n_sort_keys; i++) { +@@ -1536,6 +1572,9 @@ static int hist_register_trigger(char *g + + update_cond_flag(file); + ++ if (hist_data->enable_timestamps) ++ tracing_set_time_stamp_abs(file->tr, true); ++ + if (trace_event_trigger_enable_disable(file, 1) < 0) { + list_del_rcu(&data->list); + update_cond_flag(file); +@@ -1570,17 +1609,26 @@ static void hist_unregister_trigger(char + + if (unregistered && test->ops->free) + test->ops->free(test->ops, test); ++ ++ if (hist_data->enable_timestamps) { ++ if (unregistered) ++ 
tracing_set_time_stamp_abs(file->tr, false); ++ } + } + + static void hist_unreg_all(struct trace_event_file *file) + { + struct event_trigger_data *test, *n; ++ struct hist_trigger_data *hist_data; + + list_for_each_entry_safe(test, n, &file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ hist_data = test->private_data; + list_del_rcu(&test->list); + trace_event_trigger_enable_disable(file, 0); + update_cond_flag(file); ++ if (hist_data->enable_timestamps) ++ tracing_set_time_stamp_abs(file->tr, false); + if (test->ops->free) + test->ops->free(test->ops, test); + } diff --git a/debian/patches/features/all/rt/0020-tracing-Add-per-element-variable-support-to-tracing_.patch b/debian/patches/features/all/rt/0020-tracing-Add-per-element-variable-support-to-tracing_.patch new file mode 100644 index 000000000..b1a41a5f0 --- /dev/null +++ b/debian/patches/features/all/rt/0020-tracing-Add-per-element-variable-support-to-tracing_.patch @@ -0,0 +1,225 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:46 -0600 +Subject: [PATCH 20/48] tracing: Add per-element variable support to + tracing_map +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +In order to allow information to be passed between trace events, add +support for per-element variables to tracing_map. This provides a +means for histograms to associate a value or values with an entry when +it's saved or updated, and retrieved by subsequent event occurrences. + +Variables can be set using tracing_map_set_var() and read using +tracing_map_read_var(). tracing_map_var_set() returns true or false +depending on whether or not the variable has been set, which is +important for event-matching applications. + +tracing_map_read_var_once() reads the variable and resets it to the +'unset' state, implementing read-once variables, which are also +important for event-matching uses. + +Link: http://lkml.kernel.org/r/7fa001108252556f0c6dd9d63145eabfe3370d1a.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 42a38132f9e154e1fa2dd2182dff17f9c0e7ee7e) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/tracing_map.c | 108 +++++++++++++++++++++++++++++++++++++++++++++ + kernel/trace/tracing_map.h | 11 ++++ + 2 files changed, 119 insertions(+) + +--- a/kernel/trace/tracing_map.c ++++ b/kernel/trace/tracing_map.c +@@ -66,6 +66,73 @@ u64 tracing_map_read_sum(struct tracing_ + return (u64)atomic64_read(&elt->fields[i].sum); + } + ++/** ++ * tracing_map_set_var - Assign a tracing_map_elt's variable field ++ * @elt: The tracing_map_elt ++ * @i: The index of the given variable associated with the tracing_map_elt ++ * @n: The value to assign ++ * ++ * Assign n to variable i associated with the specified tracing_map_elt ++ * instance. The index i is the index returned by the call to ++ * tracing_map_add_var() when the tracing map was set up. ++ */ ++void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n) ++{ ++ atomic64_set(&elt->vars[i], n); ++ elt->var_set[i] = true; ++} ++ ++/** ++ * tracing_map_var_set - Return whether or not a variable has been set ++ * @elt: The tracing_map_elt ++ * @i: The index of the given variable associated with the tracing_map_elt ++ * ++ * Return true if the variable has been set, false otherwise. The ++ * index i is the index returned by the call to tracing_map_add_var() ++ * when the tracing map was set up. 
++ */ ++bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i) ++{ ++ return elt->var_set[i]; ++} ++ ++/** ++ * tracing_map_read_var - Return the value of a tracing_map_elt's variable field ++ * @elt: The tracing_map_elt ++ * @i: The index of the given variable associated with the tracing_map_elt ++ * ++ * Retrieve the value of the variable i associated with the specified ++ * tracing_map_elt instance. The index i is the index returned by the ++ * call to tracing_map_add_var() when the tracing map was set ++ * up. ++ * ++ * Return: The variable value associated with field i for elt. ++ */ ++u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i) ++{ ++ return (u64)atomic64_read(&elt->vars[i]); ++} ++ ++/** ++ * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field ++ * @elt: The tracing_map_elt ++ * @i: The index of the given variable associated with the tracing_map_elt ++ * ++ * Retrieve the value of the variable i associated with the specified ++ * tracing_map_elt instance, and reset the variable to the 'not set' ++ * state. The index i is the index returned by the call to ++ * tracing_map_add_var() when the tracing map was set up. The reset ++ * essentially makes the variable a read-once variable if it's only ++ * accessed using this function. ++ * ++ * Return: The variable value associated with field i for elt. ++ */ ++u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i) ++{ ++ elt->var_set[i] = false; ++ return (u64)atomic64_read(&elt->vars[i]); ++} ++ + int tracing_map_cmp_string(void *val_a, void *val_b) + { + char *a = val_a; +@@ -171,6 +238,28 @@ int tracing_map_add_sum_field(struct tra + } + + /** ++ * tracing_map_add_var - Add a field describing a tracing_map var ++ * @map: The tracing_map ++ * ++ * Add a var to the map and return the index identifying it in the map ++ * and associated tracing_map_elts. This is the index used for ++ * instance to update a var for a particular tracing_map_elt using ++ * tracing_map_update_var() or reading it via tracing_map_read_var(). ++ * ++ * Return: The index identifying the var in the map and associated ++ * tracing_map_elts, or -EINVAL on error. 
++ */ ++int tracing_map_add_var(struct tracing_map *map) ++{ ++ int ret = -EINVAL; ++ ++ if (map->n_vars < TRACING_MAP_VARS_MAX) ++ ret = map->n_vars++; ++ ++ return ret; ++} ++ ++/** + * tracing_map_add_key_field - Add a field describing a tracing_map key + * @map: The tracing_map + * @offset: The offset within the key +@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct + if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) + atomic64_set(&elt->fields[i].sum, 0); + ++ for (i = 0; i < elt->map->n_vars; i++) { ++ atomic64_set(&elt->vars[i], 0); ++ elt->var_set[i] = false; ++ } ++ + if (elt->map->ops && elt->map->ops->elt_clear) + elt->map->ops->elt_clear(elt); + } +@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct + if (elt->map->ops && elt->map->ops->elt_free) + elt->map->ops->elt_free(elt); + kfree(elt->fields); ++ kfree(elt->vars); ++ kfree(elt->var_set); + kfree(elt->key); + kfree(elt); + } +@@ -332,6 +428,18 @@ static struct tracing_map_elt *tracing_m + err = -ENOMEM; + goto free; + } ++ ++ elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL); ++ if (!elt->vars) { ++ err = -ENOMEM; ++ goto free; ++ } ++ ++ elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL); ++ if (!elt->var_set) { ++ err = -ENOMEM; ++ goto free; ++ } + + tracing_map_elt_init_fields(elt); + +--- a/kernel/trace/tracing_map.h ++++ b/kernel/trace/tracing_map.h +@@ -10,6 +10,7 @@ + #define TRACING_MAP_VALS_MAX 3 + #define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \ + TRACING_MAP_VALS_MAX) ++#define TRACING_MAP_VARS_MAX 16 + #define TRACING_MAP_SORT_KEYS_MAX 2 + + typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b); +@@ -137,6 +138,8 @@ struct tracing_map_field { + struct tracing_map_elt { + struct tracing_map *map; + struct tracing_map_field *fields; ++ atomic64_t *vars; ++ bool *var_set; + void *key; + void *private_data; + }; +@@ -192,6 +195,7 @@ struct tracing_map { + int key_idx[TRACING_MAP_KEYS_MAX]; + unsigned int n_keys; + struct tracing_map_sort_key sort_key; ++ unsigned int n_vars; + atomic64_t hits; + atomic64_t drops; + }; +@@ -241,6 +245,7 @@ tracing_map_create(unsigned int map_bits + extern int tracing_map_init(struct tracing_map *map); + + extern int tracing_map_add_sum_field(struct tracing_map *map); ++extern int tracing_map_add_var(struct tracing_map *map); + extern int tracing_map_add_key_field(struct tracing_map *map, + unsigned int offset, + tracing_map_cmp_fn_t cmp_fn); +@@ -260,7 +265,13 @@ extern int tracing_map_cmp_none(void *va + + extern void tracing_map_update_sum(struct tracing_map_elt *elt, + unsigned int i, u64 n); ++extern void tracing_map_set_var(struct tracing_map_elt *elt, ++ unsigned int i, u64 n); ++extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i); + extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i); ++extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i); ++extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i); ++ + extern void tracing_map_set_field_descr(struct tracing_map *map, + unsigned int i, + unsigned int key_offset, diff --git a/debian/patches/features/all/rt/0021-tracing-Add-hist_data-member-to-hist_field.patch b/debian/patches/features/all/rt/0021-tracing-Add-hist_data-member-to-hist_field.patch new file mode 100644 index 000000000..1c4d098d0 --- /dev/null +++ b/debian/patches/features/all/rt/0021-tracing-Add-hist_data-member-to-hist_field.patch @@ -0,0 +1,83 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 
20:51:47 -0600 +Subject: [PATCH 21/48] tracing: Add hist_data member to hist_field +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Allow hist_data access via hist_field. Some users of hist_fields +require or will require more access to the associated hist_data. + +Link: http://lkml.kernel.org/r/d04cd0768f5228ebb4ac0ba4a847bc4d14d4826f.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 14ab3edac407939009700c04215935576250e969) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -39,6 +39,7 @@ struct hist_field { + unsigned int offset; + unsigned int is_signed; + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; ++ struct hist_trigger_data *hist_data; + }; + + static u64 hist_field_none(struct hist_field *field, void *event, +@@ -420,7 +421,8 @@ static void destroy_hist_field(struct hi + kfree(hist_field); + } + +-static struct hist_field *create_hist_field(struct ftrace_event_field *field, ++static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, ++ struct ftrace_event_field *field, + unsigned long flags) + { + struct hist_field *hist_field; +@@ -432,6 +434,8 @@ static struct hist_field *create_hist_fi + if (!hist_field) + return NULL; + ++ hist_field->hist_data = hist_data; ++ + if (flags & HIST_FIELD_FL_HITCOUNT) { + hist_field->fn = hist_field_counter; + goto out; +@@ -445,7 +449,7 @@ static struct hist_field *create_hist_fi + if (flags & HIST_FIELD_FL_LOG2) { + unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; + hist_field->fn = hist_field_log2; +- hist_field->operands[0] = create_hist_field(field, fl); ++ hist_field->operands[0] = create_hist_field(hist_data, field, fl); + hist_field->size = hist_field->operands[0]->size; + goto out; + } +@@ -498,7 +502,7 @@ static void destroy_hist_fields(struct h + static int create_hitcount_val(struct hist_trigger_data *hist_data) + { + hist_data->fields[HITCOUNT_IDX] = +- create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT); ++ create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT); + if (!hist_data->fields[HITCOUNT_IDX]) + return -ENOMEM; + +@@ -544,7 +548,7 @@ static int create_val_field(struct hist_ + } + } + +- hist_data->fields[val_idx] = create_hist_field(field, flags); ++ hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags); + if (!hist_data->fields[val_idx]) { + ret = -ENOMEM; + goto out; +@@ -654,7 +658,7 @@ static int create_key_field(struct hist_ + } + } + +- hist_data->fields[key_idx] = create_hist_field(field, flags); ++ hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags); + if (!hist_data->fields[key_idx]) { + ret = -ENOMEM; + goto out; diff --git a/debian/patches/features/all/rt/0022-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch b/debian/patches/features/all/rt/0022-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch new file mode 100644 index 000000000..4f4550688 --- /dev/null +++ b/debian/patches/features/all/rt/0022-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch @@ -0,0 +1,158 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:48 -0600 +Subject: [PATCH 22/48] tracing: Add usecs modifier for hist trigger timestamps +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + 
+Appending .usecs onto a common_timestamp field will cause the +timestamp value to be in microseconds instead of the default +nanoseconds. A typical latency histogram using usecs would look like +this: + + # echo 'hist:keys=pid,prio:ts0=common_timestamp.usecs ... + # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 ... + +This also adds an external trace_clock_in_ns() to trace.c for the +timestamp conversion. + +Link: http://lkml.kernel.org/r/4e813705a170b3e13e97dc3135047362fb1a39f3.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 4fa4fdb0fe5d0e87e05b0c5b443cec2269ec0609) +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/trace/histogram.txt | 1 + + kernel/trace/trace.c | 13 +++++++++++-- + kernel/trace/trace.h | 2 ++ + kernel/trace/trace_events_hist.c | 28 ++++++++++++++++++++++------ + 4 files changed, 36 insertions(+), 8 deletions(-) + +--- a/Documentation/trace/histogram.txt ++++ b/Documentation/trace/histogram.txt +@@ -74,6 +74,7 @@ + .syscall display a syscall id as a system call name + .execname display a common_pid as a program name + .log2 display log2 value rather than raw number ++ .usecs display a common_timestamp in microseconds + + Note that in general the semantics of a given field aren't + interpreted when applying a modifier to it, but there are some +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -1168,6 +1168,14 @@ static struct { + ARCH_TRACE_CLOCKS + }; + ++bool trace_clock_in_ns(struct trace_array *tr) ++{ ++ if (trace_clocks[tr->clock_id].in_ns) ++ return true; ++ ++ return false; ++} ++ + /* + * trace_parser_get_init - gets the buffer for trace parser + */ +@@ -4694,8 +4702,9 @@ static const char readme_msg[] = + "\t .sym display an address as a symbol\n" + "\t .sym-offset display an address as a symbol and offset\n" + "\t .execname display a common_pid as a program name\n" +- "\t .syscall display a syscall id as a syscall name\n\n" +- "\t .log2 display log2 value rather than raw number\n\n" ++ "\t .syscall display a syscall id as a syscall name\n" ++ "\t .log2 display log2 value rather than raw number\n" ++ "\t .usecs display a common_timestamp in microseconds\n\n" + "\t The 'pause' parameter can be used to pause an existing hist\n" + "\t trigger or to start a hist trigger but not log any events\n" + "\t until told to do so. 'continue' can be used to start or\n" +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -289,6 +289,8 @@ extern void trace_array_put(struct trace + + extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); + ++extern bool trace_clock_in_ns(struct trace_array *tr); ++ + /* + * The global tracer (top) should be the first trace array added, + * but we check the flag anyway. 
+--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -90,12 +90,6 @@ static u64 hist_field_log2(struct hist_f + return (u64) ilog2(roundup_pow_of_two(val)); + } + +-static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) +-{ +- return ring_buffer_event_time_stamp(rbe); +-} +- + #define DEFINE_HIST_FIELD_FN(type) \ + static u64 hist_field_##type(struct hist_field *hist_field, \ + void *event, \ +@@ -143,6 +137,7 @@ enum hist_field_flags { + HIST_FIELD_FL_STACKTRACE = 1 << 8, + HIST_FIELD_FL_LOG2 = 1 << 9, + HIST_FIELD_FL_TIMESTAMP = 1 << 10, ++ HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, + }; + + struct hist_trigger_attrs { +@@ -153,6 +148,7 @@ struct hist_trigger_attrs { + bool pause; + bool cont; + bool clear; ++ bool ts_in_usecs; + unsigned int map_bits; + }; + +@@ -170,6 +166,20 @@ struct hist_trigger_data { + bool enable_timestamps; + }; + ++static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) ++{ ++ struct hist_trigger_data *hist_data = hist_field->hist_data; ++ struct trace_array *tr = hist_data->event_file->tr; ++ ++ u64 ts = ring_buffer_event_time_stamp(rbe); ++ ++ if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr)) ++ ts = ns2usecs(ts); ++ ++ return ts; ++} ++ + static const char *hist_field_name(struct hist_field *field, + unsigned int level) + { +@@ -634,6 +644,8 @@ static int create_key_field(struct hist_ + flags |= HIST_FIELD_FL_SYSCALL; + else if (strcmp(field_str, "log2") == 0) + flags |= HIST_FIELD_FL_LOG2; ++ else if (strcmp(field_str, "usecs") == 0) ++ flags |= HIST_FIELD_FL_TIMESTAMP_USECS; + else { + ret = -EINVAL; + goto out; +@@ -643,6 +655,8 @@ static int create_key_field(struct hist_ + if (strcmp(field_name, "common_timestamp") == 0) { + flags |= HIST_FIELD_FL_TIMESTAMP; + hist_data->enable_timestamps = true; ++ if (flags & HIST_FIELD_FL_TIMESTAMP_USECS) ++ hist_data->attrs->ts_in_usecs = true; + key_size = sizeof(u64); + } else { + field = trace_find_event_field(file->event_call, field_name); +@@ -1241,6 +1255,8 @@ static const char *get_hist_field_flags( + flags_str = "syscall"; + else if (hist_field->flags & HIST_FIELD_FL_LOG2) + flags_str = "log2"; ++ else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) ++ flags_str = "usecs"; + + return flags_str; + } diff --git a/debian/patches/features/all/rt/0023-tracing-Add-variable-support-to-hist-triggers.patch b/debian/patches/features/all/rt/0023-tracing-Add-variable-support-to-hist-triggers.patch new file mode 100644 index 000000000..78ceff137 --- /dev/null +++ b/debian/patches/features/all/rt/0023-tracing-Add-variable-support-to-hist-triggers.patch @@ -0,0 +1,783 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:49 -0600 +Subject: [PATCH 23/48] tracing: Add variable support to hist triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add support for saving the value of a current event's event field by +assigning it to a variable that can be read by a subsequent event. + +The basic syntax for saving a variable is to simply prefix a unique +variable name not corresponding to any keyword along with an '=' sign +to any event field. + +Both keys and values can be saved and retrieved in this way: + + # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ... + # echo 'hist:timer_pid=common_pid:key=$timer_pid ...' 
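+
+As a complete, runnable sketch of the above (the sched_wakeup event
+used here is only illustrative; any event field can be assigned the
+same way), saving a per-pid timestamp looks like this:
+
+  # echo 'hist:keys=pid:ts0=common_timestamp' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger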
+ +If a variable isn't a key variable or prefixed with 'vals=', the +associated event field will be saved in a variable but won't be summed +as a value: + + # echo 'hist:keys=next_pid:ts1=common_timestamp:... + +Multiple variables can be assigned at the same time: + + # echo 'hist:keys=pid:vals=$ts0,$b,field2:ts0=common_timestamp,b=field1 ... + +Multiple (or single) variables can also be assigned at the same time +using separate assignments: + + # echo 'hist:keys=pid:vals=$ts0:ts0=common_timestamp:b=field1:c=field2 ... + +Variables set as above can be used by being referenced from another +event, as described in a subsequent patch. + +Link: http://lkml.kernel.org/r/fc93c4944d9719dbcb1d0067be627d44e98e2adc.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit b073711690e3af61965e53f197a56638b3c65a81) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 370 ++++++++++++++++++++++++++++++++++----- + 1 file changed, 331 insertions(+), 39 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -30,6 +30,13 @@ typedef u64 (*hist_field_fn_t) (struct h + struct ring_buffer_event *rbe); + + #define HIST_FIELD_OPERANDS_MAX 2 ++#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) ++ ++struct hist_var { ++ char *name; ++ struct hist_trigger_data *hist_data; ++ unsigned int idx; ++}; + + struct hist_field { + struct ftrace_event_field *field; +@@ -40,6 +47,7 @@ struct hist_field { + unsigned int is_signed; + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; + struct hist_trigger_data *hist_data; ++ struct hist_var var; + }; + + static u64 hist_field_none(struct hist_field *field, void *event, +@@ -138,6 +146,13 @@ enum hist_field_flags { + HIST_FIELD_FL_LOG2 = 1 << 9, + HIST_FIELD_FL_TIMESTAMP = 1 << 10, + HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, ++ HIST_FIELD_FL_VAR = 1 << 12, ++}; ++ ++struct var_defs { ++ unsigned int n_vars; ++ char *name[TRACING_MAP_VARS_MAX]; ++ char *expr[TRACING_MAP_VARS_MAX]; + }; + + struct hist_trigger_attrs { +@@ -150,13 +165,19 @@ struct hist_trigger_attrs { + bool clear; + bool ts_in_usecs; + unsigned int map_bits; ++ ++ char *assignment_str[TRACING_MAP_VARS_MAX]; ++ unsigned int n_assignments; ++ ++ struct var_defs var_defs; + }; + + struct hist_trigger_data { +- struct hist_field *fields[TRACING_MAP_FIELDS_MAX]; ++ struct hist_field *fields[HIST_FIELDS_MAX]; + unsigned int n_vals; + unsigned int n_keys; + unsigned int n_fields; ++ unsigned int n_vars; + unsigned int key_size; + struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; + unsigned int n_sort_keys; +@@ -164,6 +185,7 @@ struct hist_trigger_data { + struct hist_trigger_attrs *attrs; + struct tracing_map *map; + bool enable_timestamps; ++ bool remove; + }; + + static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, +@@ -180,6 +202,48 @@ static u64 hist_field_timestamp(struct h + return ts; + } + ++static struct hist_field *find_var_field(struct hist_trigger_data *hist_data, ++ const char *var_name) ++{ ++ struct hist_field *hist_field, *found = NULL; ++ int i; ++ ++ for_each_hist_field(i, hist_data) { ++ hist_field = hist_data->fields[i]; ++ if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR && ++ strcmp(hist_field->var.name, var_name) == 0) { ++ found = hist_field; ++ break; ++ } ++ } ++ ++ return found; ++} ++ ++static struct hist_field *find_var(struct 
hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ const char *var_name) ++{ ++ struct hist_trigger_data *test_data; ++ struct event_trigger_data *test; ++ struct hist_field *hist_field; ++ ++ hist_field = find_var_field(hist_data, var_name); ++ if (hist_field) ++ return hist_field; ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ test_data = test->private_data; ++ hist_field = find_var_field(test_data, var_name); ++ if (hist_field) ++ return hist_field; ++ } ++ } ++ ++ return NULL; ++} ++ + static const char *hist_field_name(struct hist_field *field, + unsigned int level) + { +@@ -262,9 +326,14 @@ static int parse_map_size(char *str) + + static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs) + { ++ unsigned int i; ++ + if (!attrs) + return; + ++ for (i = 0; i < attrs->n_assignments; i++) ++ kfree(attrs->assignment_str[i]); ++ + kfree(attrs->name); + kfree(attrs->sort_key_str); + kfree(attrs->keys_str); +@@ -311,8 +380,22 @@ static int parse_assignment(char *str, s + goto out; + } + attrs->map_bits = map_bits; +- } else +- ret = -EINVAL; ++ } else { ++ char *assignment; ++ ++ if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ assignment = kstrdup(str, GFP_KERNEL); ++ if (!assignment) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ attrs->assignment_str[attrs->n_assignments++] = assignment; ++ } + out: + return ret; + } +@@ -428,12 +511,15 @@ static void destroy_hist_field(struct hi + for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) + destroy_hist_field(hist_field->operands[i], level + 1); + ++ kfree(hist_field->var.name); ++ + kfree(hist_field); + } + + static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, + struct ftrace_event_field *field, +- unsigned long flags) ++ unsigned long flags, ++ char *var_name) + { + struct hist_field *hist_field; + +@@ -459,7 +545,7 @@ static struct hist_field *create_hist_fi + if (flags & HIST_FIELD_FL_LOG2) { + unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; + hist_field->fn = hist_field_log2; +- hist_field->operands[0] = create_hist_field(hist_data, field, fl); ++ hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); + hist_field->size = hist_field->operands[0]->size; + goto out; + } +@@ -494,14 +580,23 @@ static struct hist_field *create_hist_fi + hist_field->field = field; + hist_field->flags = flags; + ++ if (var_name) { ++ hist_field->var.name = kstrdup(var_name, GFP_KERNEL); ++ if (!hist_field->var.name) ++ goto free; ++ } ++ + return hist_field; ++ free: ++ destroy_hist_field(hist_field, 0); ++ return NULL; + } + + static void destroy_hist_fields(struct hist_trigger_data *hist_data) + { + unsigned int i; + +- for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) { ++ for (i = 0; i < HIST_FIELDS_MAX; i++) { + if (hist_data->fields[i]) { + destroy_hist_field(hist_data->fields[i], 0); + hist_data->fields[i] = NULL; +@@ -512,11 +607,12 @@ static void destroy_hist_fields(struct h + static int create_hitcount_val(struct hist_trigger_data *hist_data) + { + hist_data->fields[HITCOUNT_IDX] = +- create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT); ++ create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL); + if (!hist_data->fields[HITCOUNT_IDX]) + return -ENOMEM; + + hist_data->n_vals++; ++ hist_data->n_fields++; + + if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) + return -EINVAL; +@@ -524,19 +620,16 @@ static int create_hitcount_val(struct hi + return 0; + } + 
+-static int create_val_field(struct hist_trigger_data *hist_data, +- unsigned int val_idx, +- struct trace_event_file *file, +- char *field_str) ++static int __create_val_field(struct hist_trigger_data *hist_data, ++ unsigned int val_idx, ++ struct trace_event_file *file, ++ char *var_name, char *field_str, ++ unsigned long flags) + { + struct ftrace_event_field *field = NULL; +- unsigned long flags = 0; + char *field_name; + int ret = 0; + +- if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) +- return -EINVAL; +- + field_name = strsep(&field_str, "."); + if (field_str) { + if (strcmp(field_str, "hex") == 0) +@@ -558,25 +651,58 @@ static int create_val_field(struct hist_ + } + } + +- hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags); ++ hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags, var_name); + if (!hist_data->fields[val_idx]) { + ret = -ENOMEM; + goto out; + } + + ++hist_data->n_vals; ++ ++hist_data->n_fields; + +- if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) ++ if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) + ret = -EINVAL; + out: + return ret; + } + ++static int create_val_field(struct hist_trigger_data *hist_data, ++ unsigned int val_idx, ++ struct trace_event_file *file, ++ char *field_str) ++{ ++ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) ++ return -EINVAL; ++ ++ return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0); ++} ++ ++static int create_var_field(struct hist_trigger_data *hist_data, ++ unsigned int val_idx, ++ struct trace_event_file *file, ++ char *var_name, char *expr_str) ++{ ++ unsigned long flags = 0; ++ ++ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) ++ return -EINVAL; ++ if (find_var(hist_data, file, var_name) && !hist_data->remove) { ++ return -EINVAL; ++ } ++ ++ flags |= HIST_FIELD_FL_VAR; ++ hist_data->n_vars++; ++ if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX)) ++ return -EINVAL; ++ ++ return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); ++} ++ + static int create_val_fields(struct hist_trigger_data *hist_data, + struct trace_event_file *file) + { + char *fields_str, *field_str; +- unsigned int i, j; ++ unsigned int i, j = 1; + int ret; + + ret = create_hitcount_val(hist_data); +@@ -596,12 +722,15 @@ static int create_val_fields(struct hist + field_str = strsep(&fields_str, ","); + if (!field_str) + break; ++ + if (strcmp(field_str, "hitcount") == 0) + continue; ++ + ret = create_val_field(hist_data, j++, file, field_str); + if (ret) + goto out; + } ++ + if (fields_str && (strcmp(fields_str, "hitcount") != 0)) + ret = -EINVAL; + out: +@@ -615,11 +744,12 @@ static int create_key_field(struct hist_ + char *field_str) + { + struct ftrace_event_field *field = NULL; ++ struct hist_field *hist_field = NULL; + unsigned long flags = 0; + unsigned int key_size; + int ret = 0; + +- if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX)) ++ if (WARN_ON(key_idx >= HIST_FIELDS_MAX)) + return -EINVAL; + + flags |= HIST_FIELD_FL_KEY; +@@ -627,6 +757,7 @@ static int create_key_field(struct hist_ + if (strcmp(field_str, "stacktrace") == 0) { + flags |= HIST_FIELD_FL_STACKTRACE; + key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH; ++ hist_field = create_hist_field(hist_data, NULL, flags, NULL); + } else { + char *field_name = strsep(&field_str, "."); + +@@ -672,7 +803,7 @@ static int create_key_field(struct hist_ + } + } + +- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags); ++ 
hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL); + if (!hist_data->fields[key_idx]) { + ret = -ENOMEM; + goto out; +@@ -688,6 +819,7 @@ static int create_key_field(struct hist_ + } + + hist_data->n_keys++; ++ hist_data->n_fields++; + + if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX)) + return -EINVAL; +@@ -731,21 +863,111 @@ static int create_key_fields(struct hist + return ret; + } + ++static int create_var_fields(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file) ++{ ++ unsigned int i, j = hist_data->n_vals; ++ int ret = 0; ++ ++ unsigned int n_vars = hist_data->attrs->var_defs.n_vars; ++ ++ for (i = 0; i < n_vars; i++) { ++ char *var_name = hist_data->attrs->var_defs.name[i]; ++ char *expr = hist_data->attrs->var_defs.expr[i]; ++ ++ ret = create_var_field(hist_data, j++, file, var_name, expr); ++ if (ret) ++ goto out; ++ } ++ out: ++ return ret; ++} ++ ++static void free_var_defs(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { ++ kfree(hist_data->attrs->var_defs.name[i]); ++ kfree(hist_data->attrs->var_defs.expr[i]); ++ } ++ ++ hist_data->attrs->var_defs.n_vars = 0; ++} ++ ++static int parse_var_defs(struct hist_trigger_data *hist_data) ++{ ++ char *s, *str, *var_name, *field_str; ++ unsigned int i, j, n_vars = 0; ++ int ret = 0; ++ ++ for (i = 0; i < hist_data->attrs->n_assignments; i++) { ++ str = hist_data->attrs->assignment_str[i]; ++ for (j = 0; j < TRACING_MAP_VARS_MAX; j++) { ++ field_str = strsep(&str, ","); ++ if (!field_str) ++ break; ++ ++ var_name = strsep(&field_str, "="); ++ if (!var_name || !field_str) { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ if (n_vars == TRACING_MAP_VARS_MAX) { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ s = kstrdup(var_name, GFP_KERNEL); ++ if (!s) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ hist_data->attrs->var_defs.name[n_vars] = s; ++ ++ s = kstrdup(field_str, GFP_KERNEL); ++ if (!s) { ++ kfree(hist_data->attrs->var_defs.name[n_vars]); ++ ret = -ENOMEM; ++ goto free; ++ } ++ hist_data->attrs->var_defs.expr[n_vars++] = s; ++ ++ hist_data->attrs->var_defs.n_vars = n_vars; ++ } ++ } ++ ++ return ret; ++ free: ++ free_var_defs(hist_data); ++ ++ return ret; ++} ++ + static int create_hist_fields(struct hist_trigger_data *hist_data, + struct trace_event_file *file) + { + int ret; + ++ ret = parse_var_defs(hist_data); ++ if (ret) ++ goto out; ++ + ret = create_val_fields(hist_data, file); + if (ret) + goto out; + +- ret = create_key_fields(hist_data, file); ++ ret = create_var_fields(hist_data, file); + if (ret) + goto out; + +- hist_data->n_fields = hist_data->n_vals + hist_data->n_keys; ++ ret = create_key_fields(hist_data, file); ++ if (ret) ++ goto out; + out: ++ free_var_defs(hist_data); ++ + return ret; + } + +@@ -768,7 +990,7 @@ static int create_sort_keys(struct hist_ + char *fields_str = hist_data->attrs->sort_key_str; + struct tracing_map_sort_key *sort_key; + int descending, ret = 0; +- unsigned int i, j; ++ unsigned int i, j, k; + + hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */ + +@@ -816,12 +1038,19 @@ static int create_sort_keys(struct hist_ + continue; + } + +- for (j = 1; j < hist_data->n_fields; j++) { ++ for (j = 1, k = 1; j < hist_data->n_fields; j++) { ++ unsigned int idx; ++ + hist_field = hist_data->fields[j]; ++ if (hist_field->flags & HIST_FIELD_FL_VAR) ++ continue; ++ ++ idx = k++; ++ + test_name = hist_field_name(hist_field, 0); + + if (strcmp(field_name, test_name) == 0) { 
+- sort_key->field_idx = j; ++ sort_key->field_idx = idx; + descending = is_descending(field_str); + if (descending < 0) { + ret = descending; +@@ -836,6 +1065,7 @@ static int create_sort_keys(struct hist_ + break; + } + } ++ + hist_data->n_sort_keys = i; + out: + return ret; +@@ -876,12 +1106,19 @@ static int create_tracing_map_fields(str + idx = tracing_map_add_key_field(map, + hist_field->offset, + cmp_fn); +- +- } else ++ } else if (!(hist_field->flags & HIST_FIELD_FL_VAR)) + idx = tracing_map_add_sum_field(map); + + if (idx < 0) + return idx; ++ ++ if (hist_field->flags & HIST_FIELD_FL_VAR) { ++ idx = tracing_map_add_var(map); ++ if (idx < 0) ++ return idx; ++ hist_field->var.idx = idx; ++ hist_field->var.hist_data = hist_data; ++ } + } + + return 0; +@@ -905,7 +1142,8 @@ static bool need_tracing_map_ops(struct + static struct hist_trigger_data * + create_hist_data(unsigned int map_bits, + struct hist_trigger_attrs *attrs, +- struct trace_event_file *file) ++ struct trace_event_file *file, ++ bool remove) + { + const struct tracing_map_ops *map_ops = NULL; + struct hist_trigger_data *hist_data; +@@ -916,6 +1154,7 @@ create_hist_data(unsigned int map_bits, + return ERR_PTR(-ENOMEM); + + hist_data->attrs = attrs; ++ hist_data->remove = remove; + + ret = create_hist_fields(hist_data, file); + if (ret) +@@ -962,14 +1201,28 @@ static void hist_trigger_elt_update(stru + struct ring_buffer_event *rbe) + { + struct hist_field *hist_field; +- unsigned int i; ++ unsigned int i, var_idx; + u64 hist_val; + + for_each_hist_val_field(i, hist_data) { + hist_field = hist_data->fields[i]; + hist_val = hist_field->fn(hist_field, rec, rbe); ++ if (hist_field->flags & HIST_FIELD_FL_VAR) { ++ var_idx = hist_field->var.idx; ++ tracing_map_set_var(elt, var_idx, hist_val); ++ continue; ++ } + tracing_map_update_sum(elt, i, hist_val); + } ++ ++ for_each_hist_key_field(i, hist_data) { ++ hist_field = hist_data->fields[i]; ++ if (hist_field->flags & HIST_FIELD_FL_VAR) { ++ hist_val = hist_field->fn(hist_field, rec, rbe); ++ var_idx = hist_field->var.idx; ++ tracing_map_set_var(elt, var_idx, hist_val); ++ } ++ } + } + + static inline void add_to_key(char *compound_key, void *key, +@@ -1144,6 +1397,9 @@ hist_trigger_entry_print(struct seq_file + for (i = 1; i < hist_data->n_vals; i++) { + field_name = hist_field_name(hist_data->fields[i], 0); + ++ if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR) ++ continue; ++ + if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { + seq_printf(m, " %s: %10llx", field_name, + tracing_map_read_sum(elt, i)); +@@ -1265,6 +1521,9 @@ static void hist_field_print(struct seq_ + { + const char *field_name = hist_field_name(hist_field, 0); + ++ if (hist_field->var.name) ++ seq_printf(m, "%s=", hist_field->var.name); ++ + if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) + seq_puts(m, "common_timestamp"); + else if (field_name) +@@ -1283,7 +1542,8 @@ static int event_hist_trigger_print(stru + struct event_trigger_data *data) + { + struct hist_trigger_data *hist_data = data->private_data; +- struct hist_field *key_field; ++ struct hist_field *field; ++ bool have_var = false; + unsigned int i; + + seq_puts(m, "hist:"); +@@ -1294,25 +1554,47 @@ static int event_hist_trigger_print(stru + seq_puts(m, "keys="); + + for_each_hist_key_field(i, hist_data) { +- key_field = hist_data->fields[i]; ++ field = hist_data->fields[i]; + + if (i > hist_data->n_vals) + seq_puts(m, ","); + +- if (key_field->flags & HIST_FIELD_FL_STACKTRACE) ++ if (field->flags & HIST_FIELD_FL_STACKTRACE) + seq_puts(m, 
"stacktrace"); + else +- hist_field_print(m, key_field); ++ hist_field_print(m, field); + } + + seq_puts(m, ":vals="); + + for_each_hist_val_field(i, hist_data) { ++ field = hist_data->fields[i]; ++ if (field->flags & HIST_FIELD_FL_VAR) { ++ have_var = true; ++ continue; ++ } ++ + if (i == HITCOUNT_IDX) + seq_puts(m, "hitcount"); + else { + seq_puts(m, ","); +- hist_field_print(m, hist_data->fields[i]); ++ hist_field_print(m, field); ++ } ++ } ++ ++ if (have_var) { ++ unsigned int n = 0; ++ ++ seq_puts(m, ":"); ++ ++ for_each_hist_val_field(i, hist_data) { ++ field = hist_data->fields[i]; ++ ++ if (field->flags & HIST_FIELD_FL_VAR) { ++ if (n++) ++ seq_puts(m, ","); ++ hist_field_print(m, field); ++ } + } + } + +@@ -1320,7 +1602,10 @@ static int event_hist_trigger_print(stru + + for (i = 0; i < hist_data->n_sort_keys; i++) { + struct tracing_map_sort_key *sort_key; +- unsigned int idx; ++ unsigned int idx, first_key_idx; ++ ++ /* skip VAR vals */ ++ first_key_idx = hist_data->n_vals - hist_data->n_vars; + + sort_key = &hist_data->sort_keys[i]; + idx = sort_key->field_idx; +@@ -1333,8 +1618,11 @@ static int event_hist_trigger_print(stru + + if (idx == HITCOUNT_IDX) + seq_puts(m, "hitcount"); +- else ++ else { ++ if (idx >= first_key_idx) ++ idx += hist_data->n_vars; + hist_field_print(m, hist_data->fields[idx]); ++ } + + if (sort_key->descending) + seq_puts(m, ".descending"); +@@ -1631,7 +1919,7 @@ static void hist_unregister_trigger(char + test->ops->free(test->ops, test); + + if (hist_data->enable_timestamps) { +- if (unregistered) ++ if (!hist_data->remove || unregistered) + tracing_set_time_stamp_abs(file->tr, false); + } + } +@@ -1664,12 +1952,16 @@ static int event_hist_trigger_func(struc + struct hist_trigger_attrs *attrs; + struct event_trigger_ops *trigger_ops; + struct hist_trigger_data *hist_data; ++ bool remove = false; + char *trigger; + int ret = 0; + + if (!param) + return -EINVAL; + ++ if (glob[0] == '!') ++ remove = true; ++ + /* separate the trigger from the filter (k:v [if filter]) */ + trigger = strsep(¶m, " \t"); + if (!trigger) +@@ -1682,7 +1974,7 @@ static int event_hist_trigger_func(struc + if (attrs->map_bits) + hist_trigger_bits = attrs->map_bits; + +- hist_data = create_hist_data(hist_trigger_bits, attrs, file); ++ hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove); + if (IS_ERR(hist_data)) { + destroy_hist_trigger_attrs(attrs); + return PTR_ERR(hist_data); +@@ -1711,7 +2003,7 @@ static int event_hist_trigger_func(struc + goto out_free; + } + +- if (glob[0] == '!') { ++ if (remove) { + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + ret = 0; + goto out_free; diff --git a/debian/patches/features/all/rt/0024-tracing-Account-for-variables-in-named-trigger-compa.patch b/debian/patches/features/all/rt/0024-tracing-Account-for-variables-in-named-trigger-compa.patch new file mode 100644 index 000000000..de939a65a --- /dev/null +++ b/debian/patches/features/all/rt/0024-tracing-Account-for-variables-in-named-trigger-compa.patch @@ -0,0 +1,46 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:50 -0600 +Subject: [PATCH 24/48] tracing: Account for variables in named trigger + compatibility +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Named triggers must also have the same set of variables in order to be +considered compatible - update the trigger match test to account for +that. 
+ +The reason for this requirement is that named triggers with variables +are meant to allow one or more events to set the same variable. + +Link: http://lkml.kernel.org/r/a17eae6328a99917f9d5c66129c9fcd355279ee9.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit f94add7df3d72bc8e659f9491e25d91c9dae1b44) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -1610,7 +1610,7 @@ static int event_hist_trigger_print(stru + sort_key = &hist_data->sort_keys[i]; + idx = sort_key->field_idx; + +- if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX)) ++ if (WARN_ON(idx >= HIST_FIELDS_MAX)) + return -EINVAL; + + if (i > 0) +@@ -1798,6 +1798,11 @@ static bool hist_trigger_match(struct ev + return false; + if (key_field->is_signed != key_field_test->is_signed) + return false; ++ if (!!key_field->var.name != !!key_field_test->var.name) ++ return false; ++ if (key_field->var.name && ++ strcmp(key_field->var.name, key_field_test->var.name) != 0) ++ return false; + } + + for (i = 0; i < hist_data->n_sort_keys; i++) { diff --git a/debian/patches/features/all/rt/0025-tracing-Move-get_hist_field_flags.patch b/debian/patches/features/all/rt/0025-tracing-Move-get_hist_field_flags.patch new file mode 100644 index 000000000..fddb6ea2e --- /dev/null +++ b/debian/patches/features/all/rt/0025-tracing-Move-get_hist_field_flags.patch @@ -0,0 +1,78 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:51 -0600 +Subject: [PATCH 25/48] tracing: Move get_hist_field_flags() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Move get_hist_field_flags() to make it more easily accessible for new +code (and keep the move separate from new functionality). 
+ +Link: http://lkml.kernel.org/r/32470f0a7047ec7a6e84ba5ec89d6142cc6ede7d.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit fde3bce553d359c01beb9a6fce4013b65076aff3) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 44 +++++++++++++++++++-------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -497,6 +497,28 @@ static const struct tracing_map_ops hist + .elt_init = hist_trigger_elt_comm_init, + }; + ++static const char *get_hist_field_flags(struct hist_field *hist_field) ++{ ++ const char *flags_str = NULL; ++ ++ if (hist_field->flags & HIST_FIELD_FL_HEX) ++ flags_str = "hex"; ++ else if (hist_field->flags & HIST_FIELD_FL_SYM) ++ flags_str = "sym"; ++ else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) ++ flags_str = "sym-offset"; ++ else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) ++ flags_str = "execname"; ++ else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) ++ flags_str = "syscall"; ++ else if (hist_field->flags & HIST_FIELD_FL_LOG2) ++ flags_str = "log2"; ++ else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) ++ flags_str = "usecs"; ++ ++ return flags_str; ++} ++ + static void destroy_hist_field(struct hist_field *hist_field, + unsigned int level) + { +@@ -1495,28 +1517,6 @@ const struct file_operations event_hist_ + .release = single_release, + }; + +-static const char *get_hist_field_flags(struct hist_field *hist_field) +-{ +- const char *flags_str = NULL; +- +- if (hist_field->flags & HIST_FIELD_FL_HEX) +- flags_str = "hex"; +- else if (hist_field->flags & HIST_FIELD_FL_SYM) +- flags_str = "sym"; +- else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) +- flags_str = "sym-offset"; +- else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) +- flags_str = "execname"; +- else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) +- flags_str = "syscall"; +- else if (hist_field->flags & HIST_FIELD_FL_LOG2) +- flags_str = "log2"; +- else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) +- flags_str = "usecs"; +- +- return flags_str; +-} +- + static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) + { + const char *field_name = hist_field_name(hist_field, 0); diff --git a/debian/patches/features/all/rt/0026-tracing-Add-simple-expression-support-to-hist-trigge.patch b/debian/patches/features/all/rt/0026-tracing-Add-simple-expression-support-to-hist-trigge.patch new file mode 100644 index 000000000..3170b0dc0 --- /dev/null +++ b/debian/patches/features/all/rt/0026-tracing-Add-simple-expression-support-to-hist-trigge.patch @@ -0,0 +1,628 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:52 -0600 +Subject: [PATCH 26/48] tracing: Add simple expression support to hist triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add support for simple addition, subtraction, and unary expressions +(-(expr) and expr, where expr = b-a, a+b, a+b+c) to hist triggers, in +order to support a minimal set of useful inter-event calculations. + +These operations are needed for calculating latencies between events +(timestamp1-timestamp0) and for combined latencies (latencies over 3 +or more events). + +In the process, factor out some common code from key and value +parsing. 
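+
+As an illustrative sketch (kmem:kmalloc is just a convenient event
+with two numeric fields), a simple two-operand expression can be used
+directly as a value:
+
+  # echo 'hist:keys=call_site:vals=bytes_alloc-bytes_req' >> /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger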
+ +Link: http://lkml.kernel.org/r/9a9308ead4fe32a433d9c7e95921fb798394f6b2.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +[kbuild test robot fix, add static to parse_atom()] +Signed-off-by: Fengguang Wu +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 26c5cb5e4790fec96e3eba02c347e78fa72273a8) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 487 +++++++++++++++++++++++++++++++++------ + 1 file changed, 413 insertions(+), 74 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -32,6 +32,13 @@ typedef u64 (*hist_field_fn_t) (struct h + #define HIST_FIELD_OPERANDS_MAX 2 + #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) + ++enum field_op_id { ++ FIELD_OP_NONE, ++ FIELD_OP_PLUS, ++ FIELD_OP_MINUS, ++ FIELD_OP_UNARY_MINUS, ++}; ++ + struct hist_var { + char *name; + struct hist_trigger_data *hist_data; +@@ -48,6 +55,8 @@ struct hist_field { + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; + struct hist_trigger_data *hist_data; + struct hist_var var; ++ enum field_op_id operator; ++ char *name; + }; + + static u64 hist_field_none(struct hist_field *field, void *event, +@@ -98,6 +107,41 @@ static u64 hist_field_log2(struct hist_f + return (u64) ilog2(roundup_pow_of_two(val)); + } + ++static u64 hist_field_plus(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) ++{ ++ struct hist_field *operand1 = hist_field->operands[0]; ++ struct hist_field *operand2 = hist_field->operands[1]; ++ ++ u64 val1 = operand1->fn(operand1, event, rbe); ++ u64 val2 = operand2->fn(operand2, event, rbe); ++ ++ return val1 + val2; ++} ++ ++static u64 hist_field_minus(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) ++{ ++ struct hist_field *operand1 = hist_field->operands[0]; ++ struct hist_field *operand2 = hist_field->operands[1]; ++ ++ u64 val1 = operand1->fn(operand1, event, rbe); ++ u64 val2 = operand2->fn(operand2, event, rbe); ++ ++ return val1 - val2; ++} ++ ++static u64 hist_field_unary_minus(struct hist_field *hist_field, void *event, ++ struct ring_buffer_event *rbe) ++{ ++ struct hist_field *operand = hist_field->operands[0]; ++ ++ s64 sval = (s64)operand->fn(operand, event, rbe); ++ u64 val = (u64)-sval; ++ ++ return val; ++} ++ + #define DEFINE_HIST_FIELD_FN(type) \ + static u64 hist_field_##type(struct hist_field *hist_field, \ + void *event, \ +@@ -147,6 +191,7 @@ enum hist_field_flags { + HIST_FIELD_FL_TIMESTAMP = 1 << 10, + HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, + HIST_FIELD_FL_VAR = 1 << 12, ++ HIST_FIELD_FL_EXPR = 1 << 13, + }; + + struct var_defs { +@@ -258,6 +303,8 @@ static const char *hist_field_name(struc + field_name = hist_field_name(field->operands[0], ++level); + else if (field->flags & HIST_FIELD_FL_TIMESTAMP) + field_name = "common_timestamp"; ++ else if (field->flags & HIST_FIELD_FL_EXPR) ++ field_name = field->name; + + if (field_name == NULL) + field_name = ""; +@@ -519,12 +566,104 @@ static const char *get_hist_field_flags( + return flags_str; + } + ++static void expr_field_str(struct hist_field *field, char *expr) ++{ ++ strcat(expr, hist_field_name(field, 0)); ++ ++ if (field->flags) { ++ const char *flags_str = get_hist_field_flags(field); ++ ++ if (flags_str) { ++ strcat(expr, "."); ++ strcat(expr, flags_str); ++ } ++ } ++} ++ ++static char *expr_str(struct hist_field *field, unsigned int level) ++{ ++ char *expr; ++ ++ if (level > 1) ++ return NULL; ++ ++ expr = 
kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); ++ if (!expr) ++ return NULL; ++ ++ if (!field->operands[0]) { ++ expr_field_str(field, expr); ++ return expr; ++ } ++ ++ if (field->operator == FIELD_OP_UNARY_MINUS) { ++ char *subexpr; ++ ++ strcat(expr, "-("); ++ subexpr = expr_str(field->operands[0], ++level); ++ if (!subexpr) { ++ kfree(expr); ++ return NULL; ++ } ++ strcat(expr, subexpr); ++ strcat(expr, ")"); ++ ++ kfree(subexpr); ++ ++ return expr; ++ } ++ ++ expr_field_str(field->operands[0], expr); ++ ++ switch (field->operator) { ++ case FIELD_OP_MINUS: ++ strcat(expr, "-"); ++ break; ++ case FIELD_OP_PLUS: ++ strcat(expr, "+"); ++ break; ++ default: ++ kfree(expr); ++ return NULL; ++ } ++ ++ expr_field_str(field->operands[1], expr); ++ ++ return expr; ++} ++ ++static int contains_operator(char *str) ++{ ++ enum field_op_id field_op = FIELD_OP_NONE; ++ char *op; ++ ++ op = strpbrk(str, "+-"); ++ if (!op) ++ return FIELD_OP_NONE; ++ ++ switch (*op) { ++ case '-': ++ if (*str == '-') ++ field_op = FIELD_OP_UNARY_MINUS; ++ else ++ field_op = FIELD_OP_MINUS; ++ break; ++ case '+': ++ field_op = FIELD_OP_PLUS; ++ break; ++ default: ++ break; ++ } ++ ++ return field_op; ++} ++ + static void destroy_hist_field(struct hist_field *hist_field, + unsigned int level) + { + unsigned int i; + +- if (level > 2) ++ if (level > 3) + return; + + if (!hist_field) +@@ -534,6 +673,7 @@ static void destroy_hist_field(struct hi + destroy_hist_field(hist_field->operands[i], level + 1); + + kfree(hist_field->var.name); ++ kfree(hist_field->name); + + kfree(hist_field); + } +@@ -554,6 +694,9 @@ static struct hist_field *create_hist_fi + + hist_field->hist_data = hist_data; + ++ if (flags & HIST_FIELD_FL_EXPR) ++ goto out; /* caller will populate */ ++ + if (flags & HIST_FIELD_FL_HITCOUNT) { + hist_field->fn = hist_field_counter; + goto out; +@@ -626,6 +769,257 @@ static void destroy_hist_fields(struct h + } + } + ++static struct ftrace_event_field * ++parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, ++ char *field_str, unsigned long *flags) ++{ ++ struct ftrace_event_field *field = NULL; ++ char *field_name, *modifier, *str; ++ ++ modifier = str = kstrdup(field_str, GFP_KERNEL); ++ if (!modifier) ++ return ERR_PTR(-ENOMEM); ++ ++ field_name = strsep(&modifier, "."); ++ if (modifier) { ++ if (strcmp(modifier, "hex") == 0) ++ *flags |= HIST_FIELD_FL_HEX; ++ else if (strcmp(modifier, "sym") == 0) ++ *flags |= HIST_FIELD_FL_SYM; ++ else if (strcmp(modifier, "sym-offset") == 0) ++ *flags |= HIST_FIELD_FL_SYM_OFFSET; ++ else if ((strcmp(modifier, "execname") == 0) && ++ (strcmp(field_name, "common_pid") == 0)) ++ *flags |= HIST_FIELD_FL_EXECNAME; ++ else if (strcmp(modifier, "syscall") == 0) ++ *flags |= HIST_FIELD_FL_SYSCALL; ++ else if (strcmp(modifier, "log2") == 0) ++ *flags |= HIST_FIELD_FL_LOG2; ++ else if (strcmp(modifier, "usecs") == 0) ++ *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; ++ else { ++ field = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ } ++ ++ if (strcmp(field_name, "common_timestamp") == 0) { ++ *flags |= HIST_FIELD_FL_TIMESTAMP; ++ hist_data->enable_timestamps = true; ++ if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) ++ hist_data->attrs->ts_in_usecs = true; ++ } else { ++ field = trace_find_event_field(file->event_call, field_name); ++ if (!field || !field->size) { ++ field = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ } ++ out: ++ kfree(str); ++ ++ return field; ++} ++ ++static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, char 
*str, ++ unsigned long *flags, char *var_name) ++{ ++ struct ftrace_event_field *field = NULL; ++ struct hist_field *hist_field = NULL; ++ int ret = 0; ++ ++ field = parse_field(hist_data, file, str, flags); ++ if (IS_ERR(field)) { ++ ret = PTR_ERR(field); ++ goto out; ++ } ++ ++ hist_field = create_hist_field(hist_data, field, *flags, var_name); ++ if (!hist_field) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ return hist_field; ++ out: ++ return ERR_PTR(ret); ++} ++ ++static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ char *str, unsigned long flags, ++ char *var_name, unsigned int level); ++ ++static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ char *str, unsigned long flags, ++ char *var_name, unsigned int level) ++{ ++ struct hist_field *operand1, *expr = NULL; ++ unsigned long operand_flags; ++ int ret = 0; ++ char *s; ++ ++ // we support only -(xxx) i.e. explicit parens required ++ ++ if (level > 3) { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ str++; // skip leading '-' ++ ++ s = strchr(str, '('); ++ if (s) ++ str++; ++ else { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ s = strrchr(str, ')'); ++ if (s) ++ *s = '\0'; ++ else { ++ ret = -EINVAL; // no closing ')' ++ goto free; ++ } ++ ++ flags |= HIST_FIELD_FL_EXPR; ++ expr = create_hist_field(hist_data, NULL, flags, var_name); ++ if (!expr) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ operand_flags = 0; ++ operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); ++ if (IS_ERR(operand1)) { ++ ret = PTR_ERR(operand1); ++ goto free; ++ } ++ ++ expr->flags |= operand1->flags & ++ (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); ++ expr->fn = hist_field_unary_minus; ++ expr->operands[0] = operand1; ++ expr->operator = FIELD_OP_UNARY_MINUS; ++ expr->name = expr_str(expr, 0); ++ ++ return expr; ++ free: ++ destroy_hist_field(expr, 0); ++ return ERR_PTR(ret); ++} ++ ++static int check_expr_operands(struct hist_field *operand1, ++ struct hist_field *operand2) ++{ ++ unsigned long operand1_flags = operand1->flags; ++ unsigned long operand2_flags = operand2->flags; ++ ++ if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != ++ (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ char *str, unsigned long flags, ++ char *var_name, unsigned int level) ++{ ++ struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL; ++ unsigned long operand_flags; ++ int field_op, ret = -EINVAL; ++ char *sep, *operand1_str; ++ ++ if (level > 3) ++ return ERR_PTR(-EINVAL); ++ ++ field_op = contains_operator(str); ++ ++ if (field_op == FIELD_OP_NONE) ++ return parse_atom(hist_data, file, str, &flags, var_name); ++ ++ if (field_op == FIELD_OP_UNARY_MINUS) ++ return parse_unary(hist_data, file, str, flags, var_name, ++level); ++ ++ switch (field_op) { ++ case FIELD_OP_MINUS: ++ sep = "-"; ++ break; ++ case FIELD_OP_PLUS: ++ sep = "+"; ++ break; ++ default: ++ goto free; ++ } ++ ++ operand1_str = strsep(&str, sep); ++ if (!operand1_str || !str) ++ goto free; ++ ++ operand_flags = 0; ++ operand1 = parse_atom(hist_data, file, operand1_str, ++ &operand_flags, NULL); ++ if (IS_ERR(operand1)) { ++ ret = PTR_ERR(operand1); ++ operand1 = NULL; ++ goto free; ++ } ++ ++ // rest of string could be another expression e.g. 
b+c in a+b+c ++ operand_flags = 0; ++ operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); ++ if (IS_ERR(operand2)) { ++ ret = PTR_ERR(operand2); ++ operand2 = NULL; ++ goto free; ++ } ++ ++ ret = check_expr_operands(operand1, operand2); ++ if (ret) ++ goto free; ++ ++ flags |= HIST_FIELD_FL_EXPR; ++ ++ flags |= operand1->flags & ++ (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); ++ ++ expr = create_hist_field(hist_data, NULL, flags, var_name); ++ if (!expr) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ expr->operands[0] = operand1; ++ expr->operands[1] = operand2; ++ expr->operator = field_op; ++ expr->name = expr_str(expr, 0); ++ ++ switch (field_op) { ++ case FIELD_OP_MINUS: ++ expr->fn = hist_field_minus; ++ break; ++ case FIELD_OP_PLUS: ++ expr->fn = hist_field_plus; ++ break; ++ default: ++ goto free; ++ } ++ ++ return expr; ++ free: ++ destroy_hist_field(operand1, 0); ++ destroy_hist_field(operand2, 0); ++ destroy_hist_field(expr, 0); ++ ++ return ERR_PTR(ret); ++} ++ + static int create_hitcount_val(struct hist_trigger_data *hist_data) + { + hist_data->fields[HITCOUNT_IDX] = +@@ -648,37 +1042,17 @@ static int __create_val_field(struct his + char *var_name, char *field_str, + unsigned long flags) + { +- struct ftrace_event_field *field = NULL; +- char *field_name; ++ struct hist_field *hist_field; + int ret = 0; + +- field_name = strsep(&field_str, "."); +- if (field_str) { +- if (strcmp(field_str, "hex") == 0) +- flags |= HIST_FIELD_FL_HEX; +- else { +- ret = -EINVAL; +- goto out; +- } +- } +- +- if (strcmp(field_name, "common_timestamp") == 0) { +- flags |= HIST_FIELD_FL_TIMESTAMP; +- hist_data->enable_timestamps = true; +- } else { +- field = trace_find_event_field(file->event_call, field_name); +- if (!field || !field->size) { +- ret = -EINVAL; +- goto out; +- } +- } +- +- hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags, var_name); +- if (!hist_data->fields[val_idx]) { +- ret = -ENOMEM; ++ hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); ++ if (IS_ERR(hist_field)) { ++ ret = PTR_ERR(hist_field); + goto out; + } + ++ hist_data->fields[val_idx] = hist_field; ++ + ++hist_data->n_vals; + ++hist_data->n_fields; + +@@ -765,8 +1139,8 @@ static int create_key_field(struct hist_ + struct trace_event_file *file, + char *field_str) + { +- struct ftrace_event_field *field = NULL; + struct hist_field *hist_field = NULL; ++ + unsigned long flags = 0; + unsigned int key_size; + int ret = 0; +@@ -781,60 +1155,24 @@ static int create_key_field(struct hist_ + key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH; + hist_field = create_hist_field(hist_data, NULL, flags, NULL); + } else { +- char *field_name = strsep(&field_str, "."); +- +- if (field_str) { +- if (strcmp(field_str, "hex") == 0) +- flags |= HIST_FIELD_FL_HEX; +- else if (strcmp(field_str, "sym") == 0) +- flags |= HIST_FIELD_FL_SYM; +- else if (strcmp(field_str, "sym-offset") == 0) +- flags |= HIST_FIELD_FL_SYM_OFFSET; +- else if ((strcmp(field_str, "execname") == 0) && +- (strcmp(field_name, "common_pid") == 0)) +- flags |= HIST_FIELD_FL_EXECNAME; +- else if (strcmp(field_str, "syscall") == 0) +- flags |= HIST_FIELD_FL_SYSCALL; +- else if (strcmp(field_str, "log2") == 0) +- flags |= HIST_FIELD_FL_LOG2; +- else if (strcmp(field_str, "usecs") == 0) +- flags |= HIST_FIELD_FL_TIMESTAMP_USECS; +- else { +- ret = -EINVAL; +- goto out; +- } ++ hist_field = parse_expr(hist_data, file, field_str, flags, ++ NULL, 0); ++ if (IS_ERR(hist_field)) { ++ ret = 
PTR_ERR(hist_field); ++ goto out; + } + +- if (strcmp(field_name, "common_timestamp") == 0) { +- flags |= HIST_FIELD_FL_TIMESTAMP; +- hist_data->enable_timestamps = true; +- if (flags & HIST_FIELD_FL_TIMESTAMP_USECS) +- hist_data->attrs->ts_in_usecs = true; +- key_size = sizeof(u64); +- } else { +- field = trace_find_event_field(file->event_call, field_name); +- if (!field || !field->size) { +- ret = -EINVAL; +- goto out; +- } +- +- if (is_string_field(field)) +- key_size = MAX_FILTER_STR_VAL; +- else +- key_size = field->size; +- } ++ key_size = hist_field->size; + } + +- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL); +- if (!hist_data->fields[key_idx]) { +- ret = -ENOMEM; +- goto out; +- } ++ hist_data->fields[key_idx] = hist_field; + + key_size = ALIGN(key_size, sizeof(u64)); + hist_data->fields[key_idx]->size = key_size; + hist_data->fields[key_idx]->offset = key_offset; ++ + hist_data->key_size += key_size; ++ + if (hist_data->key_size > HIST_KEY_SIZE_MAX) { + ret = -EINVAL; + goto out; +@@ -1419,7 +1757,8 @@ hist_trigger_entry_print(struct seq_file + for (i = 1; i < hist_data->n_vals; i++) { + field_name = hist_field_name(hist_data->fields[i], 0); + +- if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR) ++ if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR || ++ hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR) + continue; + + if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { diff --git a/debian/patches/features/all/rt/0027-tracing-Generalize-per-element-hist-trigger-data.patch b/debian/patches/features/all/rt/0027-tracing-Generalize-per-element-hist-trigger-data.patch new file mode 100644 index 000000000..932045066 --- /dev/null +++ b/debian/patches/features/all/rt/0027-tracing-Generalize-per-element-hist-trigger-data.patch @@ -0,0 +1,159 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:53 -0600 +Subject: [PATCH 27/48] tracing: Generalize per-element hist trigger data +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Up until now, hist triggers only needed per-element support for saving +'comm' data, which was saved directly as a private data pointer. + +In anticipation of the need to save other data besides 'comm', add a +new hist_elt_data struct for the purpose, and switch the current +'comm'-related code over to that. 
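+
+The per-element 'comm' data is what backs the .execname modifier, as
+in this illustrative example (any event with a common_pid key works):
+
+  # echo 'hist:keys=common_pid.execname' >> /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger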
+ +Link: http://lkml.kernel.org/r/4502c338c965ddf5fc19fb1ec4764391e001ed4b.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 8102d0cb859d223564b17afb01e33701f57191d1) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 76 ++++++++++++++++++++++----------------- + 1 file changed, 43 insertions(+), 33 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -289,6 +289,10 @@ static struct hist_field *find_var(struc + return NULL; + } + ++struct hist_elt_data { ++ char *comm; ++}; ++ + static const char *hist_field_name(struct hist_field *field, + unsigned int level) + { +@@ -503,45 +507,61 @@ static inline void save_comm(char *comm, + memcpy(comm, task->comm, TASK_COMM_LEN); + } + +-static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt) ++static void hist_elt_data_free(struct hist_elt_data *elt_data) ++{ ++ kfree(elt_data->comm); ++ kfree(elt_data); ++} ++ ++static void hist_trigger_elt_data_free(struct tracing_map_elt *elt) + { +- kfree((char *)elt->private_data); ++ struct hist_elt_data *elt_data = elt->private_data; ++ ++ hist_elt_data_free(elt_data); + } + +-static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt) ++static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) + { + struct hist_trigger_data *hist_data = elt->map->private_data; ++ unsigned int size = TASK_COMM_LEN; ++ struct hist_elt_data *elt_data; + struct hist_field *key_field; + unsigned int i; + ++ elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); ++ if (!elt_data) ++ return -ENOMEM; ++ + for_each_hist_key_field(i, hist_data) { + key_field = hist_data->fields[i]; + + if (key_field->flags & HIST_FIELD_FL_EXECNAME) { +- unsigned int size = TASK_COMM_LEN + 1; +- +- elt->private_data = kzalloc(size, GFP_KERNEL); +- if (!elt->private_data) ++ elt_data->comm = kzalloc(size, GFP_KERNEL); ++ if (!elt_data->comm) { ++ kfree(elt_data); + return -ENOMEM; ++ } + break; + } + } + ++ elt->private_data = elt_data; ++ + return 0; + } + +-static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt) ++static void hist_trigger_elt_data_init(struct tracing_map_elt *elt) + { +- char *comm = elt->private_data; ++ struct hist_elt_data *elt_data = elt->private_data; + +- if (comm) +- save_comm(comm, current); ++ if (elt_data->comm) ++ save_comm(elt_data->comm, current); + } + +-static const struct tracing_map_ops hist_trigger_elt_comm_ops = { +- .elt_alloc = hist_trigger_elt_comm_alloc, +- .elt_free = hist_trigger_elt_comm_free, +- .elt_init = hist_trigger_elt_comm_init, ++static const struct tracing_map_ops hist_trigger_elt_data_ops = { ++ .elt_alloc = hist_trigger_elt_data_alloc, ++ .elt_free = hist_trigger_elt_data_free, ++ .elt_init = hist_trigger_elt_data_init, + }; + + static const char *get_hist_field_flags(struct hist_field *hist_field) +@@ -1484,21 +1504,6 @@ static int create_tracing_map_fields(str + return 0; + } + +-static bool need_tracing_map_ops(struct hist_trigger_data *hist_data) +-{ +- struct hist_field *key_field; +- unsigned int i; +- +- for_each_hist_key_field(i, hist_data) { +- key_field = hist_data->fields[i]; +- +- if (key_field->flags & HIST_FIELD_FL_EXECNAME) +- return true; +- } +- +- return false; +-} +- + static struct hist_trigger_data * + create_hist_data(unsigned int map_bits, + struct hist_trigger_attrs *attrs, +@@ -1524,8 +1529,7 @@ create_hist_data(unsigned int map_bits, + if (ret) + goto free; + +- if 
(need_tracing_map_ops(hist_data))
+-		map_ops = &hist_trigger_elt_comm_ops;
++	map_ops = &hist_trigger_elt_data_ops;
+
+ 	hist_data->map = tracing_map_create(map_bits, hist_data->key_size,
+ 					    map_ops, hist_data);
+@@ -1713,7 +1717,13 @@ hist_trigger_entry_print(struct seq_file
+ 			seq_printf(m, "%s: [%llx] %-55s", field_name,
+ 				   uval, str);
+ 		} else if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
+-			char *comm = elt->private_data;
++			struct hist_elt_data *elt_data = elt->private_data;
++			char *comm;
++
++			if (WARN_ON_ONCE(!elt_data))
++				return;
++
++			comm = elt_data->comm;
+
+ 			uval = *(u64 *)(key + key_field->offset);
+ 			seq_printf(m, "%s: %-16s[%10llu]", field_name, diff --git a/debian/patches/features/all/rt/0028-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch b/debian/patches/features/all/rt/0028-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch new file mode 100644 index 000000000..914141d7c --- /dev/null +++ b/debian/patches/features/all/rt/0028-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch @@ -0,0 +1,226 @@
+From: Tom Zanussi
+Date: Mon, 15 Jan 2018 20:51:54 -0600
+Subject: [PATCH 28/48] tracing: Pass tracing_map_elt to hist_field accessor
+ functions
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+Some accessor functions, such as for variable references, require
+access to a corresponding tracing_map_elt.
+
+Add a tracing_map_elt param to the function signature and update the
+accessor functions accordingly.
+
+Link: http://lkml.kernel.org/r/e0f292b068e9e4948da1d5af21b5ae0efa9b5717.1516069914.git.tom.zanussi@linux.intel.com
+
+Signed-off-by: Tom Zanussi
+Signed-off-by: Steven Rostedt (VMware)
+(cherry picked from commit 8405bbbbc9dc0d88ffc92848cb8f0bda2c7a1b30)
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ kernel/trace/trace_events_hist.c | 91 ++++++++++++++++++++++---------------
+ 1 file changed, 57 insertions(+), 34 deletions(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -26,8 +26,10 @@
+
+ struct hist_field;
+
+-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event,
+-				struct ring_buffer_event *rbe);
++typedef u64 (*hist_field_fn_t) (struct hist_field *field,
++				struct tracing_map_elt *elt,
++				struct ring_buffer_event *rbe,
++				void *event);
+
+ #define HIST_FIELD_OPERANDS_MAX 2
+ #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+@@ -59,28 +61,36 @@ struct hist_field {
+-static u64 hist_field_none(struct hist_field *field, void *event,
+-			   struct ring_buffer_event *rbe)
++static u64 hist_field_none(struct hist_field *field,
++			   struct tracing_map_elt *elt,
++			   struct ring_buffer_event *rbe,
++			   void *event)
+ {
+ 	return 0;
+ }
+
+-static u64 hist_field_counter(struct hist_field *field, void *event,
+-			      struct ring_buffer_event *rbe)
++static u64 hist_field_counter(struct hist_field *field,
++			      struct tracing_map_elt *elt,
++			      struct ring_buffer_event *rbe,
++			      void *event)
+ {
+ 	return 1;
+ }
+
+-static u64 hist_field_string(struct hist_field *hist_field, void *event,
+-			     struct ring_buffer_event *rbe)
++static u64 hist_field_string(struct hist_field *hist_field,
++			     struct tracing_map_elt *elt,
++			     struct ring_buffer_event *rbe,
++			     void *event)
+ {
+ 	char *addr = (char *)(event + hist_field->field->offset);
+
+ 	return (u64)(unsigned long)addr;
+ }
+
+-static u64 hist_field_dynstring(struct 
hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + u32 str_item = *(u32 *)(event + hist_field->field->offset); + int str_loc = str_item & 0xffff; +@@ -89,54 +99,64 @@ static u64 hist_field_dynstring(struct h + return (u64)(unsigned long)addr; + } + +-static u64 hist_field_pstring(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_pstring(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + char **addr = (char **)(event + hist_field->field->offset); + + return (u64)(unsigned long)*addr; + } + +-static u64 hist_field_log2(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_log2(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand = hist_field->operands[0]; + +- u64 val = operand->fn(operand, event, rbe); ++ u64 val = operand->fn(operand, elt, rbe, event); + + return (u64) ilog2(roundup_pow_of_two(val)); + } + +-static u64 hist_field_plus(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_plus(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + +- u64 val1 = operand1->fn(operand1, event, rbe); +- u64 val2 = operand2->fn(operand2, event, rbe); ++ u64 val1 = operand1->fn(operand1, elt, rbe, event); ++ u64 val2 = operand2->fn(operand2, elt, rbe, event); + + return val1 + val2; + } + +-static u64 hist_field_minus(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_minus(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + +- u64 val1 = operand1->fn(operand1, event, rbe); +- u64 val2 = operand2->fn(operand2, event, rbe); ++ u64 val1 = operand1->fn(operand1, elt, rbe, event); ++ u64 val2 = operand2->fn(operand2, elt, rbe, event); + + return val1 - val2; + } + +-static u64 hist_field_unary_minus(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_unary_minus(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + struct hist_field *operand = hist_field->operands[0]; + +- s64 sval = (s64)operand->fn(operand, event, rbe); ++ s64 sval = (s64)operand->fn(operand, elt, rbe, event); + u64 val = (u64)-sval; + + return val; +@@ -144,8 +164,9 @@ static u64 hist_field_unary_minus(struct + + #define DEFINE_HIST_FIELD_FN(type) \ + static u64 hist_field_##type(struct hist_field *hist_field, \ +- void *event, \ +- struct ring_buffer_event *rbe) \ ++ struct tracing_map_elt *elt, \ ++ struct ring_buffer_event *rbe, \ ++ void *event) \ + { \ + type *addr = (type *)(event + hist_field->field->offset); \ + \ +@@ -233,8 +254,10 @@ struct hist_trigger_data { + bool remove; + }; + +-static u64 hist_field_timestamp(struct hist_field *hist_field, void *event, +- struct ring_buffer_event *rbe) ++static u64 hist_field_timestamp(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) + { + 
struct hist_trigger_data *hist_data = hist_field->hist_data; + struct trace_array *tr = hist_data->event_file->tr; +@@ -1570,7 +1593,7 @@ static void hist_trigger_elt_update(stru + + for_each_hist_val_field(i, hist_data) { + hist_field = hist_data->fields[i]; +- hist_val = hist_field->fn(hist_field, rec, rbe); ++ hist_val = hist_field->fn(hist_field, elt, rbe, rec); + if (hist_field->flags & HIST_FIELD_FL_VAR) { + var_idx = hist_field->var.idx; + tracing_map_set_var(elt, var_idx, hist_val); +@@ -1582,7 +1605,7 @@ static void hist_trigger_elt_update(stru + for_each_hist_key_field(i, hist_data) { + hist_field = hist_data->fields[i]; + if (hist_field->flags & HIST_FIELD_FL_VAR) { +- hist_val = hist_field->fn(hist_field, rec, rbe); ++ hist_val = hist_field->fn(hist_field, elt, rbe, rec); + var_idx = hist_field->var.idx; + tracing_map_set_var(elt, var_idx, hist_val); + } +@@ -1620,9 +1643,9 @@ static void event_hist_trigger(struct ev + bool use_compound_key = (hist_data->n_keys > 1); + unsigned long entries[HIST_STACKTRACE_DEPTH]; + char compound_key[HIST_KEY_SIZE_MAX]; ++ struct tracing_map_elt *elt = NULL; + struct stack_trace stacktrace; + struct hist_field *key_field; +- struct tracing_map_elt *elt; + u64 field_contents; + void *key = NULL; + unsigned int i; +@@ -1643,7 +1666,7 @@ static void event_hist_trigger(struct ev + + key = entries; + } else { +- field_contents = key_field->fn(key_field, rec, rbe); ++ field_contents = key_field->fn(key_field, elt, rbe, rec); + if (key_field->flags & HIST_FIELD_FL_STRING) { + key = (void *)(unsigned long)field_contents; + use_compound_key = true; diff --git a/debian/patches/features/all/rt/0029-tracing-Add-hist_field-type-field.patch b/debian/patches/features/all/rt/0029-tracing-Add-hist_field-type-field.patch new file mode 100644 index 000000000..ce4bf9764 --- /dev/null +++ b/debian/patches/features/all/rt/0029-tracing-Add-hist_field-type-field.patch @@ -0,0 +1,118 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:55 -0600 +Subject: [PATCH 29/48] tracing: Add hist_field 'type' field +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Future support for synthetic events requires hist_field 'type' +information, so add a field for that. + +Also, make other hist_field attribute usage consistent (size, +is_signed, etc). 
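As an illustrative sketch (not part of the upstream patch), the convention
this change introduces is that each hist_field owns a private, kstrdup()'d
copy of its type string, released in destroy_hist_field(). A hypothetical
helper capturing the pattern (hist_field_set_type() does not exist in the
kernel; it only illustrates the ownership rule):

	static int hist_field_set_type(struct hist_field *hist_field,
				       const char *type)
	{
		/* private copy; destroy_hist_field() kfree()s it */
		hist_field->type = kstrdup(type, GFP_KERNEL);
		if (!hist_field->type)
			return -ENOMEM;	/* caller unwinds via destroy_hist_field() */
		return 0;
	}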
+ +Link: http://lkml.kernel.org/r/3fd12a2e86316b05151ba0d7c68268e780af2c9d.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit d544a468f82526e97cc80c18a019708eb203b00a) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 33 +++++++++++++++++++++++++++++++++ + 1 file changed, 33 insertions(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -54,6 +54,7 @@ struct hist_field { + unsigned int size; + unsigned int offset; + unsigned int is_signed; ++ const char *type; + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; + struct hist_trigger_data *hist_data; + struct hist_var var; +@@ -717,6 +718,7 @@ static void destroy_hist_field(struct hi + + kfree(hist_field->var.name); + kfree(hist_field->name); ++ kfree(hist_field->type); + + kfree(hist_field); + } +@@ -742,6 +744,10 @@ static struct hist_field *create_hist_fi + + if (flags & HIST_FIELD_FL_HITCOUNT) { + hist_field->fn = hist_field_counter; ++ hist_field->size = sizeof(u64); ++ hist_field->type = kstrdup("u64", GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; + goto out; + } + +@@ -755,12 +761,18 @@ static struct hist_field *create_hist_fi + hist_field->fn = hist_field_log2; + hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); + hist_field->size = hist_field->operands[0]->size; ++ hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; + goto out; + } + + if (flags & HIST_FIELD_FL_TIMESTAMP) { + hist_field->fn = hist_field_timestamp; + hist_field->size = sizeof(u64); ++ hist_field->type = kstrdup("u64", GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; + goto out; + } + +@@ -770,6 +782,11 @@ static struct hist_field *create_hist_fi + if (is_string_field(field)) { + flags |= HIST_FIELD_FL_STRING; + ++ hist_field->size = MAX_FILTER_STR_VAL; ++ hist_field->type = kstrdup(field->type, GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; ++ + if (field->filter_type == FILTER_STATIC_STRING) + hist_field->fn = hist_field_string; + else if (field->filter_type == FILTER_DYN_STRING) +@@ -777,6 +794,12 @@ static struct hist_field *create_hist_fi + else + hist_field->fn = hist_field_pstring; + } else { ++ hist_field->size = field->size; ++ hist_field->is_signed = field->is_signed; ++ hist_field->type = kstrdup(field->type, GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; ++ + hist_field->fn = select_value_fn(field->size, + field->is_signed); + if (!hist_field->fn) { +@@ -949,6 +972,11 @@ static struct hist_field *parse_unary(st + expr->operands[0] = operand1; + expr->operator = FIELD_OP_UNARY_MINUS; + expr->name = expr_str(expr, 0); ++ expr->type = kstrdup(operand1->type, GFP_KERNEL); ++ if (!expr->type) { ++ ret = -ENOMEM; ++ goto free; ++ } + + return expr; + free: +@@ -1042,6 +1070,11 @@ static struct hist_field *parse_expr(str + expr->operands[1] = operand2; + expr->operator = field_op; + expr->name = expr_str(expr, 0); ++ expr->type = kstrdup(operand1->type, GFP_KERNEL); ++ if (!expr->type) { ++ ret = -ENOMEM; ++ goto free; ++ } + + switch (field_op) { + case FIELD_OP_MINUS: diff --git a/debian/patches/features/all/rt/0030-tracing-Add-variable-reference-handling-to-hist-trig.patch b/debian/patches/features/all/rt/0030-tracing-Add-variable-reference-handling-to-hist-trig.patch new file mode 100644 index 000000000..a9ec8bc73 --- /dev/null +++ 
b/debian/patches/features/all/rt/0030-tracing-Add-variable-reference-handling-to-hist-trig.patch @@ -0,0 +1,956 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:56 -0600 +Subject: [PATCH 30/48] tracing: Add variable reference handling to hist + triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add the necessary infrastructure to allow the variables defined on one +event to be referenced in another. This allows variables set by a +previous event to be referenced and used in expressions combining the +variable values saved by that previous event and the event fields of +the current event. For example, here's how a latency can be +calculated and saved into yet another variable named 'wakeup_lat': + + # echo 'hist:keys=pid,prio:ts0=common_timestamp ... + # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... + +In the first event, the event's timestamp is saved into the variable +ts0. In the next line, ts0 is subtracted from the second event's +timestamp to produce the latency. + +Further users of variable references will be described in subsequent +patches, such as for instance how the 'wakeup_lat' variable above can +be displayed in a latency histogram. + +Link: http://lkml.kernel.org/r/b1d3e6975374e34d501ff417c20189c3f9b2c7b8.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 434c1d5831194e72e6eb30d46534d75b5a985eb7) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace.c | 2 + kernel/trace/trace.h | 3 + kernel/trace/trace_events_hist.c | 661 +++++++++++++++++++++++++++++++++++- + kernel/trace/trace_events_trigger.c | 6 + 4 files changed, 656 insertions(+), 16 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7783,6 +7783,7 @@ static int instance_mkdir(const char *na + + INIT_LIST_HEAD(&tr->systems); + INIT_LIST_HEAD(&tr->events); ++ INIT_LIST_HEAD(&tr->hist_vars); + + if (allocate_trace_buffers(tr, trace_buf_size) < 0) + goto out_free_tr; +@@ -8533,6 +8534,7 @@ ssize_t trace_parse_run_command(struct f + + INIT_LIST_HEAD(&global_trace.systems); + INIT_LIST_HEAD(&global_trace.events); ++ INIT_LIST_HEAD(&global_trace.hist_vars); + list_add(&global_trace.list, &ftrace_trace_arrays); + + apply_trace_boot_options(); +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -274,6 +274,7 @@ struct trace_array { + int function_enabled; + #endif + int time_stamp_abs_ref; ++ struct list_head hist_vars; + }; + + enum { +@@ -1548,6 +1549,8 @@ extern void pause_named_trigger(struct e + extern void unpause_named_trigger(struct event_trigger_data *data); + extern void set_named_trigger_data(struct event_trigger_data *data, + struct event_trigger_data *named_data); ++extern struct event_trigger_data * ++get_named_trigger_data(struct event_trigger_data *data); + extern int register_event_command(struct event_command *cmd); + extern int unregister_event_command(struct event_command *cmd); + extern int register_trigger_hist_enable_disable_cmds(void); +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -59,7 +59,12 @@ struct hist_field { + struct hist_trigger_data *hist_data; + struct hist_var var; + enum field_op_id operator; ++ char *system; ++ char *event_name; + char *name; ++ unsigned int var_idx; ++ unsigned int var_ref_idx; ++ bool read_once; + }; + + static u64 hist_field_none(struct hist_field *field, +@@ -214,6 +219,7 @@ enum hist_field_flags { + 
HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, + HIST_FIELD_FL_VAR = 1 << 12, + HIST_FIELD_FL_EXPR = 1 << 13, ++ HIST_FIELD_FL_VAR_REF = 1 << 14, + }; + + struct var_defs { +@@ -253,6 +259,8 @@ struct hist_trigger_data { + struct tracing_map *map; + bool enable_timestamps; + bool remove; ++ struct hist_field *var_refs[TRACING_MAP_VARS_MAX]; ++ unsigned int n_var_refs; + }; + + static u64 hist_field_timestamp(struct hist_field *hist_field, +@@ -271,6 +279,214 @@ static u64 hist_field_timestamp(struct h + return ts; + } + ++struct hist_var_data { ++ struct list_head list; ++ struct hist_trigger_data *hist_data; ++}; ++ ++static struct hist_field * ++check_field_for_var_ref(struct hist_field *hist_field, ++ struct hist_trigger_data *var_data, ++ unsigned int var_idx) ++{ ++ struct hist_field *found = NULL; ++ ++ if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF) { ++ if (hist_field->var.idx == var_idx && ++ hist_field->var.hist_data == var_data) { ++ found = hist_field; ++ } ++ } ++ ++ return found; ++} ++ ++static struct hist_field * ++check_field_for_var_refs(struct hist_trigger_data *hist_data, ++ struct hist_field *hist_field, ++ struct hist_trigger_data *var_data, ++ unsigned int var_idx, ++ unsigned int level) ++{ ++ struct hist_field *found = NULL; ++ unsigned int i; ++ ++ if (level > 3) ++ return found; ++ ++ if (!hist_field) ++ return found; ++ ++ found = check_field_for_var_ref(hist_field, var_data, var_idx); ++ if (found) ++ return found; ++ ++ for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) { ++ struct hist_field *operand; ++ ++ operand = hist_field->operands[i]; ++ found = check_field_for_var_refs(hist_data, operand, var_data, ++ var_idx, level + 1); ++ if (found) ++ return found; ++ } ++ ++ return found; ++} ++ ++static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data, ++ struct hist_trigger_data *var_data, ++ unsigned int var_idx) ++{ ++ struct hist_field *hist_field, *found = NULL; ++ unsigned int i; ++ ++ for_each_hist_field(i, hist_data) { ++ hist_field = hist_data->fields[i]; ++ found = check_field_for_var_refs(hist_data, hist_field, ++ var_data, var_idx, 0); ++ if (found) ++ return found; ++ } ++ ++ return found; ++} ++ ++static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, ++ unsigned int var_idx) ++{ ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct hist_field *found = NULL; ++ struct hist_var_data *var_data; ++ ++ list_for_each_entry(var_data, &tr->hist_vars, list) { ++ if (var_data->hist_data == hist_data) ++ continue; ++ found = find_var_ref(var_data->hist_data, hist_data, var_idx); ++ if (found) ++ break; ++ } ++ ++ return found; ++} ++ ++static bool check_var_refs(struct hist_trigger_data *hist_data) ++{ ++ struct hist_field *field; ++ bool found = false; ++ int i; ++ ++ for_each_hist_field(i, hist_data) { ++ field = hist_data->fields[i]; ++ if (field && field->flags & HIST_FIELD_FL_VAR) { ++ if (find_any_var_ref(hist_data, field->var.idx)) { ++ found = true; ++ break; ++ } ++ } ++ } ++ ++ return found; ++} ++ ++static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data) ++{ ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct hist_var_data *var_data, *found = NULL; ++ ++ list_for_each_entry(var_data, &tr->hist_vars, list) { ++ if (var_data->hist_data == hist_data) { ++ found = var_data; ++ break; ++ } ++ } ++ ++ return found; ++} ++ ++static bool field_has_hist_vars(struct hist_field *hist_field, ++ unsigned int level) ++{ ++ int i; ++ ++ if (level > 3) ++ return false; 
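/*
 * Annotation, not part of the upstream patch: the level > 3 check above
 * bounds the recursion over expression operands. With
 * HIST_FIELD_OPERANDS_MAX == 2 and only unary/binary expressions,
 * well-formed operand trees stay shallow, so anything deeper is treated
 * as containing no variables rather than being walked indefinitely.
 */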
++ ++ if (!hist_field) ++ return false; ++ ++ if (hist_field->flags & HIST_FIELD_FL_VAR || ++ hist_field->flags & HIST_FIELD_FL_VAR_REF) ++ return true; ++ ++ for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) { ++ struct hist_field *operand; ++ ++ operand = hist_field->operands[i]; ++ if (field_has_hist_vars(operand, level + 1)) ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool has_hist_vars(struct hist_trigger_data *hist_data) ++{ ++ struct hist_field *hist_field; ++ int i; ++ ++ for_each_hist_field(i, hist_data) { ++ hist_field = hist_data->fields[i]; ++ if (field_has_hist_vars(hist_field, 0)) ++ return true; ++ } ++ ++ return false; ++} ++ ++static int save_hist_vars(struct hist_trigger_data *hist_data) ++{ ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct hist_var_data *var_data; ++ ++ var_data = find_hist_vars(hist_data); ++ if (var_data) ++ return 0; ++ ++ if (trace_array_get(tr) < 0) ++ return -ENODEV; ++ ++ var_data = kzalloc(sizeof(*var_data), GFP_KERNEL); ++ if (!var_data) { ++ trace_array_put(tr); ++ return -ENOMEM; ++ } ++ ++ var_data->hist_data = hist_data; ++ list_add(&var_data->list, &tr->hist_vars); ++ ++ return 0; ++} ++ ++static void remove_hist_vars(struct hist_trigger_data *hist_data) ++{ ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct hist_var_data *var_data; ++ ++ var_data = find_hist_vars(hist_data); ++ if (!var_data) ++ return; ++ ++ if (WARN_ON(check_var_refs(hist_data))) ++ return; ++ ++ list_del(&var_data->list); ++ ++ kfree(var_data); ++ ++ trace_array_put(tr); ++} ++ + static struct hist_field *find_var_field(struct hist_trigger_data *hist_data, + const char *var_name) + { +@@ -313,10 +529,137 @@ static struct hist_field *find_var(struc + return NULL; + } + ++static struct trace_event_file *find_var_file(struct trace_array *tr, ++ char *system, ++ char *event_name, ++ char *var_name) ++{ ++ struct hist_trigger_data *var_hist_data; ++ struct hist_var_data *var_data; ++ struct trace_event_file *file, *found = NULL; ++ ++ if (system) ++ return find_event_file(tr, system, event_name); ++ ++ list_for_each_entry(var_data, &tr->hist_vars, list) { ++ var_hist_data = var_data->hist_data; ++ file = var_hist_data->event_file; ++ if (file == found) ++ continue; ++ ++ if (find_var_field(var_hist_data, var_name)) { ++ if (found) ++ return NULL; ++ ++ found = file; ++ } ++ } ++ ++ return found; ++} ++ ++static struct hist_field *find_file_var(struct trace_event_file *file, ++ const char *var_name) ++{ ++ struct hist_trigger_data *test_data; ++ struct event_trigger_data *test; ++ struct hist_field *hist_field; ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ test_data = test->private_data; ++ hist_field = find_var_field(test_data, var_name); ++ if (hist_field) ++ return hist_field; ++ } ++ } ++ ++ return NULL; ++} ++ ++static struct hist_field *find_event_var(struct hist_trigger_data *hist_data, ++ char *system, ++ char *event_name, ++ char *var_name) ++{ ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct hist_field *hist_field = NULL; ++ struct trace_event_file *file; ++ ++ file = find_var_file(tr, system, event_name, var_name); ++ if (!file) ++ return NULL; ++ ++ hist_field = find_file_var(file, var_name); ++ ++ return hist_field; ++} ++ + struct hist_elt_data { + char *comm; ++ u64 *var_ref_vals; + }; + ++static u64 hist_field_var_ref(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) ++{ 
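/*
 * Annotation, not part of the upstream patch: a variable reference never
 * touches the tracing map here. resolve_var_refs() has already looked up
 * each referenced variable and cached its value in the var_ref_vals[]
 * array that hist_trigger_elt_update() stashes in elt->private_data;
 * var_ref_idx below simply indexes into that cache.
 */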
++ struct hist_elt_data *elt_data; ++ u64 var_val = 0; ++ ++ elt_data = elt->private_data; ++ var_val = elt_data->var_ref_vals[hist_field->var_ref_idx]; ++ ++ return var_val; ++} ++ ++static bool resolve_var_refs(struct hist_trigger_data *hist_data, void *key, ++ u64 *var_ref_vals, bool self) ++{ ++ struct hist_trigger_data *var_data; ++ struct tracing_map_elt *var_elt; ++ struct hist_field *hist_field; ++ unsigned int i, var_idx; ++ bool resolved = true; ++ u64 var_val = 0; ++ ++ for (i = 0; i < hist_data->n_var_refs; i++) { ++ hist_field = hist_data->var_refs[i]; ++ var_idx = hist_field->var.idx; ++ var_data = hist_field->var.hist_data; ++ ++ if (var_data == NULL) { ++ resolved = false; ++ break; ++ } ++ ++ if ((self && var_data != hist_data) || ++ (!self && var_data == hist_data)) ++ continue; ++ ++ var_elt = tracing_map_lookup(var_data->map, key); ++ if (!var_elt) { ++ resolved = false; ++ break; ++ } ++ ++ if (!tracing_map_var_set(var_elt, var_idx)) { ++ resolved = false; ++ break; ++ } ++ ++ if (self || !hist_field->read_once) ++ var_val = tracing_map_read_var(var_elt, var_idx); ++ else ++ var_val = tracing_map_read_var_once(var_elt, var_idx); ++ ++ var_ref_vals[i] = var_val; ++ } ++ ++ return resolved; ++} ++ + static const char *hist_field_name(struct hist_field *field, + unsigned int level) + { +@@ -331,8 +674,20 @@ static const char *hist_field_name(struc + field_name = hist_field_name(field->operands[0], ++level); + else if (field->flags & HIST_FIELD_FL_TIMESTAMP) + field_name = "common_timestamp"; +- else if (field->flags & HIST_FIELD_FL_EXPR) +- field_name = field->name; ++ else if (field->flags & HIST_FIELD_FL_EXPR || ++ field->flags & HIST_FIELD_FL_VAR_REF) { ++ if (field->system) { ++ static char full_name[MAX_FILTER_STR_VAL]; ++ ++ strcat(full_name, field->system); ++ strcat(full_name, "."); ++ strcat(full_name, field->event_name); ++ strcat(full_name, "."); ++ strcat(full_name, field->name); ++ field_name = full_name; ++ } else ++ field_name = field->name; ++ } + + if (field_name == NULL) + field_name = ""; +@@ -612,6 +967,9 @@ static const char *get_hist_field_flags( + + static void expr_field_str(struct hist_field *field, char *expr) + { ++ if (field->flags & HIST_FIELD_FL_VAR_REF) ++ strcat(expr, "$"); ++ + strcat(expr, hist_field_name(field, 0)); + + if (field->flags) { +@@ -742,6 +1100,11 @@ static struct hist_field *create_hist_fi + if (flags & HIST_FIELD_FL_EXPR) + goto out; /* caller will populate */ + ++ if (flags & HIST_FIELD_FL_VAR_REF) { ++ hist_field->fn = hist_field_var_ref; ++ goto out; ++ } ++ + if (flags & HIST_FIELD_FL_HITCOUNT) { + hist_field->fn = hist_field_counter; + hist_field->size = sizeof(u64); +@@ -835,6 +1198,144 @@ static void destroy_hist_fields(struct h + } + } + ++static int init_var_ref(struct hist_field *ref_field, ++ struct hist_field *var_field, ++ char *system, char *event_name) ++{ ++ int err = 0; ++ ++ ref_field->var.idx = var_field->var.idx; ++ ref_field->var.hist_data = var_field->hist_data; ++ ref_field->size = var_field->size; ++ ref_field->is_signed = var_field->is_signed; ++ ref_field->flags |= var_field->flags & ++ (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); ++ ++ if (system) { ++ ref_field->system = kstrdup(system, GFP_KERNEL); ++ if (!ref_field->system) ++ return -ENOMEM; ++ } ++ ++ if (event_name) { ++ ref_field->event_name = kstrdup(event_name, GFP_KERNEL); ++ if (!ref_field->event_name) { ++ err = -ENOMEM; ++ goto free; ++ } ++ } ++ ++ ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); ++ if 
(!ref_field->name) { ++ err = -ENOMEM; ++ goto free; ++ } ++ ++ ref_field->type = kstrdup(var_field->type, GFP_KERNEL); ++ if (!ref_field->type) { ++ err = -ENOMEM; ++ goto free; ++ } ++ out: ++ return err; ++ free: ++ kfree(ref_field->system); ++ kfree(ref_field->event_name); ++ kfree(ref_field->name); ++ ++ goto out; ++} ++ ++static struct hist_field *create_var_ref(struct hist_field *var_field, ++ char *system, char *event_name) ++{ ++ unsigned long flags = HIST_FIELD_FL_VAR_REF; ++ struct hist_field *ref_field; ++ ++ ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); ++ if (ref_field) { ++ if (init_var_ref(ref_field, var_field, system, event_name)) { ++ destroy_hist_field(ref_field, 0); ++ return NULL; ++ } ++ } ++ ++ return ref_field; ++} ++ ++static bool is_var_ref(char *var_name) ++{ ++ if (!var_name || strlen(var_name) < 2 || var_name[0] != '$') ++ return false; ++ ++ return true; ++} ++ ++static char *field_name_from_var(struct hist_trigger_data *hist_data, ++ char *var_name) ++{ ++ char *name, *field; ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { ++ name = hist_data->attrs->var_defs.name[i]; ++ ++ if (strcmp(var_name, name) == 0) { ++ field = hist_data->attrs->var_defs.expr[i]; ++ if (contains_operator(field) || is_var_ref(field)) ++ continue; ++ return field; ++ } ++ } ++ ++ return NULL; ++} ++ ++static char *local_field_var_ref(struct hist_trigger_data *hist_data, ++ char *system, char *event_name, ++ char *var_name) ++{ ++ struct trace_event_call *call; ++ ++ if (system && event_name) { ++ call = hist_data->event_file->event_call; ++ ++ if (strcmp(system, call->class->system) != 0) ++ return NULL; ++ ++ if (strcmp(event_name, trace_event_name(call)) != 0) ++ return NULL; ++ } ++ ++ if (!!system != !!event_name) ++ return NULL; ++ ++ if (!is_var_ref(var_name)) ++ return NULL; ++ ++ var_name++; ++ ++ return field_name_from_var(hist_data, var_name); ++} ++ ++static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, ++ char *system, char *event_name, ++ char *var_name) ++{ ++ struct hist_field *var_field = NULL, *ref_field = NULL; ++ ++ if (!is_var_ref(var_name)) ++ return NULL; ++ ++ var_name++; ++ ++ var_field = find_event_var(hist_data, system, event_name, var_name); ++ if (var_field) ++ ref_field = create_var_ref(var_field, system, event_name); ++ ++ return ref_field; ++} ++ + static struct ftrace_event_field * + parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, + char *field_str, unsigned long *flags) +@@ -891,10 +1392,40 @@ static struct hist_field *parse_atom(str + struct trace_event_file *file, char *str, + unsigned long *flags, char *var_name) + { ++ char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str; + struct ftrace_event_field *field = NULL; + struct hist_field *hist_field = NULL; + int ret = 0; + ++ s = strchr(str, '.'); ++ if (s) { ++ s = strchr(++s, '.'); ++ if (s) { ++ ref_system = strsep(&str, "."); ++ if (!str) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ref_event = strsep(&str, "."); ++ if (!str) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ref_var = str; ++ } ++ } ++ ++ s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var); ++ if (!s) { ++ hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var); ++ if (hist_field) { ++ hist_data->var_refs[hist_data->n_var_refs] = hist_field; ++ hist_field->var_ref_idx = hist_data->n_var_refs++; ++ return hist_field; ++ } ++ } else ++ str = s; ++ + field = parse_field(hist_data, file, str, 
flags); + if (IS_ERR(field)) { + ret = PTR_ERR(field); +@@ -1066,6 +1597,9 @@ static struct hist_field *parse_expr(str + goto free; + } + ++ operand1->read_once = true; ++ operand2->read_once = true; ++ + expr->operands[0] = operand1; + expr->operands[1] = operand2; + expr->operator = field_op; +@@ -1238,6 +1772,12 @@ static int create_key_field(struct hist_ + goto out; + } + ++ if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { ++ destroy_hist_field(hist_field, 0); ++ ret = -EINVAL; ++ goto out; ++ } ++ + key_size = hist_field->size; + } + +@@ -1576,6 +2116,7 @@ create_hist_data(unsigned int map_bits, + + hist_data->attrs = attrs; + hist_data->remove = remove; ++ hist_data->event_file = file; + + ret = create_hist_fields(hist_data, file); + if (ret) +@@ -1598,12 +2139,6 @@ create_hist_data(unsigned int map_bits, + ret = create_tracing_map_fields(hist_data); + if (ret) + goto free; +- +- ret = tracing_map_init(hist_data->map); +- if (ret) +- goto free; +- +- hist_data->event_file = file; + out: + return hist_data; + free: +@@ -1618,12 +2153,17 @@ create_hist_data(unsigned int map_bits, + + static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, + struct tracing_map_elt *elt, void *rec, +- struct ring_buffer_event *rbe) ++ struct ring_buffer_event *rbe, ++ u64 *var_ref_vals) + { ++ struct hist_elt_data *elt_data; + struct hist_field *hist_field; + unsigned int i, var_idx; + u64 hist_val; + ++ elt_data = elt->private_data; ++ elt_data->var_ref_vals = var_ref_vals; ++ + for_each_hist_val_field(i, hist_data) { + hist_field = hist_data->fields[i]; + hist_val = hist_field->fn(hist_field, elt, rbe, rec); +@@ -1675,6 +2215,7 @@ static void event_hist_trigger(struct ev + struct hist_trigger_data *hist_data = data->private_data; + bool use_compound_key = (hist_data->n_keys > 1); + unsigned long entries[HIST_STACKTRACE_DEPTH]; ++ u64 var_ref_vals[TRACING_MAP_VARS_MAX]; + char compound_key[HIST_KEY_SIZE_MAX]; + struct tracing_map_elt *elt = NULL; + struct stack_trace stacktrace; +@@ -1714,9 +2255,15 @@ static void event_hist_trigger(struct ev + if (use_compound_key) + key = compound_key; + ++ if (hist_data->n_var_refs && ++ !resolve_var_refs(hist_data, key, var_ref_vals, false)) ++ return; ++ + elt = tracing_map_insert(hist_data->map, key); +- if (elt) +- hist_trigger_elt_update(hist_data, elt, rec, rbe); ++ if (!elt) ++ return; ++ ++ hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals); + } + + static void hist_trigger_stacktrace_print(struct seq_file *m, +@@ -1931,8 +2478,11 @@ static void hist_field_print(struct seq_ + + if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) + seq_puts(m, "common_timestamp"); +- else if (field_name) ++ else if (field_name) { ++ if (hist_field->flags & HIST_FIELD_FL_VAR_REF) ++ seq_putc(m, '$'); + seq_printf(m, "%s", field_name); ++ } + + if (hist_field->flags) { + const char *flags_str = get_hist_field_flags(hist_field); +@@ -2072,7 +2622,11 @@ static void event_hist_trigger_free(stru + if (!data->ref) { + if (data->name) + del_named_trigger(data); ++ + trigger_data_free(data); ++ ++ remove_hist_vars(hist_data); ++ + destroy_hist_data(hist_data); + } + } +@@ -2285,23 +2839,55 @@ static int hist_register_trigger(char *g + goto out; + } + +- list_add_rcu(&data->list, &file->triggers); + ret++; + +- update_cond_flag(file); +- + if (hist_data->enable_timestamps) + tracing_set_time_stamp_abs(file->tr, true); ++ out: ++ return ret; ++} ++ ++static int hist_trigger_enable(struct event_trigger_data *data, ++ struct trace_event_file *file) ++{ ++ int ret 
= 0; ++ ++ list_add_tail_rcu(&data->list, &file->triggers); ++ ++ update_cond_flag(file); + + if (trace_event_trigger_enable_disable(file, 1) < 0) { + list_del_rcu(&data->list); + update_cond_flag(file); + ret--; + } +- out: ++ + return ret; + } + ++static bool hist_trigger_check_refs(struct event_trigger_data *data, ++ struct trace_event_file *file) ++{ ++ struct hist_trigger_data *hist_data = data->private_data; ++ struct event_trigger_data *test, *named_data = NULL; ++ ++ if (hist_data->attrs->name) ++ named_data = find_named_trigger(hist_data->attrs->name); ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ if (!hist_trigger_match(data, test, named_data, false)) ++ continue; ++ hist_data = test->private_data; ++ if (check_var_refs(hist_data)) ++ return true; ++ break; ++ } ++ } ++ ++ return false; ++} ++ + static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, + struct event_trigger_data *data, + struct trace_event_file *file) +@@ -2334,11 +2920,30 @@ static void hist_unregister_trigger(char + } + } + ++static bool hist_file_check_refs(struct trace_event_file *file) ++{ ++ struct hist_trigger_data *hist_data; ++ struct event_trigger_data *test; ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ hist_data = test->private_data; ++ if (check_var_refs(hist_data)) ++ return true; ++ } ++ } ++ ++ return false; ++} ++ + static void hist_unreg_all(struct trace_event_file *file) + { + struct event_trigger_data *test, *n; + struct hist_trigger_data *hist_data; + ++ if (hist_file_check_refs(file)) ++ return; ++ + list_for_each_entry_safe(test, n, &file->triggers, list) { + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { + hist_data = test->private_data; +@@ -2414,6 +3019,11 @@ static int event_hist_trigger_func(struc + } + + if (remove) { ++ if (hist_trigger_check_refs(trigger_data, file)) { ++ ret = -EBUSY; ++ goto out_free; ++ } ++ + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + ret = 0; + goto out_free; +@@ -2431,14 +3041,33 @@ static int event_hist_trigger_func(struc + goto out_free; + } else if (ret < 0) + goto out_free; ++ ++ if (get_named_trigger_data(trigger_data)) ++ goto enable; ++ ++ if (has_hist_vars(hist_data)) ++ save_hist_vars(hist_data); ++ ++ ret = tracing_map_init(hist_data->map); ++ if (ret) ++ goto out_unreg; ++enable: ++ ret = hist_trigger_enable(trigger_data, file); ++ if (ret) ++ goto out_unreg; ++ + /* Just return zero, not the number of registered triggers */ + ret = 0; + out: + return ret; ++ out_unreg: ++ cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); + out_free: + if (cmd_ops->set_filter) + cmd_ops->set_filter(NULL, trigger_data, NULL); + ++ remove_hist_vars(hist_data); ++ + kfree(trigger_data); + + destroy_hist_data(hist_data); +--- a/kernel/trace/trace_events_trigger.c ++++ b/kernel/trace/trace_events_trigger.c +@@ -909,6 +909,12 @@ void set_named_trigger_data(struct event + data->named_data = named_data; + } + ++struct event_trigger_data * ++get_named_trigger_data(struct event_trigger_data *data) ++{ ++ return data->named_data; ++} ++ + static void + traceon_trigger(struct event_trigger_data *data, void *rec, + struct ring_buffer_event *event) diff --git a/debian/patches/features/all/rt/0031-tracing-Add-hist-trigger-action-hook.patch b/debian/patches/features/all/rt/0031-tracing-Add-hist-trigger-action-hook.patch new file mode 100644 index 000000000..bf92ce2cf --- /dev/null +++ 
b/debian/patches/features/all/rt/0031-tracing-Add-hist-trigger-action-hook.patch @@ -0,0 +1,216 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:57 -0600 +Subject: [PATCH 31/48] tracing: Add hist trigger action hook +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add a hook for executing extra actions whenever a histogram entry is +added or updated. + +The default 'action' when a hist entry is added to a histogram is to +update the set of values associated with it. Some applications may +want to perform additional actions at that point, such as generate +another event, or compare and save a maximum. + +Add a simple framework for doing that; specific actions will be +implemented on top of it in later patches. + +Link: http://lkml.kernel.org/r/9482ba6a3eaf5ca6e60954314beacd0e25c05b24.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit b91ae245c2f781e6da0532d8545f51a0f1291cc0) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 106 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 104 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -33,6 +33,7 @@ typedef u64 (*hist_field_fn_t) (struct h + + #define HIST_FIELD_OPERANDS_MAX 2 + #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) ++#define HIST_ACTIONS_MAX 8 + + enum field_op_id { + FIELD_OP_NONE, +@@ -242,6 +243,9 @@ struct hist_trigger_attrs { + char *assignment_str[TRACING_MAP_VARS_MAX]; + unsigned int n_assignments; + ++ char *action_str[HIST_ACTIONS_MAX]; ++ unsigned int n_actions; ++ + struct var_defs var_defs; + }; + +@@ -261,6 +265,21 @@ struct hist_trigger_data { + bool remove; + struct hist_field *var_refs[TRACING_MAP_VARS_MAX]; + unsigned int n_var_refs; ++ ++ struct action_data *actions[HIST_ACTIONS_MAX]; ++ unsigned int n_actions; ++}; ++ ++struct action_data; ++ ++typedef void (*action_fn_t) (struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, void *rec, ++ struct ring_buffer_event *rbe, ++ struct action_data *data, u64 *var_ref_vals); ++ ++struct action_data { ++ action_fn_t fn; ++ unsigned int var_ref_idx; + }; + + static u64 hist_field_timestamp(struct hist_field *hist_field, +@@ -764,6 +783,9 @@ static void destroy_hist_trigger_attrs(s + for (i = 0; i < attrs->n_assignments; i++) + kfree(attrs->assignment_str[i]); + ++ for (i = 0; i < attrs->n_actions; i++) ++ kfree(attrs->action_str[i]); ++ + kfree(attrs->name); + kfree(attrs->sort_key_str); + kfree(attrs->keys_str); +@@ -771,6 +793,16 @@ static void destroy_hist_trigger_attrs(s + kfree(attrs); + } + ++static int parse_action(char *str, struct hist_trigger_attrs *attrs) ++{ ++ int ret = 0; ++ ++ if (attrs->n_actions >= HIST_ACTIONS_MAX) ++ return ret; ++ ++ return ret; ++} ++ + static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) + { + int ret = 0; +@@ -854,8 +886,9 @@ static struct hist_trigger_attrs *parse_ + else if (strcmp(str, "clear") == 0) + attrs->clear = true; + else { +- ret = -EINVAL; +- goto free; ++ ret = parse_action(str, attrs); ++ if (ret) ++ goto free; + } + } + +@@ -2047,11 +2080,55 @@ static int create_sort_keys(struct hist_ + return ret; + } + ++static void destroy_actions(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_actions; i++) { ++ struct action_data *data = hist_data->actions[i]; ++ ++ kfree(data); 
++ } ++} ++ ++static int parse_actions(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ int ret = 0; ++ char *str; ++ ++ for (i = 0; i < hist_data->attrs->n_actions; i++) { ++ str = hist_data->attrs->action_str[i]; ++ } ++ ++ return ret; ++} ++ ++static int create_actions(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file) ++{ ++ struct action_data *data; ++ unsigned int i; ++ int ret = 0; ++ ++ for (i = 0; i < hist_data->attrs->n_actions; i++) { ++ data = hist_data->actions[i]; ++ } ++ ++ return ret; ++} ++ + static void destroy_hist_data(struct hist_trigger_data *hist_data) + { ++ if (!hist_data) ++ return; ++ + destroy_hist_trigger_attrs(hist_data->attrs); + destroy_hist_fields(hist_data); + tracing_map_destroy(hist_data->map); ++ ++ destroy_actions(hist_data); ++ + kfree(hist_data); + } + +@@ -2118,6 +2195,10 @@ create_hist_data(unsigned int map_bits, + hist_data->remove = remove; + hist_data->event_file = file; + ++ ret = parse_actions(hist_data); ++ if (ret) ++ goto free; ++ + ret = create_hist_fields(hist_data, file); + if (ret) + goto free; +@@ -2209,6 +2290,20 @@ static inline void add_to_key(char *comp + memcpy(compound_key + key_field->offset, key, size); + } + ++static void ++hist_trigger_actions(struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, void *rec, ++ struct ring_buffer_event *rbe, u64 *var_ref_vals) ++{ ++ struct action_data *data; ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_actions; i++) { ++ data = hist_data->actions[i]; ++ data->fn(hist_data, elt, rec, rbe, data, var_ref_vals); ++ } ++} ++ + static void event_hist_trigger(struct event_trigger_data *data, void *rec, + struct ring_buffer_event *rbe) + { +@@ -2264,6 +2359,9 @@ static void event_hist_trigger(struct ev + return; + + hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals); ++ ++ if (resolve_var_refs(hist_data, key, var_ref_vals, true)) ++ hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals); + } + + static void hist_trigger_stacktrace_print(struct seq_file *m, +@@ -3048,6 +3146,10 @@ static int event_hist_trigger_func(struc + if (has_hist_vars(hist_data)) + save_hist_vars(hist_data); + ++ ret = create_actions(hist_data, file); ++ if (ret) ++ goto out_unreg; ++ + ret = tracing_map_init(hist_data->map); + if (ret) + goto out_unreg; diff --git a/debian/patches/features/all/rt/0032-tracing-Add-support-for-synthetic-events.patch b/debian/patches/features/all/rt/0032-tracing-Add-support-for-synthetic-events.patch new file mode 100644 index 000000000..f81d55414 --- /dev/null +++ b/debian/patches/features/all/rt/0032-tracing-Add-support-for-synthetic-events.patch @@ -0,0 +1,1042 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:51:58 -0600 +Subject: [PATCH 32/48] tracing: Add support for 'synthetic' events +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Synthetic events are user-defined events generated from hist trigger +variables saved from one or more other events. + +To define a synthetic event, the user writes a simple specification +consisting of the name of the new event along with one or more +variables and their type(s), to the tracing/synthetic_events file. 
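A sketch of the accepted grammar, as implemented by the parsing code in
this patch (the event name 'sample_synth' is illustrative only): each
field is a 'type name' pair, fields are separated by semicolons, at most
SYNTH_FIELDS_MAX (16) fields are accepted, and string fields are declared
as fixed-size char arrays no larger than STR_VAR_LEN_MAX (32) bytes:

  sample_synth u64 lat; pid_t pid; char comm[16]

(each string field occupies a full STR_VAR_LEN_MAX bytes in the trace
entry, a multiple of sizeof(u64)).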
+ +For instance, the following creates a new event named 'wakeup_latency' +with 3 fields: lat, pid, and prio: + + # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \ + /sys/kernel/debug/tracing/synthetic_events + +Reading the tracing/synthetic_events file lists all the +currently-defined synthetic events, in this case the event we defined +above: + + # cat /sys/kernel/debug/tracing/synthetic_events + wakeup_latency u64 lat; pid_t pid; int prio + +At this point, the synthetic event is ready to use, and a histogram +can be defined using it: + + # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \ + /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger + +The new event is created under the tracing/events/synthetic/ directory +and looks and behaves just like any other event: + + # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency + enable filter format hist id trigger + +Although a histogram can be defined for it, nothing will happen until +an action tracing that event via the trace_synth() function occurs. +The trace_synth() function is very similar to all the other trace_* +invocations spread throughout the kernel, except in this case the +trace_ function and its corresponding tracepoint isn't statically +generated but defined by the user at run-time. + +How this can be automatically hooked up via a hist trigger 'action' is +discussed in a subsequent patch. + +Link: http://lkml.kernel.org/r/c68df2284b7d172669daf9be29db62ad49bbc559.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +[fix noderef.cocci warnings, sizeof pointer for kcalloc of event->fields] +Signed-off-by: Fengguang Wu +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit cc9371f8641efd7ce6c8d4e1fd44eae249deadb4) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 895 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 893 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -20,10 +20,16 @@ + #include + #include + #include ++#include + + #include "tracing_map.h" + #include "trace.h" + ++#define SYNTH_SYSTEM "synthetic" ++#define SYNTH_FIELDS_MAX 16 ++ ++#define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ ++ + struct hist_field; + + typedef u64 (*hist_field_fn_t) (struct hist_field *field, +@@ -270,6 +276,26 @@ struct hist_trigger_data { + unsigned int n_actions; + }; + ++struct synth_field { ++ char *type; ++ char *name; ++ size_t size; ++ bool is_signed; ++ bool is_string; ++}; ++ ++struct synth_event { ++ struct list_head list; ++ int ref; ++ char *name; ++ struct synth_field **fields; ++ unsigned int n_fields; ++ unsigned int n_u64; ++ struct trace_event_class class; ++ struct trace_event_call call; ++ struct tracepoint *tp; ++}; ++ + struct action_data; + + typedef void (*action_fn_t) (struct hist_trigger_data *hist_data, +@@ -282,6 +308,790 @@ struct action_data { + unsigned int var_ref_idx; + }; + ++static LIST_HEAD(synth_event_list); ++static DEFINE_MUTEX(synth_event_mutex); ++ ++struct synth_trace_event { ++ struct trace_entry ent; ++ u64 fields[]; ++}; ++ ++static int synth_event_define_fields(struct trace_event_call *call) ++{ ++ struct synth_trace_event trace; ++ int offset = offsetof(typeof(trace), fields); ++ struct synth_event *event = call->data; ++ unsigned int i, size, n_u64; ++ char *name, *type; ++ bool is_signed; ++ int ret = 0; ++ ++ for (i = 0, n_u64 = 0; i < event->n_fields; i++) { ++ size = event->fields[i]->size; ++ 
is_signed = event->fields[i]->is_signed; ++ type = event->fields[i]->type; ++ name = event->fields[i]->name; ++ ret = trace_define_field(call, type, name, offset, size, ++ is_signed, FILTER_OTHER); ++ if (ret) ++ break; ++ ++ if (event->fields[i]->is_string) { ++ offset += STR_VAR_LEN_MAX; ++ n_u64 += STR_VAR_LEN_MAX / sizeof(u64); ++ } else { ++ offset += sizeof(u64); ++ n_u64++; ++ } ++ } ++ ++ event->n_u64 = n_u64; ++ ++ return ret; ++} ++ ++static bool synth_field_signed(char *type) ++{ ++ if (strncmp(type, "u", 1) == 0) ++ return false; ++ ++ return true; ++} ++ ++static int synth_field_is_string(char *type) ++{ ++ if (strstr(type, "char[") != NULL) ++ return true; ++ ++ return false; ++} ++ ++static int synth_field_string_size(char *type) ++{ ++ char buf[4], *end, *start; ++ unsigned int len; ++ int size, err; ++ ++ start = strstr(type, "char["); ++ if (start == NULL) ++ return -EINVAL; ++ start += strlen("char["); ++ ++ end = strchr(type, ']'); ++ if (!end || end < start) ++ return -EINVAL; ++ ++ len = end - start; ++ if (len > 3) ++ return -EINVAL; ++ ++ strncpy(buf, start, len); ++ buf[len] = '\0'; ++ ++ err = kstrtouint(buf, 0, &size); ++ if (err) ++ return err; ++ ++ if (size > STR_VAR_LEN_MAX) ++ return -EINVAL; ++ ++ return size; ++} ++ ++static int synth_field_size(char *type) ++{ ++ int size = 0; ++ ++ if (strcmp(type, "s64") == 0) ++ size = sizeof(s64); ++ else if (strcmp(type, "u64") == 0) ++ size = sizeof(u64); ++ else if (strcmp(type, "s32") == 0) ++ size = sizeof(s32); ++ else if (strcmp(type, "u32") == 0) ++ size = sizeof(u32); ++ else if (strcmp(type, "s16") == 0) ++ size = sizeof(s16); ++ else if (strcmp(type, "u16") == 0) ++ size = sizeof(u16); ++ else if (strcmp(type, "s8") == 0) ++ size = sizeof(s8); ++ else if (strcmp(type, "u8") == 0) ++ size = sizeof(u8); ++ else if (strcmp(type, "char") == 0) ++ size = sizeof(char); ++ else if (strcmp(type, "unsigned char") == 0) ++ size = sizeof(unsigned char); ++ else if (strcmp(type, "int") == 0) ++ size = sizeof(int); ++ else if (strcmp(type, "unsigned int") == 0) ++ size = sizeof(unsigned int); ++ else if (strcmp(type, "long") == 0) ++ size = sizeof(long); ++ else if (strcmp(type, "unsigned long") == 0) ++ size = sizeof(unsigned long); ++ else if (strcmp(type, "pid_t") == 0) ++ size = sizeof(pid_t); ++ else if (synth_field_is_string(type)) ++ size = synth_field_string_size(type); ++ ++ return size; ++} ++ ++static const char *synth_field_fmt(char *type) ++{ ++ const char *fmt = "%llu"; ++ ++ if (strcmp(type, "s64") == 0) ++ fmt = "%lld"; ++ else if (strcmp(type, "u64") == 0) ++ fmt = "%llu"; ++ else if (strcmp(type, "s32") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "u32") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "s16") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "u16") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "s8") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "u8") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "char") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "unsigned char") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "int") == 0) ++ fmt = "%d"; ++ else if (strcmp(type, "unsigned int") == 0) ++ fmt = "%u"; ++ else if (strcmp(type, "long") == 0) ++ fmt = "%ld"; ++ else if (strcmp(type, "unsigned long") == 0) ++ fmt = "%lu"; ++ else if (strcmp(type, "pid_t") == 0) ++ fmt = "%d"; ++ else if (synth_field_is_string(type)) ++ fmt = "%s"; ++ ++ return fmt; ++} ++ ++static enum print_line_t print_synth_event(struct trace_iterator *iter, ++ int flags, ++ struct trace_event *event) ++{ ++ struct trace_array 
*tr = iter->tr; ++ struct trace_seq *s = &iter->seq; ++ struct synth_trace_event *entry; ++ struct synth_event *se; ++ unsigned int i, n_u64; ++ char print_fmt[32]; ++ const char *fmt; ++ ++ entry = (struct synth_trace_event *)iter->ent; ++ se = container_of(event, struct synth_event, call.event); ++ ++ trace_seq_printf(s, "%s: ", se->name); ++ ++ for (i = 0, n_u64 = 0; i < se->n_fields; i++) { ++ if (trace_seq_has_overflowed(s)) ++ goto end; ++ ++ fmt = synth_field_fmt(se->fields[i]->type); ++ ++ /* parameter types */ ++ if (tr->trace_flags & TRACE_ITER_VERBOSE) ++ trace_seq_printf(s, "%s ", fmt); ++ ++ snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt); ++ ++ /* parameter values */ ++ if (se->fields[i]->is_string) { ++ trace_seq_printf(s, print_fmt, se->fields[i]->name, ++ (char *)&entry->fields[n_u64], ++ i == se->n_fields - 1 ? "" : " "); ++ n_u64 += STR_VAR_LEN_MAX / sizeof(u64); ++ } else { ++ trace_seq_printf(s, print_fmt, se->fields[i]->name, ++ entry->fields[n_u64], ++ i == se->n_fields - 1 ? "" : " "); ++ n_u64++; ++ } ++ } ++end: ++ trace_seq_putc(s, '\n'); ++ ++ return trace_handle_return(s); ++} ++ ++static struct trace_event_functions synth_event_funcs = { ++ .trace = print_synth_event ++}; ++ ++static notrace void trace_event_raw_event_synth(void *__data, ++ u64 *var_ref_vals, ++ unsigned int var_ref_idx) ++{ ++ struct trace_event_file *trace_file = __data; ++ struct synth_trace_event *entry; ++ struct trace_event_buffer fbuffer; ++ struct synth_event *event; ++ unsigned int i, n_u64; ++ int fields_size = 0; ++ ++ event = trace_file->event_call->data; ++ ++ if (trace_trigger_soft_disabled(trace_file)) ++ return; ++ ++ fields_size = event->n_u64 * sizeof(u64); ++ ++ entry = trace_event_buffer_reserve(&fbuffer, trace_file, ++ sizeof(*entry) + fields_size); ++ if (!entry) ++ return; ++ ++ for (i = 0, n_u64 = 0; i < event->n_fields; i++) { ++ if (event->fields[i]->is_string) { ++ char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; ++ char *str_field = (char *)&entry->fields[n_u64]; ++ ++ strncpy(str_field, str_val, STR_VAR_LEN_MAX); ++ n_u64 += STR_VAR_LEN_MAX / sizeof(u64); ++ } else { ++ entry->fields[n_u64] = var_ref_vals[var_ref_idx + i]; ++ n_u64++; ++ } ++ } ++ ++ trace_event_buffer_commit(&fbuffer); ++} ++ ++static void free_synth_event_print_fmt(struct trace_event_call *call) ++{ ++ if (call) { ++ kfree(call->print_fmt); ++ call->print_fmt = NULL; ++ } ++} ++ ++static int __set_synth_event_print_fmt(struct synth_event *event, ++ char *buf, int len) ++{ ++ const char *fmt; ++ int pos = 0; ++ int i; ++ ++ /* When len=0, we just calculate the needed length */ ++#define LEN_OR_ZERO (len ? len - pos : 0) ++ ++ pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); ++ for (i = 0; i < event->n_fields; i++) { ++ fmt = synth_field_fmt(event->fields[i]->type); ++ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s", ++ event->fields[i]->name, fmt, ++ i == event->n_fields - 1 ? 
"" : ", "); ++ } ++ pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); ++ ++ for (i = 0; i < event->n_fields; i++) { ++ pos += snprintf(buf + pos, LEN_OR_ZERO, ++ ", REC->%s", event->fields[i]->name); ++ } ++ ++#undef LEN_OR_ZERO ++ ++ /* return the length of print_fmt */ ++ return pos; ++} ++ ++static int set_synth_event_print_fmt(struct trace_event_call *call) ++{ ++ struct synth_event *event = call->data; ++ char *print_fmt; ++ int len; ++ ++ /* First: called with 0 length to calculate the needed length */ ++ len = __set_synth_event_print_fmt(event, NULL, 0); ++ ++ print_fmt = kmalloc(len + 1, GFP_KERNEL); ++ if (!print_fmt) ++ return -ENOMEM; ++ ++ /* Second: actually write the @print_fmt */ ++ __set_synth_event_print_fmt(event, print_fmt, len + 1); ++ call->print_fmt = print_fmt; ++ ++ return 0; ++} ++ ++static void free_synth_field(struct synth_field *field) ++{ ++ kfree(field->type); ++ kfree(field->name); ++ kfree(field); ++} ++ ++static struct synth_field *parse_synth_field(char *field_type, ++ char *field_name) ++{ ++ struct synth_field *field; ++ int len, ret = 0; ++ char *array; ++ ++ if (field_type[0] == ';') ++ field_type++; ++ ++ len = strlen(field_name); ++ if (field_name[len - 1] == ';') ++ field_name[len - 1] = '\0'; ++ ++ field = kzalloc(sizeof(*field), GFP_KERNEL); ++ if (!field) ++ return ERR_PTR(-ENOMEM); ++ ++ len = strlen(field_type) + 1; ++ array = strchr(field_name, '['); ++ if (array) ++ len += strlen(array); ++ field->type = kzalloc(len, GFP_KERNEL); ++ if (!field->type) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ strcat(field->type, field_type); ++ if (array) { ++ strcat(field->type, array); ++ *array = '\0'; ++ } ++ ++ field->size = synth_field_size(field->type); ++ if (!field->size) { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ if (synth_field_is_string(field->type)) ++ field->is_string = true; ++ ++ field->is_signed = synth_field_signed(field->type); ++ ++ field->name = kstrdup(field_name, GFP_KERNEL); ++ if (!field->name) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ out: ++ return field; ++ free: ++ free_synth_field(field); ++ field = ERR_PTR(ret); ++ goto out; ++} ++ ++static void free_synth_tracepoint(struct tracepoint *tp) ++{ ++ if (!tp) ++ return; ++ ++ kfree(tp->name); ++ kfree(tp); ++} ++ ++static struct tracepoint *alloc_synth_tracepoint(char *name) ++{ ++ struct tracepoint *tp; ++ ++ tp = kzalloc(sizeof(*tp), GFP_KERNEL); ++ if (!tp) ++ return ERR_PTR(-ENOMEM); ++ ++ tp->name = kstrdup(name, GFP_KERNEL); ++ if (!tp->name) { ++ kfree(tp); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ return tp; ++} ++ ++typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, ++ unsigned int var_ref_idx); ++ ++static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, ++ unsigned int var_ref_idx) ++{ ++ struct tracepoint *tp = event->tp; ++ ++ if (unlikely(atomic_read(&tp->key.enabled) > 0)) { ++ struct tracepoint_func *probe_func_ptr; ++ synth_probe_func_t probe_func; ++ void *__data; ++ ++ if (!(cpu_online(raw_smp_processor_id()))) ++ return; ++ ++ probe_func_ptr = rcu_dereference_sched((tp)->funcs); ++ if (probe_func_ptr) { ++ do { ++ probe_func = probe_func_ptr->func; ++ __data = probe_func_ptr->data; ++ probe_func(__data, var_ref_vals, var_ref_idx); ++ } while ((++probe_func_ptr)->func); ++ } ++ } ++} ++ ++static struct synth_event *find_synth_event(const char *name) ++{ ++ struct synth_event *event; ++ ++ list_for_each_entry(event, &synth_event_list, list) { ++ if (strcmp(event->name, name) == 0) ++ return event; ++ } ++ ++ return NULL; ++} ++ 
++static int register_synth_event(struct synth_event *event) ++{ ++ struct trace_event_call *call = &event->call; ++ int ret = 0; ++ ++ event->call.class = &event->class; ++ event->class.system = kstrdup(SYNTH_SYSTEM, GFP_KERNEL); ++ if (!event->class.system) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ event->tp = alloc_synth_tracepoint(event->name); ++ if (IS_ERR(event->tp)) { ++ ret = PTR_ERR(event->tp); ++ event->tp = NULL; ++ goto out; ++ } ++ ++ INIT_LIST_HEAD(&call->class->fields); ++ call->event.funcs = &synth_event_funcs; ++ call->class->define_fields = synth_event_define_fields; ++ ++ ret = register_trace_event(&call->event); ++ if (!ret) { ++ ret = -ENODEV; ++ goto out; ++ } ++ call->flags = TRACE_EVENT_FL_TRACEPOINT; ++ call->class->reg = trace_event_reg; ++ call->class->probe = trace_event_raw_event_synth; ++ call->data = event; ++ call->tp = event->tp; ++ ++ ret = trace_add_event_call(call); ++ if (ret) { ++ pr_warn("Failed to register synthetic event: %s\n", ++ trace_event_name(call)); ++ goto err; ++ } ++ ++ ret = set_synth_event_print_fmt(call); ++ if (ret < 0) { ++ trace_remove_event_call(call); ++ goto err; ++ } ++ out: ++ return ret; ++ err: ++ unregister_trace_event(&call->event); ++ goto out; ++} ++ ++static int unregister_synth_event(struct synth_event *event) ++{ ++ struct trace_event_call *call = &event->call; ++ int ret; ++ ++ ret = trace_remove_event_call(call); ++ ++ return ret; ++} ++ ++static void free_synth_event(struct synth_event *event) ++{ ++ unsigned int i; ++ ++ if (!event) ++ return; ++ ++ for (i = 0; i < event->n_fields; i++) ++ free_synth_field(event->fields[i]); ++ ++ kfree(event->fields); ++ kfree(event->name); ++ kfree(event->class.system); ++ free_synth_tracepoint(event->tp); ++ free_synth_event_print_fmt(&event->call); ++ kfree(event); ++} ++ ++static struct synth_event *alloc_synth_event(char *event_name, int n_fields, ++ struct synth_field **fields) ++{ ++ struct synth_event *event; ++ unsigned int i; ++ ++ event = kzalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) { ++ event = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ event->name = kstrdup(event_name, GFP_KERNEL); ++ if (!event->name) { ++ kfree(event); ++ event = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ event->fields = kcalloc(n_fields, sizeof(*event->fields), GFP_KERNEL); ++ if (!event->fields) { ++ free_synth_event(event); ++ event = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ for (i = 0; i < n_fields; i++) ++ event->fields[i] = fields[i]; ++ ++ event->n_fields = n_fields; ++ out: ++ return event; ++} ++ ++static void add_or_delete_synth_event(struct synth_event *event, int delete) ++{ ++ if (delete) ++ free_synth_event(event); ++ else { ++ mutex_lock(&synth_event_mutex); ++ if (!find_synth_event(event->name)) ++ list_add(&event->list, &synth_event_list); ++ else ++ free_synth_event(event); ++ mutex_unlock(&synth_event_mutex); ++ } ++} ++ ++static int create_synth_event(int argc, char **argv) ++{ ++ struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; ++ struct synth_event *event = NULL; ++ bool delete_event = false; ++ int i, n_fields = 0, ret = 0; ++ char *name; ++ ++ mutex_lock(&synth_event_mutex); ++ ++ /* ++ * Argument syntax: ++ * - Add synthetic event: field[;field] ... ++ * - Remove synthetic event: ! field[;field] ... 
++ * where 'field' = type field_name ++ */ ++ if (argc < 1) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ name = argv[0]; ++ if (name[0] == '!') { ++ delete_event = true; ++ name++; ++ } ++ ++ event = find_synth_event(name); ++ if (event) { ++ if (delete_event) { ++ if (event->ref) { ++ event = NULL; ++ ret = -EBUSY; ++ goto out; ++ } ++ list_del(&event->list); ++ goto out; ++ } ++ event = NULL; ++ ret = -EEXIST; ++ goto out; ++ } else if (delete_event) ++ goto out; ++ ++ if (argc < 2) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ for (i = 1; i < argc - 1; i++) { ++ if (strcmp(argv[i], ";") == 0) ++ continue; ++ if (n_fields == SYNTH_FIELDS_MAX) { ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ field = parse_synth_field(argv[i], argv[i + 1]); ++ if (IS_ERR(field)) { ++ ret = PTR_ERR(field); ++ goto err; ++ } ++ fields[n_fields] = field; ++ i++; n_fields++; ++ } ++ ++ if (i < argc) { ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ event = alloc_synth_event(name, n_fields, fields); ++ if (IS_ERR(event)) { ++ ret = PTR_ERR(event); ++ event = NULL; ++ goto err; ++ } ++ out: ++ mutex_unlock(&synth_event_mutex); ++ ++ if (event) { ++ if (delete_event) { ++ ret = unregister_synth_event(event); ++ add_or_delete_synth_event(event, !ret); ++ } else { ++ ret = register_synth_event(event); ++ add_or_delete_synth_event(event, ret); ++ } ++ } ++ ++ return ret; ++ err: ++ mutex_unlock(&synth_event_mutex); ++ ++ for (i = 0; i < n_fields; i++) ++ free_synth_field(fields[i]); ++ free_synth_event(event); ++ ++ return ret; ++} ++ ++static int release_all_synth_events(void) ++{ ++ struct list_head release_events; ++ struct synth_event *event, *e; ++ int ret = 0; ++ ++ INIT_LIST_HEAD(&release_events); ++ ++ mutex_lock(&synth_event_mutex); ++ ++ list_for_each_entry(event, &synth_event_list, list) { ++ if (event->ref) { ++ mutex_unlock(&synth_event_mutex); ++ return -EBUSY; ++ } ++ } ++ ++ list_splice_init(&event->list, &release_events); ++ ++ mutex_unlock(&synth_event_mutex); ++ ++ list_for_each_entry_safe(event, e, &release_events, list) { ++ list_del(&event->list); ++ ++ ret = unregister_synth_event(event); ++ add_or_delete_synth_event(event, !ret); ++ } ++ ++ return ret; ++} ++ ++ ++static void *synth_events_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ mutex_lock(&synth_event_mutex); ++ ++ return seq_list_start(&synth_event_list, *pos); ++} ++ ++static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ return seq_list_next(v, &synth_event_list, pos); ++} ++ ++static void synth_events_seq_stop(struct seq_file *m, void *v) ++{ ++ mutex_unlock(&synth_event_mutex); ++} ++ ++static int synth_events_seq_show(struct seq_file *m, void *v) ++{ ++ struct synth_field *field; ++ struct synth_event *event = v; ++ unsigned int i; ++ ++ seq_printf(m, "%s\t", event->name); ++ ++ for (i = 0; i < event->n_fields; i++) { ++ field = event->fields[i]; ++ ++ /* parameter values */ ++ seq_printf(m, "%s %s%s", field->type, field->name, ++ i == event->n_fields - 1 ? 
"" : "; "); ++ } ++ ++ seq_putc(m, '\n'); ++ ++ return 0; ++} ++ ++static const struct seq_operations synth_events_seq_op = { ++ .start = synth_events_seq_start, ++ .next = synth_events_seq_next, ++ .stop = synth_events_seq_stop, ++ .show = synth_events_seq_show ++}; ++ ++static int synth_events_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ ++ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { ++ ret = release_all_synth_events(); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return seq_open(file, &synth_events_seq_op); ++} ++ ++static ssize_t synth_events_write(struct file *file, ++ const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ return trace_parse_run_command(file, buffer, count, ppos, ++ create_synth_event); ++} ++ ++static const struct file_operations synth_events_fops = { ++ .open = synth_events_open, ++ .write = synth_events_write, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ + static u64 hist_field_timestamp(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, +@@ -2963,6 +3773,28 @@ static int hist_trigger_enable(struct ev + return ret; + } + ++static bool have_hist_trigger_match(struct event_trigger_data *data, ++ struct trace_event_file *file) ++{ ++ struct hist_trigger_data *hist_data = data->private_data; ++ struct event_trigger_data *test, *named_data = NULL; ++ bool match = false; ++ ++ if (hist_data->attrs->name) ++ named_data = find_named_trigger(hist_data->attrs->name); ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ if (hist_trigger_match(data, test, named_data, false)) { ++ match = true; ++ break; ++ } ++ } ++ } ++ ++ return match; ++} ++ + static bool hist_trigger_check_refs(struct event_trigger_data *data, + struct trace_event_file *file) + { +@@ -3038,6 +3870,8 @@ static void hist_unreg_all(struct trace_ + { + struct event_trigger_data *test, *n; + struct hist_trigger_data *hist_data; ++ struct synth_event *se; ++ const char *se_name; + + if (hist_file_check_refs(file)) + return; +@@ -3047,6 +3881,14 @@ static void hist_unreg_all(struct trace_ + hist_data = test->private_data; + list_del_rcu(&test->list); + trace_event_trigger_enable_disable(file, 0); ++ ++ mutex_lock(&synth_event_mutex); ++ se_name = trace_event_name(file->event_call); ++ se = find_synth_event(se_name); ++ if (se) ++ se->ref--; ++ mutex_unlock(&synth_event_mutex); ++ + update_cond_flag(file); + if (hist_data->enable_timestamps) + tracing_set_time_stamp_abs(file->tr, false); +@@ -3065,6 +3907,8 @@ static int event_hist_trigger_func(struc + struct hist_trigger_attrs *attrs; + struct event_trigger_ops *trigger_ops; + struct hist_trigger_data *hist_data; ++ struct synth_event *se; ++ const char *se_name; + bool remove = false; + char *trigger; + int ret = 0; +@@ -3095,10 +3939,11 @@ static int event_hist_trigger_func(struc + + trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); + +- ret = -ENOMEM; + trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); +- if (!trigger_data) ++ if (!trigger_data) { ++ ret = -ENOMEM; + goto out_free; ++ } + + trigger_data->count = -1; + trigger_data->ops = trigger_ops; +@@ -3117,12 +3962,23 @@ static int event_hist_trigger_func(struc + } + + if (remove) { ++ if (!have_hist_trigger_match(trigger_data, file)) ++ goto out_free; ++ + if (hist_trigger_check_refs(trigger_data, file)) { + ret = -EBUSY; + goto out_free; + } + + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); 
++
++ mutex_lock(&synth_event_mutex);
++ se_name = trace_event_name(file->event_call);
++ se = find_synth_event(se_name);
++ if (se)
++ se->ref--;
++ mutex_unlock(&synth_event_mutex);
++
+ ret = 0;
+ goto out_free;
+ }
+@@ -3158,6 +4014,13 @@ static int event_hist_trigger_func(struc
+ if (ret)
+ goto out_unreg;
+
++ mutex_lock(&synth_event_mutex);
++ se_name = trace_event_name(file->event_call);
++ se = find_synth_event(se_name);
++ if (se)
++ se->ref++;
++ mutex_unlock(&synth_event_mutex);
++
+ /* Just return zero, not the number of registered triggers */
+ ret = 0;
+ out:
+@@ -3330,3 +4193,31 @@ static __init void unregister_trigger_hi
+
+ return ret;
+ }
++
++static __init int trace_events_hist_init(void)
++{
++ struct dentry *entry = NULL;
++ struct dentry *d_tracer;
++ int err = 0;
++
++ d_tracer = tracing_init_dentry();
++ if (IS_ERR(d_tracer)) {
++ err = PTR_ERR(d_tracer);
++ goto err;
++ }
++
++ entry = tracefs_create_file("synthetic_events", 0644, d_tracer,
++ NULL, &synth_events_fops);
++ if (!entry) {
++ err = -ENODEV;
++ goto err;
++ }
++
++ return err;
++ err:
++ pr_warn("Could not create tracefs 'synthetic_events' entry\n");
++
++ return err;
++}
++
++fs_initcall(trace_events_hist_init);
diff --git a/debian/patches/features/all/rt/0033-tracing-Add-support-for-field-variables.patch b/debian/patches/features/all/rt/0033-tracing-Add-support-for-field-variables.patch
new file mode 100644
index 000000000..c0311153b
--- /dev/null
+++ b/debian/patches/features/all/rt/0033-tracing-Add-support-for-field-variables.patch
@@ -0,0 +1,667 @@
+From: Tom Zanussi
+Date: Mon, 15 Jan 2018 20:51:59 -0600
+Subject: [PATCH 33/48] tracing: Add support for 'field variables'
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+Users should be able to directly specify event fields in hist trigger
+'actions' rather than being forced to explicitly create a variable for
+that purpose.
+
+Add support allowing fields to be used directly in actions, which
+essentially does just that - creates 'invisible' variables for each
+bare field specified in an action. If a bare field refers to a field
+on another (matching) event, it even creates a special histogram for
+the purpose (since variables can't be defined on an existing histogram
+after histogram creation).
+
+Here's a simple example that demonstrates both. Basically the
+onmatch() action creates a list of variables corresponding to the
+parameters of the synthetic event to be generated, and then uses those
+values to generate the event. So for the wakeup_latency synthetic
+event 'call' below the first param, $wakeup_lat, is a variable defined
+explicitly on sched_switch, where 'next_pid' is just a normal field on
+sched_switch, and prio is a normal field on sched_waking.
+
+Since the mechanism works on variables, those two normal fields just
+have 'invisible' variables created internally for them. In the case of
+'prio', which is on another event, we actually need to create an
+additional hist trigger and define the invisible variable on that, since
+once a hist trigger is defined, variables can't be added to it later.
+ + echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> + /sys/kernel/debug/tracing/synthetic_events + + echo 'hist:keys=pid:ts0=common_timestamp.usecs >> + /sys/kernel/debug/tracing/events/sched/sched_waking/trigger + +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: + onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio) + >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + +Link: http://lkml.kernel.org/r/8e8dcdac1ea180ed7a3689e1caeeccede9dc42b3.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 5fcd8c6efab39371cb3ce51b8b391a43e83a94de) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 531 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 530 insertions(+), 1 deletion(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -255,6 +255,16 @@ struct hist_trigger_attrs { + struct var_defs var_defs; + }; + ++struct field_var { ++ struct hist_field *var; ++ struct hist_field *val; ++}; ++ ++struct field_var_hist { ++ struct hist_trigger_data *hist_data; ++ char *cmd; ++}; ++ + struct hist_trigger_data { + struct hist_field *fields[HIST_FIELDS_MAX]; + unsigned int n_vals; +@@ -274,6 +284,12 @@ struct hist_trigger_data { + + struct action_data *actions[HIST_ACTIONS_MAX]; + unsigned int n_actions; ++ ++ struct field_var *field_vars[SYNTH_FIELDS_MAX]; ++ unsigned int n_field_vars; ++ unsigned int n_field_var_str; ++ struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX]; ++ unsigned int n_field_var_hists; + }; + + struct synth_field { +@@ -1427,6 +1443,7 @@ static struct hist_field *find_event_var + struct hist_elt_data { + char *comm; + u64 *var_ref_vals; ++ char *field_var_str[SYNTH_FIELDS_MAX]; + }; + + static u64 hist_field_var_ref(struct hist_field *hist_field, +@@ -1731,6 +1748,11 @@ static inline void save_comm(char *comm, + + static void hist_elt_data_free(struct hist_elt_data *elt_data) + { ++ unsigned int i; ++ ++ for (i = 0; i < SYNTH_FIELDS_MAX; i++) ++ kfree(elt_data->field_var_str[i]); ++ + kfree(elt_data->comm); + kfree(elt_data); + } +@@ -1748,7 +1770,7 @@ static int hist_trigger_elt_data_alloc(s + unsigned int size = TASK_COMM_LEN; + struct hist_elt_data *elt_data; + struct hist_field *key_field; +- unsigned int i; ++ unsigned int i, n_str; + + elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); + if (!elt_data) +@@ -1767,6 +1789,18 @@ static int hist_trigger_elt_data_alloc(s + } + } + ++ n_str = hist_data->n_field_var_str; ++ ++ size = STR_VAR_LEN_MAX; ++ ++ for (i = 0; i < n_str; i++) { ++ elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL); ++ if (!elt_data->field_var_str[i]) { ++ hist_elt_data_free(elt_data); ++ return -ENOMEM; ++ } ++ } ++ + elt->private_data = elt_data; + + return 0; +@@ -2473,6 +2507,470 @@ static struct hist_field *parse_expr(str + return ERR_PTR(ret); + } + ++static char *find_trigger_filter(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file) ++{ ++ struct event_trigger_data *test; ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ if (test->private_data == hist_data) ++ return test->filter_str; ++ } ++ } ++ ++ return NULL; ++} ++ ++static struct event_command trigger_hist_cmd; ++static int event_hist_trigger_func(struct event_command *cmd_ops, ++ struct trace_event_file *file, ++ char *glob, char *cmd, char *param); ++ ++static bool compatible_keys(struct 
hist_trigger_data *target_hist_data, ++ struct hist_trigger_data *hist_data, ++ unsigned int n_keys) ++{ ++ struct hist_field *target_hist_field, *hist_field; ++ unsigned int n, i, j; ++ ++ if (hist_data->n_fields - hist_data->n_vals != n_keys) ++ return false; ++ ++ i = hist_data->n_vals; ++ j = target_hist_data->n_vals; ++ ++ for (n = 0; n < n_keys; n++) { ++ hist_field = hist_data->fields[i + n]; ++ target_hist_field = target_hist_data->fields[j + n]; ++ ++ if (strcmp(hist_field->type, target_hist_field->type) != 0) ++ return false; ++ if (hist_field->size != target_hist_field->size) ++ return false; ++ if (hist_field->is_signed != target_hist_field->is_signed) ++ return false; ++ } ++ ++ return true; ++} ++ ++static struct hist_trigger_data * ++find_compatible_hist(struct hist_trigger_data *target_hist_data, ++ struct trace_event_file *file) ++{ ++ struct hist_trigger_data *hist_data; ++ struct event_trigger_data *test; ++ unsigned int n_keys; ++ ++ n_keys = target_hist_data->n_fields - target_hist_data->n_vals; ++ ++ list_for_each_entry_rcu(test, &file->triggers, list) { ++ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ hist_data = test->private_data; ++ ++ if (compatible_keys(target_hist_data, hist_data, n_keys)) ++ return hist_data; ++ } ++ } ++ ++ return NULL; ++} ++ ++static struct trace_event_file *event_file(struct trace_array *tr, ++ char *system, char *event_name) ++{ ++ struct trace_event_file *file; ++ ++ file = find_event_file(tr, system, event_name); ++ if (!file) ++ return ERR_PTR(-EINVAL); ++ ++ return file; ++} ++ ++static struct hist_field * ++find_synthetic_field_var(struct hist_trigger_data *target_hist_data, ++ char *system, char *event_name, char *field_name) ++{ ++ struct hist_field *event_var; ++ char *synthetic_name; ++ ++ synthetic_name = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); ++ if (!synthetic_name) ++ return ERR_PTR(-ENOMEM); ++ ++ strcpy(synthetic_name, "synthetic_"); ++ strcat(synthetic_name, field_name); ++ ++ event_var = find_event_var(target_hist_data, system, event_name, synthetic_name); ++ ++ kfree(synthetic_name); ++ ++ return event_var; ++} ++ ++/** ++ * create_field_var_hist - Automatically create a histogram and var for a field ++ * @target_hist_data: The target hist trigger ++ * @subsys_name: Optional subsystem name ++ * @event_name: Optional event name ++ * @field_name: The name of the field (and the resulting variable) ++ * ++ * Hist trigger actions fetch data from variables, not directly from ++ * events. However, for convenience, users are allowed to directly ++ * specify an event field in an action, which will be automatically ++ * converted into a variable on their behalf. ++ ++ * If a user specifies a field on an event that isn't the event the ++ * histogram currently being defined (the target event histogram), the ++ * only way that can be accomplished is if a new hist trigger is ++ * created and the field variable defined on that. ++ * ++ * This function creates a new histogram compatible with the target ++ * event (meaning a histogram with the same key as the target ++ * histogram), and creates a variable for the specified field, but ++ * with 'synthetic_' prepended to the variable name in order to avoid ++ * collision with normal field variables. ++ * ++ * Return: The variable created for the field. 
++ */ ++struct hist_field * ++create_field_var_hist(struct hist_trigger_data *target_hist_data, ++ char *subsys_name, char *event_name, char *field_name) ++{ ++ struct trace_array *tr = target_hist_data->event_file->tr; ++ struct hist_field *event_var = ERR_PTR(-EINVAL); ++ struct hist_trigger_data *hist_data; ++ unsigned int i, n, first = true; ++ struct field_var_hist *var_hist; ++ struct trace_event_file *file; ++ struct hist_field *key_field; ++ char *saved_filter; ++ char *cmd; ++ int ret; ++ ++ if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) ++ return ERR_PTR(-EINVAL); ++ ++ file = event_file(tr, subsys_name, event_name); ++ ++ if (IS_ERR(file)) { ++ ret = PTR_ERR(file); ++ return ERR_PTR(ret); ++ } ++ ++ /* ++ * Look for a histogram compatible with target. We'll use the ++ * found histogram specification to create a new matching ++ * histogram with our variable on it. target_hist_data is not ++ * yet a registered histogram so we can't use that. ++ */ ++ hist_data = find_compatible_hist(target_hist_data, file); ++ if (!hist_data) ++ return ERR_PTR(-EINVAL); ++ ++ /* See if a synthetic field variable has already been created */ ++ event_var = find_synthetic_field_var(target_hist_data, subsys_name, ++ event_name, field_name); ++ if (!IS_ERR_OR_NULL(event_var)) ++ return event_var; ++ ++ var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL); ++ if (!var_hist) ++ return ERR_PTR(-ENOMEM); ++ ++ cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); ++ if (!cmd) { ++ kfree(var_hist); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ /* Use the same keys as the compatible histogram */ ++ strcat(cmd, "keys="); ++ ++ for_each_hist_key_field(i, hist_data) { ++ key_field = hist_data->fields[i]; ++ if (!first) ++ strcat(cmd, ","); ++ strcat(cmd, key_field->field->name); ++ first = false; ++ } ++ ++ /* Create the synthetic field variable specification */ ++ strcat(cmd, ":synthetic_"); ++ strcat(cmd, field_name); ++ strcat(cmd, "="); ++ strcat(cmd, field_name); ++ ++ /* Use the same filter as the compatible histogram */ ++ saved_filter = find_trigger_filter(hist_data, file); ++ if (saved_filter) { ++ strcat(cmd, " if "); ++ strcat(cmd, saved_filter); ++ } ++ ++ var_hist->cmd = kstrdup(cmd, GFP_KERNEL); ++ if (!var_hist->cmd) { ++ kfree(cmd); ++ kfree(var_hist); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ /* Save the compatible histogram information */ ++ var_hist->hist_data = hist_data; ++ ++ /* Create the new histogram with our variable */ ++ ret = event_hist_trigger_func(&trigger_hist_cmd, file, ++ "", "hist", cmd); ++ if (ret) { ++ kfree(cmd); ++ kfree(var_hist->cmd); ++ kfree(var_hist); ++ return ERR_PTR(ret); ++ } ++ ++ kfree(cmd); ++ ++ /* If we can't find the variable, something went wrong */ ++ event_var = find_synthetic_field_var(target_hist_data, subsys_name, ++ event_name, field_name); ++ if (IS_ERR_OR_NULL(event_var)) { ++ kfree(var_hist->cmd); ++ kfree(var_hist); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ n = target_hist_data->n_field_var_hists; ++ target_hist_data->field_var_hists[n] = var_hist; ++ target_hist_data->n_field_var_hists++; ++ ++ return event_var; ++} ++ ++struct hist_field * ++find_target_event_var(struct hist_trigger_data *hist_data, ++ char *subsys_name, char *event_name, char *var_name) ++{ ++ struct trace_event_file *file = hist_data->event_file; ++ struct hist_field *hist_field = NULL; ++ ++ if (subsys_name) { ++ struct trace_event_call *call; ++ ++ if (!event_name) ++ return NULL; ++ ++ call = file->event_call; ++ ++ if (strcmp(subsys_name, call->class->system) != 0) ++ return NULL; 
++ ++ if (strcmp(event_name, trace_event_name(call)) != 0) ++ return NULL; ++ } ++ ++ hist_field = find_var_field(hist_data, var_name); ++ ++ return hist_field; ++} ++ ++static inline void __update_field_vars(struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *rec, ++ struct field_var **field_vars, ++ unsigned int n_field_vars, ++ unsigned int field_var_str_start) ++{ ++ struct hist_elt_data *elt_data = elt->private_data; ++ unsigned int i, j, var_idx; ++ u64 var_val; ++ ++ for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { ++ struct field_var *field_var = field_vars[i]; ++ struct hist_field *var = field_var->var; ++ struct hist_field *val = field_var->val; ++ ++ var_val = val->fn(val, elt, rbe, rec); ++ var_idx = var->var.idx; ++ ++ if (val->flags & HIST_FIELD_FL_STRING) { ++ char *str = elt_data->field_var_str[j++]; ++ char *val_str = (char *)(uintptr_t)var_val; ++ ++ strncpy(str, val_str, STR_VAR_LEN_MAX); ++ var_val = (u64)(uintptr_t)str; ++ } ++ tracing_map_set_var(elt, var_idx, var_val); ++ } ++} ++ ++static void update_field_vars(struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *rec) ++{ ++ __update_field_vars(elt, rbe, rec, hist_data->field_vars, ++ hist_data->n_field_vars, 0); ++} ++ ++static struct hist_field *create_var(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ char *name, int size, const char *type) ++{ ++ struct hist_field *var; ++ int idx; ++ ++ if (find_var(hist_data, file, name) && !hist_data->remove) { ++ var = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ ++ var = kzalloc(sizeof(struct hist_field), GFP_KERNEL); ++ if (!var) { ++ var = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ idx = tracing_map_add_var(hist_data->map); ++ if (idx < 0) { ++ kfree(var); ++ var = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ ++ var->flags = HIST_FIELD_FL_VAR; ++ var->var.idx = idx; ++ var->var.hist_data = var->hist_data = hist_data; ++ var->size = size; ++ var->var.name = kstrdup(name, GFP_KERNEL); ++ var->type = kstrdup(type, GFP_KERNEL); ++ if (!var->var.name || !var->type) { ++ kfree(var->var.name); ++ kfree(var->type); ++ kfree(var); ++ var = ERR_PTR(-ENOMEM); ++ } ++ out: ++ return var; ++} ++ ++static struct field_var *create_field_var(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ char *field_name) ++{ ++ struct hist_field *val = NULL, *var = NULL; ++ unsigned long flags = HIST_FIELD_FL_VAR; ++ struct field_var *field_var; ++ int ret = 0; ++ ++ if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ val = parse_atom(hist_data, file, field_name, &flags, NULL); ++ if (IS_ERR(val)) { ++ ret = PTR_ERR(val); ++ goto err; ++ } ++ ++ var = create_var(hist_data, file, field_name, val->size, val->type); ++ if (IS_ERR(var)) { ++ kfree(val); ++ ret = PTR_ERR(var); ++ goto err; ++ } ++ ++ field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL); ++ if (!field_var) { ++ kfree(val); ++ kfree(var); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ field_var->var = var; ++ field_var->val = val; ++ out: ++ return field_var; ++ err: ++ field_var = ERR_PTR(ret); ++ goto out; ++} ++ ++/** ++ * create_target_field_var - Automatically create a variable for a field ++ * @target_hist_data: The target hist trigger ++ * @subsys_name: Optional subsystem name ++ * @event_name: Optional event name ++ * @var_name: The name of the field (and the resulting variable) ++ * ++ * Hist trigger actions fetch data from variables, not directly from ++ * 
events. However, for convenience, users are allowed to directly ++ * specify an event field in an action, which will be automatically ++ * converted into a variable on their behalf. ++ ++ * This function creates a field variable with the name var_name on ++ * the hist trigger currently being defined on the target event. If ++ * subsys_name and event_name are specified, this function simply ++ * verifies that they do in fact match the target event subsystem and ++ * event name. ++ * ++ * Return: The variable created for the field. ++ */ ++struct field_var * ++create_target_field_var(struct hist_trigger_data *target_hist_data, ++ char *subsys_name, char *event_name, char *var_name) ++{ ++ struct trace_event_file *file = target_hist_data->event_file; ++ ++ if (subsys_name) { ++ struct trace_event_call *call; ++ ++ if (!event_name) ++ return NULL; ++ ++ call = file->event_call; ++ ++ if (strcmp(subsys_name, call->class->system) != 0) ++ return NULL; ++ ++ if (strcmp(event_name, trace_event_name(call)) != 0) ++ return NULL; ++ } ++ ++ return create_field_var(target_hist_data, file, var_name); ++} ++ ++static void destroy_field_var(struct field_var *field_var) ++{ ++ if (!field_var) ++ return; ++ ++ destroy_hist_field(field_var->var, 0); ++ destroy_hist_field(field_var->val, 0); ++ ++ kfree(field_var); ++} ++ ++static void destroy_field_vars(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_field_vars; i++) ++ destroy_field_var(hist_data->field_vars[i]); ++} ++ ++void save_field_var(struct hist_trigger_data *hist_data, ++ struct field_var *field_var) ++{ ++ hist_data->field_vars[hist_data->n_field_vars++] = field_var; ++ ++ if (field_var->val->flags & HIST_FIELD_FL_STRING) ++ hist_data->n_field_var_str++; ++} ++ + static int create_hitcount_val(struct hist_trigger_data *hist_data) + { + hist_data->fields[HITCOUNT_IDX] = +@@ -2928,6 +3426,16 @@ static int create_actions(struct hist_tr + return ret; + } + ++static void destroy_field_var_hists(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_field_var_hists; i++) { ++ kfree(hist_data->field_var_hists[i]->cmd); ++ kfree(hist_data->field_var_hists[i]); ++ } ++} ++ + static void destroy_hist_data(struct hist_trigger_data *hist_data) + { + if (!hist_data) +@@ -2938,6 +3446,8 @@ static void destroy_hist_data(struct his + tracing_map_destroy(hist_data->map); + + destroy_actions(hist_data); ++ destroy_field_vars(hist_data); ++ destroy_field_var_hists(hist_data); + + kfree(hist_data); + } +@@ -3074,6 +3584,8 @@ static void hist_trigger_elt_update(stru + tracing_map_set_var(elt, var_idx, hist_val); + } + } ++ ++ update_field_vars(hist_data, elt, rbe, rec); + } + + static inline void add_to_key(char *compound_key, void *key, +@@ -3518,6 +4030,21 @@ static int event_hist_trigger_init(struc + return 0; + } + ++static void unregister_field_var_hists(struct hist_trigger_data *hist_data) ++{ ++ struct trace_event_file *file; ++ unsigned int i; ++ char *cmd; ++ int ret; ++ ++ for (i = 0; i < hist_data->n_field_var_hists; i++) { ++ file = hist_data->field_var_hists[i]->hist_data->event_file; ++ cmd = hist_data->field_var_hists[i]->cmd; ++ ret = event_hist_trigger_func(&trigger_hist_cmd, file, ++ "!hist", "hist", cmd); ++ } ++} ++ + static void event_hist_trigger_free(struct event_trigger_ops *ops, + struct event_trigger_data *data) + { +@@ -3535,6 +4062,8 @@ static void event_hist_trigger_free(stru + + remove_hist_vars(hist_data); + ++ unregister_field_var_hists(hist_data); 
++ + destroy_hist_data(hist_data); + } + } diff --git a/debian/patches/features/all/rt/0034-tracing-Add-onmatch-hist-trigger-action-support.patch b/debian/patches/features/all/rt/0034-tracing-Add-onmatch-hist-trigger-action-support.patch new file mode 100644 index 000000000..5a52da9cc --- /dev/null +++ b/debian/patches/features/all/rt/0034-tracing-Add-onmatch-hist-trigger-action-support.patch @@ -0,0 +1,688 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:00 -0600 +Subject: [PATCH 34/48] tracing: Add 'onmatch' hist trigger action support +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add an 'onmatch(matching.event).(param list)' +hist trigger action which is invoked with the set of variables or +event fields named in the 'param list'. The result is the generation +of a synthetic event that consists of the values contained in those +variables and/or fields at the time the invoking event was hit. + +As an example the below defines a simple synthetic event using a +variable defined on the sched_wakeup_new event, and shows the event +definition with unresolved fields, since the sched_wakeup_new event +with the testpid variable hasn't been defined yet: + + # echo 'wakeup_new_test pid_t pid; int prio' >> \ + /sys/kernel/debug/tracing/synthetic_events + + # cat /sys/kernel/debug/tracing/synthetic_events + wakeup_new_test pid_t pid; int prio + +The following hist trigger both defines a testpid variable and +specifies an onmatch() trace action that uses that variable along with +a non-variable field to generate a wakeup_new_test synthetic event +whenever a sched_wakeup_new event occurs, which because of the 'if +comm == "cyclictest"' filter only happens when the executable is +cyclictest: + + # echo 'hist:testpid=pid:keys=$testpid:\ + onmatch(sched.sched_wakeup_new).wakeup_new_test($testpid, prio) \ + if comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger + +Creating and displaying a histogram based on those events is now just +a matter of using the fields and new synthetic event in the +tracing/events/synthetic directory, as usual: + + # echo 'hist:keys=pid,prio:sort=pid,prio' >> \ + /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger + +Link: http://lkml.kernel.org/r/8c2a574bcb7530c876629c901ecd23911b14afe8.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Rajvi Jingar +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit ea82307e63ec125d8612d8cedd2618669f674226) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 488 +++++++++++++++++++++++++++++++++++++-- + 1 file changed, 475 insertions(+), 13 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -285,6 +285,8 @@ struct hist_trigger_data { + struct action_data *actions[HIST_ACTIONS_MAX]; + unsigned int n_actions; + ++ struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX]; ++ unsigned int n_synth_var_refs; + struct field_var *field_vars[SYNTH_FIELDS_MAX]; + unsigned int n_field_vars; + unsigned int n_field_var_str; +@@ -321,7 +323,18 @@ typedef void (*action_fn_t) (struct hist + + struct action_data { + action_fn_t fn; +- unsigned int var_ref_idx; ++ unsigned int n_params; ++ char *params[SYNTH_FIELDS_MAX]; ++ ++ union { ++ struct { ++ unsigned int var_ref_idx; ++ char *match_event; ++ char *match_event_system; ++ char *synth_event_name; ++ struct synth_event *synth_event; ++ } onmatch; ++ }; + }; + + static 
LIST_HEAD(synth_event_list); +@@ -887,6 +900,21 @@ static struct synth_event *alloc_synth_e + return event; + } + ++static void action_trace(struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, void *rec, ++ struct ring_buffer_event *rbe, ++ struct action_data *data, u64 *var_ref_vals) ++{ ++ struct synth_event *event = data->onmatch.synth_event; ++ ++ trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx); ++} ++ ++struct hist_var_data { ++ struct list_head list; ++ struct hist_trigger_data *hist_data; ++}; ++ + static void add_or_delete_synth_event(struct synth_event *event, int delete) + { + if (delete) +@@ -1124,11 +1152,6 @@ static u64 hist_field_timestamp(struct h + return ts; + } + +-struct hist_var_data { +- struct list_head list; +- struct hist_trigger_data *hist_data; +-}; +- + static struct hist_field * + check_field_for_var_ref(struct hist_field *hist_field, + struct hist_trigger_data *var_data, +@@ -1194,6 +1217,14 @@ static struct hist_field *find_var_ref(s + return found; + } + ++ for (i = 0; i < hist_data->n_synth_var_refs; i++) { ++ hist_field = hist_data->synth_var_refs[i]; ++ found = check_field_for_var_refs(hist_data, hist_field, ++ var_data, var_idx, 0); ++ if (found) ++ return found; ++ } ++ + return found; + } + +@@ -1422,6 +1453,37 @@ static struct hist_field *find_file_var( + return NULL; + } + ++static struct hist_field * ++find_match_var(struct hist_trigger_data *hist_data, char *var_name) ++{ ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct hist_field *hist_field, *found = NULL; ++ struct trace_event_file *file; ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_actions; i++) { ++ struct action_data *data = hist_data->actions[i]; ++ ++ if (data->fn == action_trace) { ++ char *system = data->onmatch.match_event_system; ++ char *event_name = data->onmatch.match_event; ++ ++ file = find_var_file(tr, system, event_name, var_name); ++ if (!file) ++ continue; ++ hist_field = find_file_var(file, var_name); ++ if (hist_field) { ++ if (found) { ++ return ERR_PTR(-EINVAL); ++ } ++ ++ found = hist_field; ++ } ++ } ++ } ++ return found; ++} ++ + static struct hist_field *find_event_var(struct hist_trigger_data *hist_data, + char *system, + char *event_name, +@@ -1431,6 +1493,14 @@ static struct hist_field *find_event_var + struct hist_field *hist_field = NULL; + struct trace_event_file *file; + ++ if (!system || !event_name) { ++ hist_field = find_match_var(hist_data, var_name); ++ if (IS_ERR(hist_field)) ++ return NULL; ++ if (hist_field) ++ return hist_field; ++ } ++ + file = find_var_file(tr, system, event_name, var_name); + if (!file) + return NULL; +@@ -1622,11 +1692,21 @@ static void destroy_hist_trigger_attrs(s + + static int parse_action(char *str, struct hist_trigger_attrs *attrs) + { +- int ret = 0; ++ int ret = -EINVAL; + + if (attrs->n_actions >= HIST_ACTIONS_MAX) + return ret; + ++ if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0)) { ++ attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL); ++ if (!attrs->action_str[attrs->n_actions]) { ++ ret = -ENOMEM; ++ return ret; ++ } ++ attrs->n_actions++; ++ ret = 0; ++ } ++ + return ret; + } + +@@ -2635,7 +2715,7 @@ find_synthetic_field_var(struct hist_tri + * + * Return: The variable created for the field. 
+ */ +-struct hist_field * ++static struct hist_field * + create_field_var_hist(struct hist_trigger_data *target_hist_data, + char *subsys_name, char *event_name, char *field_name) + { +@@ -2748,7 +2828,7 @@ create_field_var_hist(struct hist_trigge + return event_var; + } + +-struct hist_field * ++static struct hist_field * + find_target_event_var(struct hist_trigger_data *hist_data, + char *subsys_name, char *event_name, char *var_name) + { +@@ -2919,7 +2999,7 @@ static struct field_var *create_field_va + * + * Return: The variable created for the field. + */ +-struct field_var * ++static struct field_var * + create_target_field_var(struct hist_trigger_data *target_hist_data, + char *subsys_name, char *event_name, char *var_name) + { +@@ -2943,6 +3023,27 @@ create_target_field_var(struct hist_trig + return create_field_var(target_hist_data, file, var_name); + } + ++static void onmatch_destroy(struct action_data *data) ++{ ++ unsigned int i; ++ ++ mutex_lock(&synth_event_mutex); ++ ++ kfree(data->onmatch.match_event); ++ kfree(data->onmatch.match_event_system); ++ kfree(data->onmatch.synth_event_name); ++ ++ for (i = 0; i < data->n_params; i++) ++ kfree(data->params[i]); ++ ++ if (data->onmatch.synth_event) ++ data->onmatch.synth_event->ref--; ++ ++ kfree(data); ++ ++ mutex_unlock(&synth_event_mutex); ++} ++ + static void destroy_field_var(struct field_var *field_var) + { + if (!field_var) +@@ -2962,8 +3063,8 @@ static void destroy_field_vars(struct hi + destroy_field_var(hist_data->field_vars[i]); + } + +-void save_field_var(struct hist_trigger_data *hist_data, +- struct field_var *field_var) ++static void save_field_var(struct hist_trigger_data *hist_data, ++ struct field_var *field_var) + { + hist_data->field_vars[hist_data->n_field_vars++] = field_var; + +@@ -2971,6 +3072,304 @@ void save_field_var(struct hist_trigger_ + hist_data->n_field_var_str++; + } + ++ ++static void destroy_synth_var_refs(struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_synth_var_refs; i++) ++ destroy_hist_field(hist_data->synth_var_refs[i], 0); ++} ++ ++static void save_synth_var_ref(struct hist_trigger_data *hist_data, ++ struct hist_field *var_ref) ++{ ++ hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref; ++ ++ hist_data->var_refs[hist_data->n_var_refs] = var_ref; ++ var_ref->var_ref_idx = hist_data->n_var_refs++; ++} ++ ++static int check_synth_field(struct synth_event *event, ++ struct hist_field *hist_field, ++ unsigned int field_pos) ++{ ++ struct synth_field *field; ++ ++ if (field_pos >= event->n_fields) ++ return -EINVAL; ++ ++ field = event->fields[field_pos]; ++ ++ if (strcmp(field->type, hist_field->type) != 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int parse_action_params(char *params, struct action_data *data) ++{ ++ char *param, *saved_param; ++ int ret = 0; ++ ++ while (params) { ++ if (data->n_params >= SYNTH_FIELDS_MAX) ++ goto out; ++ ++ param = strsep(¶ms, ","); ++ if (!param) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ param = strstrip(param); ++ if (strlen(param) < 2) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ saved_param = kstrdup(param, GFP_KERNEL); ++ if (!saved_param) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ data->params[data->n_params++] = saved_param; ++ } ++ out: ++ return ret; ++} ++ ++static struct hist_field * ++onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data, ++ char *system, char *event, char *var) ++{ ++ struct hist_field *hist_field; ++ ++ var++; /* skip '$' */ ++ ++ 
hist_field = find_target_event_var(hist_data, system, event, var); ++ if (!hist_field) { ++ if (!system) { ++ system = data->onmatch.match_event_system; ++ event = data->onmatch.match_event; ++ } ++ ++ hist_field = find_event_var(hist_data, system, event, var); ++ } ++ ++ return hist_field; ++} ++ ++static struct hist_field * ++onmatch_create_field_var(struct hist_trigger_data *hist_data, ++ struct action_data *data, char *system, ++ char *event, char *var) ++{ ++ struct hist_field *hist_field = NULL; ++ struct field_var *field_var; ++ ++ /* ++ * First try to create a field var on the target event (the ++ * currently being defined). This will create a variable for ++ * unqualified fields on the target event, or if qualified, ++ * target fields that have qualified names matching the target. ++ */ ++ field_var = create_target_field_var(hist_data, system, event, var); ++ ++ if (field_var && !IS_ERR(field_var)) { ++ save_field_var(hist_data, field_var); ++ hist_field = field_var->var; ++ } else { ++ field_var = NULL; ++ /* ++ * If no explicit system.event is specfied, default to ++ * looking for fields on the onmatch(system.event.xxx) ++ * event. ++ */ ++ if (!system) { ++ system = data->onmatch.match_event_system; ++ event = data->onmatch.match_event; ++ } ++ ++ /* ++ * At this point, we're looking at a field on another ++ * event. Because we can't modify a hist trigger on ++ * another event to add a variable for a field, we need ++ * to create a new trigger on that event and create the ++ * variable at the same time. ++ */ ++ hist_field = create_field_var_hist(hist_data, system, event, var); ++ if (IS_ERR(hist_field)) ++ goto free; ++ } ++ out: ++ return hist_field; ++ free: ++ destroy_field_var(field_var); ++ hist_field = NULL; ++ goto out; ++} ++ ++static int onmatch_create(struct hist_trigger_data *hist_data, ++ struct trace_event_file *file, ++ struct action_data *data) ++{ ++ char *event_name, *param, *system = NULL; ++ struct hist_field *hist_field, *var_ref; ++ unsigned int i, var_ref_idx; ++ unsigned int field_pos = 0; ++ struct synth_event *event; ++ int ret = 0; ++ ++ mutex_lock(&synth_event_mutex); ++ event = find_synth_event(data->onmatch.synth_event_name); ++ if (!event) { ++ mutex_unlock(&synth_event_mutex); ++ return -EINVAL; ++ } ++ event->ref++; ++ mutex_unlock(&synth_event_mutex); ++ ++ var_ref_idx = hist_data->n_var_refs; ++ ++ for (i = 0; i < data->n_params; i++) { ++ char *p; ++ ++ p = param = kstrdup(data->params[i], GFP_KERNEL); ++ if (!param) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ system = strsep(¶m, "."); ++ if (!param) { ++ param = (char *)system; ++ system = event_name = NULL; ++ } else { ++ event_name = strsep(¶m, "."); ++ if (!param) { ++ kfree(p); ++ ret = -EINVAL; ++ goto err; ++ } ++ } ++ ++ if (param[0] == '$') ++ hist_field = onmatch_find_var(hist_data, data, system, ++ event_name, param); ++ else ++ hist_field = onmatch_create_field_var(hist_data, data, ++ system, ++ event_name, ++ param); ++ ++ if (!hist_field) { ++ kfree(p); ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ if (check_synth_field(event, hist_field, field_pos) == 0) { ++ var_ref = create_var_ref(hist_field, system, event_name); ++ if (!var_ref) { ++ kfree(p); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ save_synth_var_ref(hist_data, var_ref); ++ field_pos++; ++ kfree(p); ++ continue; ++ } ++ ++ kfree(p); ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ if (field_pos != event->n_fields) { ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ data->fn = action_trace; ++ data->onmatch.synth_event = event; ++ 
data->onmatch.var_ref_idx = var_ref_idx; ++ out: ++ return ret; ++ err: ++ mutex_lock(&synth_event_mutex); ++ event->ref--; ++ mutex_unlock(&synth_event_mutex); ++ ++ goto out; ++} ++ ++static struct action_data *onmatch_parse(struct trace_array *tr, char *str) ++{ ++ char *match_event, *match_event_system; ++ char *synth_event_name, *params; ++ struct action_data *data; ++ int ret = -EINVAL; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return ERR_PTR(-ENOMEM); ++ ++ match_event = strsep(&str, ")"); ++ if (!match_event || !str) ++ goto free; ++ ++ match_event_system = strsep(&match_event, "."); ++ if (!match_event) ++ goto free; ++ ++ if (IS_ERR(event_file(tr, match_event_system, match_event))) ++ goto free; ++ ++ data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL); ++ if (!data->onmatch.match_event) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL); ++ if (!data->onmatch.match_event_system) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ strsep(&str, "."); ++ if (!str) ++ goto free; ++ ++ synth_event_name = strsep(&str, "("); ++ if (!synth_event_name || !str) ++ goto free; ++ ++ data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); ++ if (!data->onmatch.synth_event_name) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ params = strsep(&str, ")"); ++ if (!params || !str || (str && strlen(str))) ++ goto free; ++ ++ ret = parse_action_params(params, data); ++ if (ret) ++ goto free; ++ out: ++ return data; ++ free: ++ onmatch_destroy(data); ++ data = ERR_PTR(ret); ++ goto out; ++} ++ + static int create_hitcount_val(struct hist_trigger_data *hist_data) + { + hist_data->fields[HITCOUNT_IDX] = +@@ -3395,18 +3794,39 @@ static void destroy_actions(struct hist_ + for (i = 0; i < hist_data->n_actions; i++) { + struct action_data *data = hist_data->actions[i]; + +- kfree(data); ++ if (data->fn == action_trace) ++ onmatch_destroy(data); ++ else ++ kfree(data); + } + } + + static int parse_actions(struct hist_trigger_data *hist_data) + { ++ struct trace_array *tr = hist_data->event_file->tr; ++ struct action_data *data; + unsigned int i; + int ret = 0; + char *str; + + for (i = 0; i < hist_data->attrs->n_actions; i++) { + str = hist_data->attrs->action_str[i]; ++ ++ if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) { ++ char *action_str = str + strlen("onmatch("); ++ ++ data = onmatch_parse(tr, action_str); ++ if (IS_ERR(data)) { ++ ret = PTR_ERR(data); ++ break; ++ } ++ data->fn = action_trace; ++ } else { ++ ret = -EINVAL; ++ break; ++ } ++ ++ hist_data->actions[hist_data->n_actions++] = data; + } + + return ret; +@@ -3421,11 +3841,50 @@ static int create_actions(struct hist_tr + + for (i = 0; i < hist_data->attrs->n_actions; i++) { + data = hist_data->actions[i]; ++ ++ if (data->fn == action_trace) { ++ ret = onmatch_create(hist_data, file, data); ++ if (ret) ++ return ret; ++ } + } + + return ret; + } + ++static void print_onmatch_spec(struct seq_file *m, ++ struct hist_trigger_data *hist_data, ++ struct action_data *data) ++{ ++ unsigned int i; ++ ++ seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system, ++ data->onmatch.match_event); ++ ++ seq_printf(m, "%s(", data->onmatch.synth_event->name); ++ ++ for (i = 0; i < data->n_params; i++) { ++ if (i) ++ seq_puts(m, ","); ++ seq_printf(m, "%s", data->params[i]); ++ } ++ ++ seq_puts(m, ")"); ++} ++ ++static void print_actions_spec(struct seq_file *m, ++ struct hist_trigger_data *hist_data) ++{ ++ unsigned int i; ++ ++ for 
(i = 0; i < hist_data->n_actions; i++) { ++ struct action_data *data = hist_data->actions[i]; ++ ++ if (data->fn == action_trace) ++ print_onmatch_spec(m, hist_data, data); ++ } ++} ++ + static void destroy_field_var_hists(struct hist_trigger_data *hist_data) + { + unsigned int i; +@@ -3448,6 +3907,7 @@ static void destroy_hist_data(struct his + destroy_actions(hist_data); + destroy_field_vars(hist_data); + destroy_field_var_hists(hist_data); ++ destroy_synth_var_refs(hist_data); + + kfree(hist_data); + } +@@ -4004,6 +4464,8 @@ static int event_hist_trigger_print(stru + } + seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); + ++ print_actions_spec(m, hist_data); ++ + if (data->filter_str) + seq_printf(m, " if %s", data->filter_str); + diff --git a/debian/patches/features/all/rt/0035-tracing-Add-onmax-hist-trigger-action-support.patch b/debian/patches/features/all/rt/0035-tracing-Add-onmax-hist-trigger-action-support.patch new file mode 100644 index 000000000..67c7a798e --- /dev/null +++ b/debian/patches/features/all/rt/0035-tracing-Add-onmax-hist-trigger-action-support.patch @@ -0,0 +1,487 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:01 -0600 +Subject: [PATCH 35/48] tracing: Add 'onmax' hist trigger action support +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add an 'onmax(var).save(field,...)' hist trigger action which is +invoked whenever an event exceeds the current maximum. + +The end result is that the trace event fields or variables specified +as the onmax.save() params will be saved if 'var' exceeds the current +maximum for that hist trigger entry. This allows context from the +event that exhibited the new maximum to be saved for later reference. +When the histogram is displayed, additional fields displaying the +saved values will be printed. + +As an example the below defines a couple of hist triggers, one for +sched_wakeup and another for sched_switch, keyed on pid. Whenever a +sched_wakeup occurs, the timestamp is saved in the entry corresponding +to the current pid, and when the scheduler switches back to that pid, +the timestamp difference is calculated. 
If the resulting latency +exceeds the current maximum latency, the specified save() values are +saved: + + # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ + if comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + + # echo 'hist:keys=next_pid:\ + wakeup_lat=common_timestamp.usecs-$ts0:\ + onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \ + if next_comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + +When the histogram is displayed, the max value and the saved values +corresponding to the max are displayed following the rest of the +fields: + + # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist + + { next_pid: 3728 } hitcount: 199 \ + max: 123 next_comm: cyclictest prev_pid: 0 \ + prev_prio: 120 prev_comm: swapper/3 + { next_pid: 3730 } hitcount: 1321 \ + max: 15 next_comm: cyclictest prev_pid: 0 \ + prev_prio: 120 prev_comm: swapper/1 + { next_pid: 3729 } hitcount: 1973\ + max: 25 next_comm: cyclictest prev_pid: 0 \ + prev_prio: 120 prev_comm: swapper/0 + + Totals: + Hits: 3493 + Entries: 3 + Dropped: 0 + +Link: http://lkml.kernel.org/r/006907f71b1e839bb059337ec3c496f84fcb71de.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 4e30c922f0a19496ff424edd5c473666e1690601) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 331 ++++++++++++++++++++++++++++++++++----- + 1 file changed, 296 insertions(+), 35 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -292,6 +292,10 @@ struct hist_trigger_data { + unsigned int n_field_var_str; + struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX]; + unsigned int n_field_var_hists; ++ ++ struct field_var *max_vars[SYNTH_FIELDS_MAX]; ++ unsigned int n_max_vars; ++ unsigned int n_max_var_str; + }; + + struct synth_field { +@@ -334,6 +338,14 @@ struct action_data { + char *synth_event_name; + struct synth_event *synth_event; + } onmatch; ++ ++ struct { ++ char *var_str; ++ char *fn_name; ++ unsigned int max_var_ref_idx; ++ struct hist_field *max_var; ++ struct hist_field *var; ++ } onmax; + }; + }; + +@@ -1697,7 +1709,8 @@ static int parse_action(char *str, struc + if (attrs->n_actions >= HIST_ACTIONS_MAX) + return ret; + +- if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0)) { ++ if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0) || ++ (strncmp(str, "onmax(", strlen("onmax(")) == 0)) { + attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL); + if (!attrs->action_str[attrs->n_actions]) { + ret = -ENOMEM; +@@ -1869,7 +1882,7 @@ static int hist_trigger_elt_data_alloc(s + } + } + +- n_str = hist_data->n_field_var_str; ++ n_str = hist_data->n_field_var_str + hist_data->n_max_var_str; + + size = STR_VAR_LEN_MAX; + +@@ -2894,6 +2907,15 @@ static void update_field_vars(struct his + hist_data->n_field_vars, 0); + } + ++static void update_max_vars(struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *rec) ++{ ++ __update_field_vars(elt, rbe, rec, hist_data->max_vars, ++ hist_data->n_max_vars, hist_data->n_field_var_str); ++} ++ + static struct hist_field *create_var(struct hist_trigger_data *hist_data, + struct trace_event_file *file, + char *name, int size, const char *type) +@@ -3023,6 +3045,227 @@ create_target_field_var(struct hist_trig + return create_field_var(target_hist_data, file, var_name); + } + ++static void 
onmax_print(struct seq_file *m, ++ struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, ++ struct action_data *data) ++{ ++ unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx; ++ ++ seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx)); ++ ++ for (i = 0; i < hist_data->n_max_vars; i++) { ++ struct hist_field *save_val = hist_data->max_vars[i]->val; ++ struct hist_field *save_var = hist_data->max_vars[i]->var; ++ u64 val; ++ ++ save_var_idx = save_var->var.idx; ++ ++ val = tracing_map_read_var(elt, save_var_idx); ++ ++ if (save_val->flags & HIST_FIELD_FL_STRING) { ++ seq_printf(m, " %s: %-32s", save_var->var.name, ++ (char *)(uintptr_t)(val)); ++ } else ++ seq_printf(m, " %s: %10llu", save_var->var.name, val); ++ } ++} ++ ++static void onmax_save(struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt, void *rec, ++ struct ring_buffer_event *rbe, ++ struct action_data *data, u64 *var_ref_vals) ++{ ++ unsigned int max_idx = data->onmax.max_var->var.idx; ++ unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx; ++ ++ u64 var_val, max_val; ++ ++ var_val = var_ref_vals[max_var_ref_idx]; ++ max_val = tracing_map_read_var(elt, max_idx); ++ ++ if (var_val <= max_val) ++ return; ++ ++ tracing_map_set_var(elt, max_idx, var_val); ++ ++ update_max_vars(hist_data, elt, rbe, rec); ++} ++ ++static void onmax_destroy(struct action_data *data) ++{ ++ unsigned int i; ++ ++ destroy_hist_field(data->onmax.max_var, 0); ++ destroy_hist_field(data->onmax.var, 0); ++ ++ kfree(data->onmax.var_str); ++ kfree(data->onmax.fn_name); ++ ++ for (i = 0; i < data->n_params; i++) ++ kfree(data->params[i]); ++ ++ kfree(data); ++} ++ ++static int onmax_create(struct hist_trigger_data *hist_data, ++ struct action_data *data) ++{ ++ struct trace_event_file *file = hist_data->event_file; ++ struct hist_field *var_field, *ref_field, *max_var; ++ unsigned int var_ref_idx = hist_data->n_var_refs; ++ struct field_var *field_var; ++ char *onmax_var_str, *param; ++ unsigned long flags; ++ unsigned int i; ++ int ret = 0; ++ ++ onmax_var_str = data->onmax.var_str; ++ if (onmax_var_str[0] != '$') ++ return -EINVAL; ++ onmax_var_str++; ++ ++ var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str); ++ if (!var_field) ++ return -EINVAL; ++ ++ flags = HIST_FIELD_FL_VAR_REF; ++ ref_field = create_hist_field(hist_data, NULL, flags, NULL); ++ if (!ref_field) ++ return -ENOMEM; ++ ++ if (init_var_ref(ref_field, var_field, NULL, NULL)) { ++ destroy_hist_field(ref_field, 0); ++ ret = -ENOMEM; ++ goto out; ++ } ++ hist_data->var_refs[hist_data->n_var_refs] = ref_field; ++ ref_field->var_ref_idx = hist_data->n_var_refs++; ++ data->onmax.var = ref_field; ++ ++ data->fn = onmax_save; ++ data->onmax.max_var_ref_idx = var_ref_idx; ++ max_var = create_var(hist_data, file, "max", sizeof(u64), "u64"); ++ if (IS_ERR(max_var)) { ++ ret = PTR_ERR(max_var); ++ goto out; ++ } ++ data->onmax.max_var = max_var; ++ ++ for (i = 0; i < data->n_params; i++) { ++ param = kstrdup(data->params[i], GFP_KERNEL); ++ if (!param) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ field_var = create_target_field_var(hist_data, NULL, NULL, param); ++ if (IS_ERR(field_var)) { ++ ret = PTR_ERR(field_var); ++ kfree(param); ++ goto out; ++ } ++ ++ hist_data->max_vars[hist_data->n_max_vars++] = field_var; ++ if (field_var->val->flags & HIST_FIELD_FL_STRING) ++ hist_data->n_max_var_str++; ++ ++ kfree(param); ++ } ++ out: ++ return ret; ++} ++ ++static int parse_action_params(char *params, struct action_data 
*data) ++{ ++ char *param, *saved_param; ++ int ret = 0; ++ ++ while (params) { ++ if (data->n_params >= SYNTH_FIELDS_MAX) ++ goto out; ++ ++ param = strsep(¶ms, ","); ++ if (!param) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ param = strstrip(param); ++ if (strlen(param) < 2) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ saved_param = kstrdup(param, GFP_KERNEL); ++ if (!saved_param) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ data->params[data->n_params++] = saved_param; ++ } ++ out: ++ return ret; ++} ++ ++static struct action_data *onmax_parse(char *str) ++{ ++ char *onmax_fn_name, *onmax_var_str; ++ struct action_data *data; ++ int ret = -EINVAL; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return ERR_PTR(-ENOMEM); ++ ++ onmax_var_str = strsep(&str, ")"); ++ if (!onmax_var_str || !str) { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL); ++ if (!data->onmax.var_str) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ ++ strsep(&str, "."); ++ if (!str) ++ goto free; ++ ++ onmax_fn_name = strsep(&str, "("); ++ if (!onmax_fn_name || !str) ++ goto free; ++ ++ if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) { ++ char *params = strsep(&str, ")"); ++ ++ if (!params) { ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ ret = parse_action_params(params, data); ++ if (ret) ++ goto free; ++ } else ++ goto free; ++ ++ data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL); ++ if (!data->onmax.fn_name) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ out: ++ return data; ++ free: ++ onmax_destroy(data); ++ data = ERR_PTR(ret); ++ goto out; ++} ++ + static void onmatch_destroy(struct action_data *data) + { + unsigned int i; +@@ -3107,39 +3350,6 @@ static int check_synth_field(struct synt + return 0; + } + +-static int parse_action_params(char *params, struct action_data *data) +-{ +- char *param, *saved_param; +- int ret = 0; +- +- while (params) { +- if (data->n_params >= SYNTH_FIELDS_MAX) +- goto out; +- +- param = strsep(¶ms, ","); +- if (!param) { +- ret = -EINVAL; +- goto out; +- } +- +- param = strstrip(param); +- if (strlen(param) < 2) { +- ret = -EINVAL; +- goto out; +- } +- +- saved_param = kstrdup(param, GFP_KERNEL); +- if (!saved_param) { +- ret = -ENOMEM; +- goto out; +- } +- +- data->params[data->n_params++] = saved_param; +- } +- out: +- return ret; +-} +- + static struct hist_field * + onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data, + char *system, char *event, char *var) +@@ -3796,6 +4006,8 @@ static void destroy_actions(struct hist_ + + if (data->fn == action_trace) + onmatch_destroy(data); ++ else if (data->fn == onmax_save) ++ onmax_destroy(data); + else + kfree(data); + } +@@ -3821,6 +4033,15 @@ static int parse_actions(struct hist_tri + break; + } + data->fn = action_trace; ++ } else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) { ++ char *action_str = str + strlen("onmax("); ++ ++ data = onmax_parse(action_str); ++ if (IS_ERR(data)) { ++ ret = PTR_ERR(data); ++ break; ++ } ++ data->fn = onmax_save; + } else { + ret = -EINVAL; + break; +@@ -3846,12 +4067,48 @@ static int create_actions(struct hist_tr + ret = onmatch_create(hist_data, file, data); + if (ret) + return ret; ++ } else if (data->fn == onmax_save) { ++ ret = onmax_create(hist_data, data); ++ if (ret) ++ return ret; + } + } + + return ret; + } + ++static void print_actions(struct seq_file *m, ++ struct hist_trigger_data *hist_data, ++ struct tracing_map_elt *elt) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < hist_data->n_actions; 
i++) { ++ struct action_data *data = hist_data->actions[i]; ++ ++ if (data->fn == onmax_save) ++ onmax_print(m, hist_data, elt, data); ++ } ++} ++ ++static void print_onmax_spec(struct seq_file *m, ++ struct hist_trigger_data *hist_data, ++ struct action_data *data) ++{ ++ unsigned int i; ++ ++ seq_puts(m, ":onmax("); ++ seq_printf(m, "%s", data->onmax.var_str); ++ seq_printf(m, ").%s(", data->onmax.fn_name); ++ ++ for (i = 0; i < hist_data->n_max_vars; i++) { ++ seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name); ++ if (i < hist_data->n_max_vars - 1) ++ seq_puts(m, ","); ++ } ++ seq_puts(m, ")"); ++} ++ + static void print_onmatch_spec(struct seq_file *m, + struct hist_trigger_data *hist_data, + struct action_data *data) +@@ -3882,6 +4139,8 @@ static void print_actions_spec(struct se + + if (data->fn == action_trace) + print_onmatch_spec(m, hist_data, data); ++ else if (data->fn == onmax_save) ++ print_onmax_spec(m, hist_data, data); + } + } + +@@ -4263,6 +4522,8 @@ hist_trigger_entry_print(struct seq_file + } + } + ++ print_actions(m, hist_data, elt); ++ + seq_puts(m, "\n"); + } + diff --git a/debian/patches/features/all/rt/0036-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch b/debian/patches/features/all/rt/0036-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch new file mode 100644 index 000000000..f3b84ec38 --- /dev/null +++ b/debian/patches/features/all/rt/0036-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch @@ -0,0 +1,76 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:02 -0600 +Subject: [PATCH 36/48] tracing: Allow whitespace to surround hist trigger + filter +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The existing code only allows for one space before and after the 'if' +specifying the filter for a hist trigger. Add code to make that more +permissive as far as whitespace goes. Specifically, we want to allow +spaces in the trigger itself now that we have additional syntax +(onmatch/onmax) where spaces are more natural e.g. spaces after commas +in param lists. 
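As a concrete illustration of the scan this patch introduces, here is a small standalone sketch (illustrative only; the function name split_filter is invented, and the kernel version below additionally strips surrounding whitespace): find the first 'if' that has whitespace on both sides and split the command there, leaving everything before it as the trigger.

  #include <stdio.h>
  #include <string.h>

  /* Split "trigger [if filter]" at the first whitespace-delimited "if". */
  static char *split_filter(char *param)
  {
  	char *p = param;

  	while ((p = strstr(p, "if"))) {
  		if (p == param)
  			return NULL;		/* command can't begin with "if" */
  		if (p[-1] != ' ' && p[-1] != '\t') {
  			p++;			/* "if" inside a word, e.g. "modif..." */
  			continue;
  		}
  		if (p[2] != ' ' && p[2] != '\t') {
  			p++;			/* "if" as a prefix, e.g. "ifup" */
  			continue;
  		}
  		p[-1] = '\0';			/* terminate the trigger part */
  		return p + 3;			/* filter text follows the "if " */
  	}
  	return NULL;				/* no filter present */
  }

  int main(void)
  {
  	char cmd[] = "keys=pid:onmax($lat).save(a, b) if comm==\"x\"";
  	char *filter = split_filter(cmd);

  	printf("trigger='%s' filter='%s'\n", cmd, filter ? filter : "");
  	return 0;
  }

The trigger 'keys=pid:onmax($lat).save(a, b)' keeps its internal spaces, which the old single-strsep split could not handle.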
+ +Link: http://lkml.kernel.org/r/1053090c3c308d4f431accdeb59dff4b511d4554.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit ab257ec0f8eb50c58fafd50b1cb5352553f31ccf) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 37 ++++++++++++++++++++++++++++++++----- + 1 file changed, 32 insertions(+), 5 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -5162,7 +5162,7 @@ static int event_hist_trigger_func(struc + struct synth_event *se; + const char *se_name; + bool remove = false; +- char *trigger; ++ char *trigger, *p; + int ret = 0; + + if (!param) +@@ -5171,10 +5171,37 @@ static int event_hist_trigger_func(struc + if (glob[0] == '!') + remove = true; + +- /* separate the trigger from the filter (k:v [if filter]) */ +- trigger = strsep(&param, " \t"); +- if (!trigger) +- return -EINVAL; ++ /* ++ * separate the trigger from the filter (k:v [if filter]) ++ * allowing for whitespace in the trigger ++ */ ++ p = trigger = param; ++ do { ++ p = strstr(p, "if"); ++ if (!p) ++ break; ++ if (p == param) ++ return -EINVAL; ++ if (*(p - 1) != ' ' && *(p - 1) != '\t') { ++ p++; ++ continue; ++ } ++ if (p >= param + strlen(param) - strlen("if") - 1) ++ return -EINVAL; ++ if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') { ++ p++; ++ continue; ++ } ++ break; ++ } while (p); ++ ++ if (!p) ++ param = NULL; ++ else { ++ *(p - 1) = '\0'; ++ param = strstrip(p); ++ trigger = strstrip(trigger); ++ } + + attrs = parse_hist_trigger_attrs(trigger); + if (IS_ERR(attrs)) diff --git a/debian/patches/features/all/rt/0037-tracing-Add-cpu-field-for-hist-triggers.patch b/debian/patches/features/all/rt/0037-tracing-Add-cpu-field-for-hist-triggers.patch new file mode 100644 index 000000000..a45049ca5 --- /dev/null +++ b/debian/patches/features/all/rt/0037-tracing-Add-cpu-field-for-hist-triggers.patch @@ -0,0 +1,115 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:03 -0600 +Subject: [PATCH 37/48] tracing: Add cpu field for hist triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +A common key to use in a histogram is the cpuid - add a new cpu +'synthetic' field named 'cpu' for that purpose. + +Link: http://lkml.kernel.org/r/89537645bfc957e0d76e2cacf5f0ada88691a6cc.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 4bfaa88f0e0e98e706d57647452e4d37afd78d00) +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/trace/histogram.txt | 15 +++++++++++++++ + kernel/trace/trace_events_hist.c | 28 +++++++++++++++++++++++++++- + 2 files changed, 42 insertions(+), 1 deletion(-) + +--- a/Documentation/trace/histogram.txt ++++ b/Documentation/trace/histogram.txt +@@ -172,6 +172,21 @@ + The examples below provide a more concrete illustration of the + concepts and typical usage patterns discussed above. + ++ 'special' event fields ++ ------------------------ ++ ++ There are a number of 'special event fields' available for use as ++ keys or values in a hist trigger. These look like and behave as if ++ they were actual event fields, but aren't really part of the event's ++ field definition or format file. They are however available for any ++ event, and can be used anywhere an actual event field could be.
++ They are: ++ ++ common_timestamp u64 - timestamp (from ring buffer) associated ++ with the event, in nanoseconds. May be ++ modified by .usecs to have timestamps ++ interpreted as microseconds. ++ cpu int - the cpu on which the event occurred. + + 6.2 'hist' trigger examples + --------------------------- +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -227,6 +227,7 @@ enum hist_field_flags { + HIST_FIELD_FL_VAR = 1 << 12, + HIST_FIELD_FL_EXPR = 1 << 13, + HIST_FIELD_FL_VAR_REF = 1 << 14, ++ HIST_FIELD_FL_CPU = 1 << 15, + }; + + struct var_defs { +@@ -1164,6 +1165,16 @@ static u64 hist_field_timestamp(struct h + return ts; + } + ++static u64 hist_field_cpu(struct hist_field *hist_field, ++ struct tracing_map_elt *elt, ++ struct ring_buffer_event *rbe, ++ void *event) ++{ ++ int cpu = smp_processor_id(); ++ ++ return cpu; ++} ++ + static struct hist_field * + check_field_for_var_ref(struct hist_field *hist_field, + struct hist_trigger_data *var_data, +@@ -1602,6 +1613,8 @@ static const char *hist_field_name(struc + field_name = hist_field_name(field->operands[0], ++level); + else if (field->flags & HIST_FIELD_FL_TIMESTAMP) + field_name = "common_timestamp"; ++ else if (field->flags & HIST_FIELD_FL_CPU) ++ field_name = "cpu"; + else if (field->flags & HIST_FIELD_FL_EXPR || + field->flags & HIST_FIELD_FL_VAR_REF) { + if (field->system) { +@@ -2109,6 +2122,15 @@ static struct hist_field *create_hist_fi + goto out; + } + ++ if (flags & HIST_FIELD_FL_CPU) { ++ hist_field->fn = hist_field_cpu; ++ hist_field->size = sizeof(int); ++ hist_field->type = kstrdup("unsigned int", GFP_KERNEL); ++ if (!hist_field->type) ++ goto free; ++ goto out; ++ } ++ + if (WARN_ON_ONCE(!field)) + goto out; + +@@ -2345,7 +2367,9 @@ parse_field(struct hist_trigger_data *hi + hist_data->enable_timestamps = true; + if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) + hist_data->attrs->ts_in_usecs = true; +- } else { ++ } else if (strcmp(field_name, "cpu") == 0) ++ *flags |= HIST_FIELD_FL_CPU; ++ else { + field = trace_find_event_field(file->event_call, field_name); + if (!field || !field->size) { + field = ERR_PTR(-EINVAL); +@@ -4619,6 +4643,8 @@ static void hist_field_print(struct seq_ + + if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) + seq_puts(m, "common_timestamp"); ++ else if (hist_field->flags & HIST_FIELD_FL_CPU) ++ seq_puts(m, "cpu"); + else if (field_name) { + if (hist_field->flags & HIST_FIELD_FL_VAR_REF) + seq_putc(m, '$'); diff --git a/debian/patches/features/all/rt/0038-tracing-Add-hist-trigger-support-for-variable-refere.patch b/debian/patches/features/all/rt/0038-tracing-Add-hist-trigger-support-for-variable-refere.patch new file mode 100644 index 000000000..d9d515fe4 --- /dev/null +++ b/debian/patches/features/all/rt/0038-tracing-Add-hist-trigger-support-for-variable-refere.patch @@ -0,0 +1,165 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:04 -0600 +Subject: [PATCH 38/48] tracing: Add hist trigger support for variable + reference aliases +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add support for alias=$somevar where alias can be used as +onmatch.xxx($alias). + +Aliases are a way of creating a new name for an existing variable, for +flexibility in making naming more clear in certain cases.
For example in +the below the user perhaps feels that using $new_lat in the synthetic +event invocation is opaque or doesn't fit well stylistically with +previous triggers, so creates an alias of $new_lat named $latency and +uses that in the call instead: + + # echo 'hist:keys=next_pid:new_lat=common_timestamp.usecs' > + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + + # echo 'hist:keys=pid:latency=$new_lat: + onmatch(sched.sched_switch).wake2($latency,pid)' > + /sys/kernel/debug/tracing/events/synthetic/wake1/trigger + +Link: http://lkml.kernel.org/r/ef20a65d921af3a873a6f1e8c71407c926d5586f.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 53c5a4f99f1a5f6ba304453716da571f3e51bc79) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace_events_hist.c | 74 +++++++++++++++++++++++++++++++++++---- + 1 file changed, 67 insertions(+), 7 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -228,6 +228,7 @@ enum hist_field_flags { + HIST_FIELD_FL_EXPR = 1 << 13, + HIST_FIELD_FL_VAR_REF = 1 << 14, + HIST_FIELD_FL_CPU = 1 << 15, ++ HIST_FIELD_FL_ALIAS = 1 << 16, + }; + + struct var_defs { +@@ -1609,7 +1610,8 @@ static const char *hist_field_name(struc + + if (field->field) + field_name = field->field->name; +- else if (field->flags & HIST_FIELD_FL_LOG2) ++ else if (field->flags & HIST_FIELD_FL_LOG2 || ++ field->flags & HIST_FIELD_FL_ALIAS) + field_name = hist_field_name(field->operands[0], ++level); + else if (field->flags & HIST_FIELD_FL_TIMESTAMP) + field_name = "common_timestamp"; +@@ -2080,7 +2082,7 @@ static struct hist_field *create_hist_fi + + hist_field->hist_data = hist_data; + +- if (flags & HIST_FIELD_FL_EXPR) ++ if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS) + goto out; /* caller will populate */ + + if (flags & HIST_FIELD_FL_VAR_REF) { +@@ -2217,10 +2219,18 @@ static int init_var_ref(struct hist_fiel + } + } + +- ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); +- if (!ref_field->name) { +- err = -ENOMEM; +- goto free; ++ if (var_field->var.name) { ++ ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); ++ if (!ref_field->name) { ++ err = -ENOMEM; ++ goto free; ++ } ++ } else if (var_field->name) { ++ ref_field->name = kstrdup(var_field->name, GFP_KERNEL); ++ if (!ref_field->name) { ++ err = -ENOMEM; ++ goto free; ++ } + } + + ref_field->type = kstrdup(var_field->type, GFP_KERNEL); +@@ -2382,6 +2392,28 @@ parse_field(struct hist_trigger_data *hi + return field; + } + ++static struct hist_field *create_alias(struct hist_trigger_data *hist_data, ++ struct hist_field *var_ref, ++ char *var_name) ++{ ++ struct hist_field *alias = NULL; ++ unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR; ++ ++ alias = create_hist_field(hist_data, NULL, flags, var_name); ++ if (!alias) ++ return NULL; ++ ++ alias->fn = var_ref->fn; ++ alias->operands[0] = var_ref; ++ ++ if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) { ++ destroy_hist_field(alias, 0); ++ return NULL; ++ } ++ ++ return alias; ++} ++ + static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, + struct trace_event_file *file, char *str, + unsigned long *flags, char *var_name) +@@ -2415,6 +2447,13 @@ static struct hist_field *parse_atom(str + if (hist_field) { + hist_data->var_refs[hist_data->n_var_refs] = hist_field; + hist_field->var_ref_idx = hist_data->n_var_refs++; ++ if (var_name) { ++ 
hist_field = create_alias(hist_data, hist_field, var_name); ++ if (!hist_field) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } + return hist_field; + } + } else +@@ -2515,6 +2554,26 @@ static int check_expr_operands(struct hi + unsigned long operand1_flags = operand1->flags; + unsigned long operand2_flags = operand2->flags; + ++ if ((operand1_flags & HIST_FIELD_FL_VAR_REF) || ++ (operand1_flags & HIST_FIELD_FL_ALIAS)) { ++ struct hist_field *var; ++ ++ var = find_var_field(operand1->var.hist_data, operand1->name); ++ if (!var) ++ return -EINVAL; ++ operand1_flags = var->flags; ++ } ++ ++ if ((operand2_flags & HIST_FIELD_FL_VAR_REF) || ++ (operand2_flags & HIST_FIELD_FL_ALIAS)) { ++ struct hist_field *var; ++ ++ var = find_var_field(operand2->var.hist_data, operand2->name); ++ if (!var) ++ return -EINVAL; ++ operand2_flags = var->flags; ++ } ++ + if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != + (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) + return -EINVAL; +@@ -4646,7 +4705,8 @@ static void hist_field_print(struct seq_ + else if (hist_field->flags & HIST_FIELD_FL_CPU) + seq_puts(m, "cpu"); + else if (field_name) { +- if (hist_field->flags & HIST_FIELD_FL_VAR_REF) ++ if (hist_field->flags & HIST_FIELD_FL_VAR_REF || ++ hist_field->flags & HIST_FIELD_FL_ALIAS) + seq_putc(m, '$'); + seq_printf(m, "%s", field_name); + } diff --git a/debian/patches/features/all/rt/0039-tracing-Add-last-error-error-facility-for-hist-trigg.patch b/debian/patches/features/all/rt/0039-tracing-Add-last-error-error-facility-for-hist-trigg.patch new file mode 100644 index 000000000..0d5b6a296 --- /dev/null +++ b/debian/patches/features/all/rt/0039-tracing-Add-last-error-error-facility-for-hist-trigg.patch @@ -0,0 +1,503 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:05 -0600 +Subject: [PATCH 39/48] tracing: Add 'last error' error facility for hist + triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +With the addition of variables and actions, it's become necessary to +provide more detailed error information to users about syntax errors. + +Add a 'last error' facility accessible via the erroring event's 'hist' +file. Reading the hist file after an error will display more detailed +information about what went wrong, if information is available. This +extended error information will be available until the next hist +trigger command for that event. + + # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + echo: write error: Invalid argument + + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist + + ERROR: Couldn't yyy: zzz + Last command: xxx + +Also add specific error messages for variable and action errors. + +Link: http://lkml.kernel.org/r/64e9c422fc8aeafcc2f7a3b4328c0cffe7969129.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 215016863b5ec1ee5db5e20f32ffe015a497209f) +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/trace/histogram.txt | 20 ++++ + kernel/trace/trace_events_hist.c | 164 ++++++++++++++++++++++++++++++++++---- + 2 files changed, 170 insertions(+), 14 deletions(-) + +--- a/Documentation/trace/histogram.txt ++++ b/Documentation/trace/histogram.txt +@@ -188,6 +188,26 @@ + interpreted as microseconds. + cpu int - the cpu on which the event occurred. 
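A simplified sketch of how such special fields are resolved (not the kernel's actual parse_field() flow; the flag values here are illustrative): the parser checks the reserved names before it consults the event's real field list, falling back to trace_find_event_field() only when no special name matches.

  #include <string.h>

  /* Illustrative flag bits, not the kernel's hist_field_flags values. */
  enum { FL_TIMESTAMP = 1 << 0, FL_TIMESTAMP_USECS = 1 << 1, FL_CPU = 1 << 2 };

  static unsigned long resolve_special_field(const char *name)
  {
  	if (strcmp(name, "common_timestamp") == 0)
  		return FL_TIMESTAMP;
  	if (strcmp(name, "common_timestamp.usecs") == 0)
  		return FL_TIMESTAMP | FL_TIMESTAMP_USECS;
  	if (strcmp(name, "cpu") == 0)
  		return FL_CPU;
  	return 0;	/* not special: look up a real event field instead */
  }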
+ ++ Extended error information ++ -------------------------- ++ ++ For some error conditions encountered when invoking a hist trigger ++ command, extended error information is available via the ++ corresponding event's 'hist' file. Reading the hist file after an ++ error will display more detailed information about what went wrong, ++ if information is available. This extended error information will ++ be available until the next hist trigger command for that event. ++ ++ If available for a given error condition, the extended error ++ information and usage takes the following form: ++ ++ # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger ++ echo: write error: Invalid argument ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist ++ ERROR: Couldn't yyy: zzz ++ Last command: xxx ++ + 6.2 'hist' trigger examples + --------------------------- + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -351,6 +351,65 @@ struct action_data { + }; + }; + ++ ++static char last_hist_cmd[MAX_FILTER_STR_VAL]; ++static char hist_err_str[MAX_FILTER_STR_VAL]; ++ ++static void last_cmd_set(char *str) ++{ ++ if (!str) ++ return; ++ ++ strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1); ++} ++ ++static void hist_err(char *str, char *var) ++{ ++ int maxlen = MAX_FILTER_STR_VAL - 1; ++ ++ if (!str) ++ return; ++ ++ if (strlen(hist_err_str)) ++ return; ++ ++ if (!var) ++ var = ""; ++ ++ if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen) ++ return; ++ ++ strcat(hist_err_str, str); ++ strcat(hist_err_str, var); ++} ++ ++static void hist_err_event(char *str, char *system, char *event, char *var) ++{ ++ char err[MAX_FILTER_STR_VAL]; ++ ++ if (system && var) ++ snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var); ++ else if (system) ++ snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); ++ else ++ strncpy(err, var, MAX_FILTER_STR_VAL); ++ ++ hist_err(str, err); ++} ++ ++static void hist_err_clear(void) ++{ ++ hist_err_str[0] = '\0'; ++} ++ ++static bool have_hist_err(void) ++{ ++ if (strlen(hist_err_str)) ++ return true; ++ ++ return false; ++} ++ + static LIST_HEAD(synth_event_list); + static DEFINE_MUTEX(synth_event_mutex); + +@@ -1448,8 +1507,10 @@ static struct trace_event_file *find_var + continue; + + if (find_var_field(var_hist_data, var_name)) { +- if (found) ++ if (found) { ++ hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + return NULL; ++ } + + found = file; + } +@@ -1498,6 +1559,7 @@ find_match_var(struct hist_trigger_data + hist_field = find_file_var(file, var_name); + if (hist_field) { + if (found) { ++ hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + return ERR_PTR(-EINVAL); + } + +@@ -1781,6 +1843,7 @@ static int parse_assignment(char *str, s + char *assignment; + + if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { ++ hist_err("Too many variables defined: ", str); + ret = -EINVAL; + goto out; + } +@@ -2335,6 +2398,10 @@ static struct hist_field *parse_var_ref( + if (var_field) + ref_field = create_var_ref(var_field, system, event_name); + ++ if (!ref_field) ++ hist_err_event("Couldn't find variable: $", ++ system, event_name, var_name); ++ + return ref_field; + } + +@@ -2494,6 +2561,7 @@ static struct hist_field *parse_unary(st + // we support only -(xxx) i.e. 
explicit parens required + + if (level > 3) { ++ hist_err("Too many subexpressions (3 max): ", str); + ret = -EINVAL; + goto free; + } +@@ -2575,8 +2643,10 @@ static int check_expr_operands(struct hi + } + + if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != +- (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) ++ (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { ++ hist_err("Timestamp units in expression don't match", NULL); + return -EINVAL; ++ } + + return 0; + } +@@ -2591,8 +2661,10 @@ static struct hist_field *parse_expr(str + int field_op, ret = -EINVAL; + char *sep, *operand1_str; + +- if (level > 3) ++ if (level > 3) { ++ hist_err("Too many subexpressions (3 max): ", str); + return ERR_PTR(-EINVAL); ++ } + + field_op = contains_operator(str); + +@@ -2826,12 +2898,17 @@ create_field_var_hist(struct hist_trigge + char *cmd; + int ret; + +- if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) ++ if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { ++ hist_err_event("onmatch: Too many field variables defined: ", ++ subsys_name, event_name, field_name); + return ERR_PTR(-EINVAL); ++ } + + file = event_file(tr, subsys_name, event_name); + + if (IS_ERR(file)) { ++ hist_err_event("onmatch: Event file not found: ", ++ subsys_name, event_name, field_name); + ret = PTR_ERR(file); + return ERR_PTR(ret); + } +@@ -2843,8 +2920,11 @@ create_field_var_hist(struct hist_trigge + * yet a registered histogram so we can't use that. + */ + hist_data = find_compatible_hist(target_hist_data, file); +- if (!hist_data) ++ if (!hist_data) { ++ hist_err_event("onmatch: Matching event histogram not found: ", ++ subsys_name, event_name, field_name); + return ERR_PTR(-EINVAL); ++ } + + /* See if a synthetic field variable has already been created */ + event_var = find_synthetic_field_var(target_hist_data, subsys_name, +@@ -2903,6 +2983,8 @@ create_field_var_hist(struct hist_trigge + kfree(cmd); + kfree(var_hist->cmd); + kfree(var_hist); ++ hist_err_event("onmatch: Couldn't create histogram for field: ", ++ subsys_name, event_name, field_name); + return ERR_PTR(ret); + } + +@@ -2914,6 +2996,8 @@ create_field_var_hist(struct hist_trigge + if (IS_ERR_OR_NULL(event_var)) { + kfree(var_hist->cmd); + kfree(var_hist); ++ hist_err_event("onmatch: Couldn't find synthetic variable: ", ++ subsys_name, event_name, field_name); + return ERR_PTR(-EINVAL); + } + +@@ -3050,18 +3134,21 @@ static struct field_var *create_field_va + int ret = 0; + + if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { ++ hist_err("Too many field variables defined: ", field_name); + ret = -EINVAL; + goto err; + } + + val = parse_atom(hist_data, file, field_name, &flags, NULL); + if (IS_ERR(val)) { ++ hist_err("Couldn't parse field variable: ", field_name); + ret = PTR_ERR(val); + goto err; + } + + var = create_var(hist_data, file, field_name, val->size, val->type); + if (IS_ERR(var)) { ++ hist_err("Couldn't create or find variable: ", field_name); + kfree(val); + ret = PTR_ERR(var); + goto err; +@@ -3204,13 +3291,17 @@ static int onmax_create(struct hist_trig + int ret = 0; + + onmax_var_str = data->onmax.var_str; +- if (onmax_var_str[0] != '$') ++ if (onmax_var_str[0] != '$') { ++ hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str); + return -EINVAL; ++ } + onmax_var_str++; + + var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str); +- if (!var_field) ++ if (!var_field) { ++ hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str); + return -EINVAL; ++ } + + flags = 
HIST_FIELD_FL_VAR_REF; + ref_field = create_hist_field(hist_data, NULL, flags, NULL); +@@ -3230,6 +3321,7 @@ static int onmax_create(struct hist_trig + data->onmax.max_var_ref_idx = var_ref_idx; + max_var = create_var(hist_data, file, "max", sizeof(u64), "u64"); + if (IS_ERR(max_var)) { ++ hist_err("onmax: Couldn't create onmax variable: ", "max"); + ret = PTR_ERR(max_var); + goto out; + } +@@ -3244,6 +3336,7 @@ static int onmax_create(struct hist_trig + + field_var = create_target_field_var(hist_data, NULL, NULL, param); + if (IS_ERR(field_var)) { ++ hist_err("onmax: Couldn't create field variable: ", param); + ret = PTR_ERR(field_var); + kfree(param); + goto out; +@@ -3276,6 +3369,7 @@ static int parse_action_params(char *par + + param = strstrip(param); + if (strlen(param) < 2) { ++ hist_err("Invalid action param: ", param); + ret = -EINVAL; + goto out; + } +@@ -3451,6 +3545,9 @@ onmatch_find_var(struct hist_trigger_dat + hist_field = find_event_var(hist_data, system, event, var); + } + ++ if (!hist_field) ++ hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var); ++ + return hist_field; + } + +@@ -3518,6 +3615,7 @@ static int onmatch_create(struct hist_tr + mutex_lock(&synth_event_mutex); + event = find_synth_event(data->onmatch.synth_event_name); + if (!event) { ++ hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name); + mutex_unlock(&synth_event_mutex); + return -EINVAL; + } +@@ -3577,12 +3675,15 @@ static int onmatch_create(struct hist_tr + continue; + } + ++ hist_err_event("onmatch: Param type doesn't match synthetic event field type: ", ++ system, event_name, param); + kfree(p); + ret = -EINVAL; + goto err; + } + + if (field_pos != event->n_fields) { ++ hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name); + ret = -EINVAL; + goto err; + } +@@ -3612,15 +3713,22 @@ static struct action_data *onmatch_parse + return ERR_PTR(-ENOMEM); + + match_event = strsep(&str, ")"); +- if (!match_event || !str) ++ if (!match_event || !str) { ++ hist_err("onmatch: Missing closing paren: ", match_event); + goto free; ++ } + + match_event_system = strsep(&match_event, "."); +- if (!match_event) ++ if (!match_event) { ++ hist_err("onmatch: Missing subsystem for match event: ", match_event_system); + goto free; ++ } + +- if (IS_ERR(event_file(tr, match_event_system, match_event))) ++ if (IS_ERR(event_file(tr, match_event_system, match_event))) { ++ hist_err_event("onmatch: Invalid subsystem or event name: ", ++ match_event_system, match_event, NULL); + goto free; ++ } + + data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL); + if (!data->onmatch.match_event) { +@@ -3635,12 +3743,16 @@ static struct action_data *onmatch_parse + } + + strsep(&str, "."); +- if (!str) ++ if (!str) { ++ hist_err("onmatch: Missing . 
after onmatch(): ", str); + goto free; ++ } + + synth_event_name = strsep(&str, "("); +- if (!synth_event_name || !str) ++ if (!synth_event_name || !str) { ++ hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name); + goto free; ++ } + + data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); + if (!data->onmatch.synth_event_name) { +@@ -3649,8 +3761,10 @@ static struct action_data *onmatch_parse + } + + params = strsep(&str, ")"); +- if (!params || !str || (str && strlen(str))) ++ if (!params || !str || (str && strlen(str))) { ++ hist_err("onmatch: Missing closing paramlist paren: ", params); + goto free; ++ } + + ret = parse_action_params(params, data); + if (ret) +@@ -3725,7 +3839,9 @@ static int create_var_field(struct hist_ + + if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) + return -EINVAL; ++ + if (find_var(hist_data, file, var_name) && !hist_data->remove) { ++ hist_err("Variable already defined: ", var_name); + return -EINVAL; + } + +@@ -3806,6 +3922,7 @@ static int create_key_field(struct hist_ + } + + if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { ++ hist_err("Using variable references as keys not supported: ", field_str); + destroy_hist_field(hist_field, 0); + ret = -EINVAL; + goto out; +@@ -3919,11 +4036,13 @@ static int parse_var_defs(struct hist_tr + + var_name = strsep(&field_str, "="); + if (!var_name || !field_str) { ++ hist_err("Malformed assignment: ", var_name); + ret = -EINVAL; + goto free; + } + + if (n_vars == TRACING_MAP_VARS_MAX) { ++ hist_err("Too many variables defined: ", var_name); + ret = -EINVAL; + goto free; + } +@@ -4675,6 +4794,11 @@ static int hist_show(struct seq_file *m, + hist_trigger_show(m, data, n++); + } + ++ if (have_hist_err()) { ++ seq_printf(m, "\nERROR: %s\n", hist_err_str); ++ seq_printf(m, " Last command: %s\n", last_hist_cmd); ++ } ++ + out_unlock: + mutex_unlock(&event_mutex); + +@@ -5039,6 +5163,7 @@ static int hist_register_trigger(char *g + if (named_data) { + if (!hist_trigger_match(data, named_data, named_data, + true)) { ++ hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name); + ret = -EINVAL; + goto out; + } +@@ -5058,13 +5183,16 @@ static int hist_register_trigger(char *g + test->paused = false; + else if (hist_data->attrs->clear) + hist_clear(test); +- else ++ else { ++ hist_err("Hist trigger already exists", NULL); + ret = -EEXIST; ++ } + goto out; + } + } + new: + if (hist_data->attrs->cont || hist_data->attrs->clear) { ++ hist_err("Can't clear or continue a nonexistent hist trigger", NULL); + ret = -ENOENT; + goto out; + } +@@ -5251,6 +5379,11 @@ static int event_hist_trigger_func(struc + char *trigger, *p; + int ret = 0; + ++ if (glob && strlen(glob)) { ++ last_cmd_set(param); ++ hist_err_clear(); ++ } ++ + if (!param) + return -EINVAL; + +@@ -5389,6 +5522,9 @@ static int event_hist_trigger_func(struc + /* Just return zero, not the number of registered triggers */ + ret = 0; + out: ++ if (ret == 0) ++ hist_err_clear(); ++ + return ret; + out_unreg: + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); diff --git a/debian/patches/features/all/rt/0040-tracing-Add-inter-event-hist-trigger-Documentation.patch b/debian/patches/features/all/rt/0040-tracing-Add-inter-event-hist-trigger-Documentation.patch new file mode 100644 index 000000000..094a705ef --- /dev/null +++ b/debian/patches/features/all/rt/0040-tracing-Add-inter-event-hist-trigger-Documentation.patch @@ -0,0 +1,406 @@ +From: Tom Zanussi +Date: Mon, 
15 Jan 2018 20:52:06 -0600 +Subject: [PATCH 40/48] tracing: Add inter-event hist trigger Documentation +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Add background and details on inter-event hist triggers, including +hist variables, synthetic events, and actions. + +Link: http://lkml.kernel.org/r/b0414efb66535aa52aa7411f58c3d56724027fce.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Baohong Liu +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 7d5f30af5e39e572f6984c1083fe79fd7dc34d04) +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/trace/histogram.txt | 381 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 381 insertions(+) + +--- a/Documentation/trace/histogram.txt ++++ b/Documentation/trace/histogram.txt +@@ -1603,3 +1603,384 @@ + Hits: 489 + Entries: 7 + Dropped: 0 ++ ++ ++2.2 Inter-event hist triggers ++----------------------------- ++ ++Inter-event hist triggers are hist triggers that combine values from ++one or more other events and create a histogram using that data. Data ++from an inter-event histogram can in turn become the source for ++further combined histograms, thus providing a chain of related ++histograms, which is important for some applications. ++ ++The most important example of an inter-event quantity that can be used ++in this manner is latency, which is simply a difference in timestamps ++between two events. Although latency is the most important ++inter-event quantity, note that because the support is completely ++general across the trace event subsystem, any event field can be used ++in an inter-event quantity. ++ ++An example of a histogram that combines data from other histograms ++into a useful chain would be a 'wakeupswitch latency' histogram that ++combines a 'wakeup latency' histogram and a 'switch latency' ++histogram. ++ ++Normally, a hist trigger specification consists of a (possibly ++compound) key along with one or more numeric values, which are ++continually updated sums associated with that key. A histogram ++specification in this case consists of individual key and value ++specifications that refer to trace event fields associated with a ++single event type. ++ ++The inter-event hist trigger extension allows fields from multiple ++events to be referenced and combined into a multi-event histogram ++specification. In support of this overall goal, a few enabling ++features have been added to the hist trigger support: ++ ++ - In order to compute an inter-event quantity, a value from one ++ event needs to be saved and then referenced from another event. This ++ requires the introduction of support for histogram 'variables'. ++ ++ - The computation of inter-event quantities and their combination ++ require some minimal amount of support for applying simple ++ expressions to variables (+ and -). ++ ++ - A histogram consisting of inter-event quantities isn't logically a ++ histogram on either event (so having the 'hist' file for either ++ event host the histogram output doesn't really make sense). To ++ address the idea that the histogram is associated with a ++ combination of events, support is added allowing the creation of ++ 'synthetic' events that are events derived from other events. ++ These synthetic events are full-fledged events just like any other ++ and can be used as such, as for instance to create the ++ 'combination' histograms mentioned previously.
++ ++ - A set of 'actions' can be associated with histogram entries - ++ these can be used to generate the previously mentioned synthetic ++ events, but can also be used for other purposes, such as for ++ example saving context when a 'max' latency has been hit. ++ ++ - Trace events don't have a 'timestamp' associated with them, but ++ there is an implicit timestamp saved along with an event in the ++ underlying ftrace ring buffer. This timestamp is now exposed as ++ a synthetic field named 'common_timestamp' which can be used in ++ histograms as if it were any other event field; it isn't an actual ++ field in the trace format but rather is a synthesized value that ++ nonetheless can be used as if it were an actual field. By default ++ it is in units of nanoseconds; appending '.usecs' to a ++ common_timestamp field changes the units to microseconds. ++ ++These features are decribed in more detail in the following sections. ++ ++2.2.1 Histogram Variables ++------------------------- ++ ++Variables are simply named locations used for saving and retrieving ++values between matching events. A 'matching' event is defined as an ++event that has a matching key - if a variable is saved for a histogram ++entry corresponding to that key, any subsequent event with a matching ++key can access that variable. ++ ++A variable's value is normally available to any subsequent event until ++it is set to something else by a subsequent event. The one exception ++to that rule is that any variable used in an expression is essentially ++'read-once' - once it's used by an expression in a subsequent event, ++it's reset to its 'unset' state, which means it can't be used again ++unless it's set again. This ensures not only that an event doesn't ++use an uninitialized variable in a calculation, but that that variable ++is used only once and not for any unrelated subsequent match. ++ ++The basic syntax for saving a variable is to simply prefix a unique ++variable name not corresponding to any keyword along with an '=' sign ++to any event field. ++ ++Either keys or values can be saved and retrieved in this way. This ++creates a variable named 'ts0' for a histogram entry with the key ++'next_pid': ++ ++ # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ... >> \ ++ event/trigger ++ ++The ts0 variable can be accessed by any subsequent event having the ++same pid as 'next_pid'. ++ ++Variable references are formed by prepending the variable name with ++the '$' sign. Thus for example, the ts0 variable above would be ++referenced as '$ts0' in expressions. ++ ++Because 'vals=' is used, the common_timestamp variable value above ++will also be summed as a normal histogram value would (though for a ++timestamp it makes little sense). ++ ++The below shows that a key value can also be saved in the same way: ++ ++ # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger ++ ++If a variable isn't a key variable or prefixed with 'vals=', the ++associated event field will be saved in a variable but won't be summed ++as a value: ++ ++ # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger ++ ++Multiple variables can be assigned at the same time. The below would ++result in both ts0 and b being created as variables, with both ++common_timestamp and field1 additionally being summed as values: ++ ++ # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \ ++ event/trigger ++ ++Note that variable assignments can appear either preceding or ++following their use.
The command below behaves identically to the ++command above: ++ ++ # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \ ++ event/trigger ++ ++Any number of variables not bound to a 'vals=' prefix can also be ++assigned by simply separating them with colons. Below is the same ++thing but without the values being summed in the histogram: ++ ++ # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger ++ ++Variables set as above can be referenced and used in expressions on ++another event. ++ ++For example, here's how a latency can be calculated: ++ ++ # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger ++ # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger ++ ++In the first line above, the event's timestamp is saved into the ++variable ts0. In the next line, ts0 is subtracted from the second ++event's timestamp to produce the latency, which is then assigned into ++yet another variable, 'wakeup_lat'. The hist trigger below in turn ++makes use of the wakeup_lat variable to compute a combined latency ++using the same key and variable from yet another event: ++ ++ # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger ++ ++2.2.2 Synthetic Events ++---------------------- ++ ++Synthetic events are user-defined events generated from hist trigger ++variables or fields associated with one or more other events. Their ++purpose is to provide a mechanism for displaying data spanning ++multiple events consistent with the existing and already familiar ++usage for normal events. ++ ++To define a synthetic event, the user writes a simple specification ++consisting of the name of the new event along with one or more ++variables and their types, which can be any valid field type, ++separated by semicolons, to the tracing/synthetic_events file. ++ ++For instance, the following creates a new event named 'wakeup_latency' ++with 3 fields: lat, pid, and prio. Each of those fields is simply a ++variable reference to a variable on another event: ++ ++ # echo 'wakeup_latency \ ++ u64 lat; \ ++ pid_t pid; \ ++ int prio' >> \ ++ /sys/kernel/debug/tracing/synthetic_events ++ ++Reading the tracing/synthetic_events file lists all the currently ++defined synthetic events, in this case the event defined above: ++ ++ # cat /sys/kernel/debug/tracing/synthetic_events ++ wakeup_latency u64 lat; pid_t pid; int prio ++ ++An existing synthetic event definition can be removed by prepending ++the command that defined it with a '!': ++ ++ # echo '!wakeup_latency u64 lat pid_t pid int prio' >> \ ++ /sys/kernel/debug/tracing/synthetic_events ++ ++At this point, there isn't yet an actual 'wakeup_latency' event ++instantiated in the event subsystem - for this to happen, a 'hist ++trigger action' needs to be instantiated and bound to actual fields ++and variables defined on other events (see Section 2.2.3 below). ++ ++Once that is done, an event instance is created, and a histogram can ++be defined using it: ++ ++ # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \ ++ /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger ++ ++The new event is created under the tracing/events/synthetic/ directory ++and looks and behaves just like any other event: ++ ++ # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency ++ enable filter format hist id trigger ++ ++Like any other event, once a histogram is enabled for the event, the ++output can be displayed by reading the event's 'hist' file.
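The 'name type field; ...' format written to synthetic_events is simple enough to show in miniature. The userspace sketch below is illustrative only (the kernel's parser in trace_events_hist.c additionally validates types and handles error cases); it splits a definition into the event name and its 'type name' field pairs:

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
  	char def[] = "wakeup_latency u64 lat; pid_t pid; int prio";
  	char *p = def;
  	char *name = strsep(&p, " ");	/* first token is the event name */
  	char *field;

  	printf("event: %s\n", name);
  	while (p && (field = strsep(&p, ";")))
  		printf("  field:%s\n", field);	/* each piece is a "type name" pair */
  	return 0;
  }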
++ ++2.2.3 Hist trigger 'actions' ++---------------------------- ++ ++A hist trigger 'action' is a function that's executed whenever a ++histogram entry is added or updated. ++ ++The default 'action' if no special function is explicitly specified is ++as it always has been, to simply update the set of values associated ++with an entry. Some applications, however, may want to perform ++additional actions at that point, such as generate another event, or ++compare and save a maximum. ++ ++The following additional actions are available. To specify an action ++for a given event, simply specify the action between colons in the ++hist trigger specification. ++ ++ - onmatch(matching.event).<synthetic_event_name>(param list) ++ ++ The 'onmatch(matching.event).<synthetic_event_name>(params)' hist ++ trigger action is invoked whenever an event matches and the ++ histogram entry would be added or updated. It causes the named ++ synthetic event to be generated with the values given in the ++ 'param list'. The result is the generation of a synthetic event ++ that consists of the values contained in those variables at the ++ time the invoking event was hit. ++ ++ The 'param list' consists of one or more parameters which may be ++ either variables or fields defined on either the 'matching.event' ++ or the target event. The variables or fields specified in the ++ param list may be either fully-qualified or unqualified. If a ++ variable is specified as unqualified, it must be unique between ++ the two events. A field name used as a param can be unqualified ++ if it refers to the target event, but must be fully qualified if ++ it refers to the matching event. A fully-qualified name is of the ++ form 'system.event_name.$var_name' or 'system.event_name.field'. ++ ++ The 'matching.event' specification is simply the fully qualified ++ event name of the event that matches the target event for the ++ onmatch() functionality, in the form 'system.event_name'. ++ ++ Finally, the number and type of variables/fields in the 'param ++ list' must match the number and types of the fields in the ++ synthetic event being generated. ++ ++ As an example, the below defines a simple synthetic event and uses ++ a variable defined on the sched_wakeup_new event as a parameter ++ when invoking the synthetic event.
Here we define the synthetic ++ event: ++ ++ # echo 'wakeup_new_test pid_t pid' >> \ ++ /sys/kernel/debug/tracing/synthetic_events ++ ++ # cat /sys/kernel/debug/tracing/synthetic_events ++ wakeup_new_test pid_t pid ++ ++ The following hist trigger both defines the missing testpid ++ variable and specifies an onmatch() action that generates a ++ wakeup_new_test synthetic event whenever a sched_wakeup_new event ++ occurs, which because of the 'if comm == "cyclictest"' filter only ++ happens when the executable is cyclictest: ++ ++ # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\ ++ wakeup_new_test($testpid) if comm=="cyclictest"' >> \ ++ /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger ++ ++ Creating and displaying a histogram based on those events is now ++ just a matter of using the fields and new synthetic event in the ++ tracing/events/synthetic directory, as usual: ++ ++ # echo 'hist:keys=pid:sort=pid' >> \ ++ /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger ++ ++ Running 'cyclictest' should cause wakeup_new events to generate ++ wakeup_new_test synthetic events which should result in histogram ++ output in the wakeup_new_test event's hist file: ++ ++ # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/hist ++ ++ A more typical usage would be to use two events to calculate a ++ latency. The following example uses a set of hist triggers to ++ produce a 'wakeup_latency' histogram: ++ ++ First, we define a 'wakeup_latency' synthetic event: ++ ++ # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \ ++ /sys/kernel/debug/tracing/synthetic_events ++ ++ Next, we specify that whenever we see a sched_waking event for a ++ cyclictest thread, save the timestamp in a 'ts0' variable: ++ ++ # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=common_timestamp.usecs \ ++ if comm=="cyclictest"' >> \ ++ /sys/kernel/debug/tracing/events/sched/sched_waking/trigger ++ ++ Then, when the corresponding thread is actually scheduled onto the ++ CPU by a sched_switch event, calculate the latency and use that ++ along with another variable and an event field to generate a ++ wakeup_latency synthetic event: ++ ++ # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\ ++ onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\ ++ $saved_pid,next_prio) if next_comm=="cyclictest"' >> \ ++ /sys/kernel/debug/tracing/events/sched/sched_switch/trigger ++ ++ We also need to create a histogram on the wakeup_latency synthetic ++ event in order to aggregate the generated synthetic event data: ++ ++ # echo 'hist:keys=pid,prio,lat:sort=pid,lat' >> \ ++ /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger ++ ++ Finally, once we've run cyclictest to actually generate some ++ events, we can see the output by looking at the wakeup_latency ++ synthetic event's hist file: ++ ++ # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist ++ ++ - onmax(var).save(field,...) ++ ++ The 'onmax(var).save(field,...)' hist trigger action is invoked ++ whenever the value of 'var' associated with a histogram entry ++ exceeds the current maximum contained in that variable. ++ ++ The end result is that the trace event fields specified as the ++ onmax.save() params will be saved if 'var' exceeds the current ++ maximum for that hist trigger entry. This allows context from the ++ event that exhibited the new maximum to be saved for later ++ reference.
When the histogram is displayed, additional fields ++ displaying the saved values will be printed. ++ ++ As an example, the below defines a couple of hist triggers, one for ++ sched_waking and another for sched_switch, keyed on pid. Whenever ++ a sched_waking occurs, the timestamp is saved in the entry ++ corresponding to the current pid, and when the scheduler switches ++ back to that pid, the timestamp difference is calculated. If the ++ resulting latency, stored in wakeup_lat, exceeds the current ++ maximum latency, the values specified in the save() fields are ++ recorded: ++ ++ # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ ++ if comm=="cyclictest"' >> \ ++ /sys/kernel/debug/tracing/events/sched/sched_waking/trigger ++ ++ # echo 'hist:keys=next_pid:\ ++ wakeup_lat=common_timestamp.usecs-$ts0:\ ++ onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \ ++ if next_comm=="cyclictest"' >> \ ++ /sys/kernel/debug/tracing/events/sched/sched_switch/trigger ++ ++ When the histogram is displayed, the max value and the saved ++ values corresponding to the max are displayed following the rest ++ of the fields: ++ ++ # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist ++ { next_pid: 2255 } hitcount: 239 ++ common_timestamp-ts0: 0 ++ max: 27 ++ next_comm: cyclictest ++ prev_pid: 0 prev_prio: 120 prev_comm: swapper/1 ++ ++ { next_pid: 2256 } hitcount: 2355 ++ common_timestamp-ts0: 0 ++ max: 49 next_comm: cyclictest ++ prev_pid: 0 prev_prio: 120 prev_comm: swapper/0 ++ ++ Totals: ++ Hits: 12970 ++ Entries: 2 ++ Dropped: 0 diff --git a/debian/patches/features/all/rt/0041-tracing-Make-tracing_set_clock-non-static.patch b/debian/patches/features/all/rt/0041-tracing-Make-tracing_set_clock-non-static.patch new file mode 100644 index 000000000..75b806992 --- /dev/null +++ b/debian/patches/features/all/rt/0041-tracing-Make-tracing_set_clock-non-static.patch @@ -0,0 +1,44 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:07 -0600 +Subject: [PATCH 41/48] tracing: Make tracing_set_clock() non-static +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Allow tracing code outside of trace.c to access tracing_set_clock(). + +Some applications may require a particular clock in order to function +properly, such as latency calculations. + +Also, add an accessor returning the current clock string.
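A hedged sketch of the kind of call site this export enables (pin_coherent_clock is a hypothetical name; patch 42 below adds the real caller in hist_register_trigger()), for tracing code whose timestamps must be comparable across CPUs:

  /* Hypothetical user of the newly non-static tracing_set_clock(). */
  static int pin_coherent_clock(struct trace_array *tr)
  {
  	int ret;

  	/* "global" is coherent across CPUs, unlike the default "local" clock */
  	ret = tracing_set_clock(tr, "global");
  	if (ret)
  		return ret;	/* e.g. unknown clock name */

  	/* inter-event latency calculations also want absolute timestamps */
  	return tracing_set_time_stamp_abs(tr, true);
  }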
+ +Link: http://lkml.kernel.org/r/6d1c53e9ee2163f54e1849f5376573f54f0e6009.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit f8913a56885a33eda24452c1839102c305bf7df5) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace.c | 2 +- + kernel/trace/trace.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6214,7 +6214,7 @@ static int tracing_clock_show(struct seq + return 0; + } + +-static int tracing_set_clock(struct trace_array *tr, const char *clockstr) ++int tracing_set_clock(struct trace_array *tr, const char *clockstr) + { + int i; + +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -289,6 +289,7 @@ extern int trace_array_get(struct trace_ + extern void trace_array_put(struct trace_array *tr); + + extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); ++extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); + + extern bool trace_clock_in_ns(struct trace_array *tr); + diff --git a/debian/patches/features/all/rt/0042-tracing-Add-a-clock-attribute-for-hist-triggers.patch b/debian/patches/features/all/rt/0042-tracing-Add-a-clock-attribute-for-hist-triggers.patch new file mode 100644 index 000000000..ce173fce3 --- /dev/null +++ b/debian/patches/features/all/rt/0042-tracing-Add-a-clock-attribute-for-hist-triggers.patch @@ -0,0 +1,138 @@ +From: Tom Zanussi +Date: Mon, 15 Jan 2018 20:52:08 -0600 +Subject: [PATCH 42/48] tracing: Add a clock attribute for hist triggers +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The default clock if timestamps are used in a histogram is "global". +If timestamps aren't used, the clock is irrelevant. + +Use the "clock=" param only if you want to override the default +"global" clock for a histogram with timestamps. + +Link: http://lkml.kernel.org/r/427bed1389c5d22aa40c3e0683e30cc3d151e260.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Rajvi Jingar +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 77e7689e0b182465cfcd7c328061b70eecdcde31) +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/trace/histogram.txt | 11 +++++++++ + kernel/trace/trace_events_hist.c | 42 +++++++++++++++++++++++++++++++++++--- + 2 files changed, 49 insertions(+), 4 deletions(-) + +--- a/Documentation/trace/histogram.txt ++++ b/Documentation/trace/histogram.txt +@@ -1671,7 +1671,16 @@ specification. In support of this overa + it is in units of nanoseconds; appending '.usecs' to a + common_timestamp field changes the units to microseconds. + +-These features are decribed in more detail in the following sections. ++A note on inter-event timestamps: If common_timestamp is used in a ++histogram, the trace buffer is automatically switched over to using ++absolute timestamps and the "global" trace clock, in order to avoid ++bogus timestamp differences with other clocks that aren't coherent ++across CPUs. This can be overridden by specifying one of the other ++trace clocks instead, using the "clock=XXX" hist trigger attribute, ++where XXX is any of the clocks listed in the tracing/trace_clock ++pseudo-file. ++ ++These features are described in more detail in the following sections. 
+ + 2.2.1 Histogram Variables + ------------------------- +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -242,6 +242,7 @@ struct hist_trigger_attrs { + char *vals_str; + char *sort_key_str; + char *name; ++ char *clock; + bool pause; + bool cont; + bool clear; +@@ -1776,6 +1777,7 @@ static void destroy_hist_trigger_attrs(s + kfree(attrs->sort_key_str); + kfree(attrs->keys_str); + kfree(attrs->vals_str); ++ kfree(attrs->clock); + kfree(attrs); + } + +@@ -1831,6 +1833,19 @@ static int parse_assignment(char *str, s + ret = -ENOMEM; + goto out; + } ++ } else if (strncmp(str, "clock=", strlen("clock=")) == 0) { ++ strsep(&str, "="); ++ if (!str) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ str = strstrip(str); ++ attrs->clock = kstrdup(str, GFP_KERNEL); ++ if (!attrs->clock) { ++ ret = -ENOMEM; ++ goto out; ++ } + } else if (strncmp(str, "size=", strlen("size=")) == 0) { + int map_bits = parse_map_size(str); + +@@ -1895,6 +1910,14 @@ static struct hist_trigger_attrs *parse_ + goto free; + } + ++ if (!attrs->clock) { ++ attrs->clock = kstrdup("global", GFP_KERNEL); ++ if (!attrs->clock) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ } ++ + return attrs; + free: + destroy_hist_trigger_attrs(attrs); +@@ -4934,6 +4957,8 @@ static int event_hist_trigger_print(stru + seq_puts(m, ".descending"); + } + seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); ++ if (hist_data->enable_timestamps) ++ seq_printf(m, ":clock=%s", hist_data->attrs->clock); + + print_actions_spec(m, hist_data); + +@@ -5201,7 +5226,6 @@ static int hist_register_trigger(char *g + data->paused = true; + + if (named_data) { +- destroy_hist_data(data->private_data); + data->private_data = named_data->private_data; + set_named_trigger_data(data, named_data); + data->ops = &event_hist_trigger_named_ops; +@@ -5213,10 +5237,22 @@ static int hist_register_trigger(char *g + goto out; + } + +- ret++; ++ if (hist_data->enable_timestamps) { ++ char *clock = hist_data->attrs->clock; ++ ++ ret = tracing_set_clock(file->tr, hist_data->attrs->clock); ++ if (ret) { ++ hist_err("Couldn't set trace_clock: ", clock); ++ goto out; ++ } + +- if (hist_data->enable_timestamps) + tracing_set_time_stamp_abs(file->tr, true); ++ } ++ ++ if (named_data) ++ destroy_hist_data(hist_data); ++ ++ ret++; + out: + return ret; + } diff --git a/debian/patches/features/all/rt/0045-ring-buffer-Add-nesting-for-adding-events-within-eve.patch b/debian/patches/features/all/rt/0045-ring-buffer-Add-nesting-for-adding-events-within-eve.patch new file mode 100644 index 000000000..623843d64 --- /dev/null +++ b/debian/patches/features/all/rt/0045-ring-buffer-Add-nesting-for-adding-events-within-eve.patch @@ -0,0 +1,120 @@ +From: "Steven Rostedt (VMware)" +Date: Wed, 7 Feb 2018 17:26:32 -0500 +Subject: [PATCH 45/48] ring-buffer: Add nesting for adding events within + events +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The ring-buffer code has recursion protection in case tracing ends up tracing +itself; the ring-buffer will detect that it was called at the same context +(normal, softirq, interrupt or NMI), and not continue to record the event. + +With the histogram synthetic events, they are called while tracing another +event at the same context. The recursion protection triggers because it +detects tracing at the same context and stops it.
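The mechanism, modeled standalone below (an illustrative userspace sketch, not the kernel code that follows), is a per-buffer bit offset: each declared nesting level shifts the four context bits (normal, softirq, irq, NMI) left by four, so an intended nested write checks and sets a bit disjoint from the outer write's bit.

  #include <assert.h>

  #define NESTED_BITS 4

  static unsigned long context;	/* models cpu_buffer->current_context */
  static unsigned long nest;	/* models cpu_buffer->nest */

  static int recursive_lock(int bit)	/* bit = context level, 0 = normal */
  {
  	if (context & (1UL << (bit + nest)))
  		return 1;		/* same context bit set: treat as recursion */
  	context |= 1UL << (bit + nest);
  	return 0;
  }

  static void recursive_unlock(void)
  {
  	context &= context - (1UL << nest);	/* clear the bit taken at this level */
  }

  int main(void)
  {
  	assert(recursive_lock(0) == 0);	/* outer event starts recording */
  	assert(recursive_lock(0) == 1);	/* unintended recursion is still caught */
  	nest += NESTED_BITS;		/* ring_buffer_nest_start() */
  	assert(recursive_lock(0) == 0);	/* declared-nested write is now allowed */
  	recursive_unlock();		/* nested commit */
  	nest -= NESTED_BITS;		/* ring_buffer_nest_end() */
  	recursive_unlock();		/* outer commit */
  	return 0;
  }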
+ +Add ring_buffer_nest_start() and ring_buffer_nest_end() that will notify the +ring buffer that a trace is about to happen within another trace and that it +is intended, and not to trigger the recursion blocking. + +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit f932ff1d98c482716b4b71a5d76b2aa3d65f66f0) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/ring_buffer.h | 3 ++ + kernel/trace/ring_buffer.c | 57 +++++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 57 insertions(+), 3 deletions(-) + +--- a/include/linux/ring_buffer.h ++++ b/include/linux/ring_buffer.h +@@ -117,6 +117,9 @@ int ring_buffer_unlock_commit(struct rin + int ring_buffer_write(struct ring_buffer *buffer, + unsigned long length, void *data); + ++void ring_buffer_nest_start(struct ring_buffer *buffer); ++void ring_buffer_nest_end(struct ring_buffer *buffer); ++ + struct ring_buffer_event * + ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events); +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -477,6 +477,7 @@ struct ring_buffer_per_cpu { + struct buffer_page *reader_page; + unsigned long lost_events; + unsigned long last_overrun; ++ unsigned long nest; + local_t entries_bytes; + local_t entries; + local_t overrun; +@@ -2629,10 +2630,10 @@ trace_recursive_lock(struct ring_buffer_ + bit = pc & NMI_MASK ? RB_CTX_NMI : + pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; + +- if (unlikely(val & (1 << bit))) ++ if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) + return 1; + +- val |= (1 << bit); ++ val |= (1 << (bit + cpu_buffer->nest)); + cpu_buffer->current_context = val; + + return 0; +@@ -2641,7 +2642,57 @@ trace_recursive_lock(struct ring_buffer_ + static __always_inline void + trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) + { +- cpu_buffer->current_context &= cpu_buffer->current_context - 1; ++ cpu_buffer->current_context &= ++ cpu_buffer->current_context - (1 << cpu_buffer->nest); ++} ++ ++/* The recursive locking above uses 4 bits */ ++#define NESTED_BITS 4 ++ ++/** ++ * ring_buffer_nest_start - Allow to trace while nested ++ * @buffer: The ring buffer to modify ++ * ++ * The ring buffer has a safety mechanism to prevent recursion. ++ * But there may be a case where a trace needs to be done while ++ * tracing something else. In this case, calling this function ++ * will allow this function to nest within a currently active ++ * ring_buffer_lock_reserve(). ++ * ++ * Call this function before calling another ring_buffer_lock_reserve() and ++ * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). ++ */ ++void ring_buffer_nest_start(struct ring_buffer *buffer) ++{ ++ struct ring_buffer_per_cpu *cpu_buffer; ++ int cpu; ++ ++ /* Enabled by ring_buffer_nest_end() */ ++ preempt_disable_notrace(); ++ cpu = raw_smp_processor_id(); ++ cpu_buffer = buffer->buffers[cpu]; ++ /* This is the shift value for the above recursive locking */ ++ cpu_buffer->nest += NESTED_BITS; ++} ++ ++/** ++ * ring_buffer_nest_end - Allow to trace while nested ++ * @buffer: The ring buffer to modify ++ * ++ * Must be called after ring_buffer_nest_start() and after the ++ * ring_buffer_unlock_commit().
++ */
++void ring_buffer_nest_end(struct ring_buffer *buffer)
++{
++ struct ring_buffer_per_cpu *cpu_buffer;
++ int cpu;
++
++ /* disabled by ring_buffer_nest_start() */
++ cpu = raw_smp_processor_id();
++ cpu_buffer = buffer->buffers[cpu];
++ /* This is the shift value for the above recursive locking */
++ cpu_buffer->nest -= NESTED_BITS;
++ preempt_enable_notrace();
+ }
+
+ /**
diff --git a/debian/patches/features/all/rt/0046-tracing-Use-the-ring-buffer-nesting-to-allow-synthet.patch b/debian/patches/features/all/rt/0046-tracing-Use-the-ring-buffer-nesting-to-allow-synthet.patch
new file mode 100644
index 000000000..a429eb6eb
--- /dev/null
+++ b/debian/patches/features/all/rt/0046-tracing-Use-the-ring-buffer-nesting-to-allow-synthet.patch
@@ -0,0 +1,55 @@
+From: "Steven Rostedt (VMware)"
+Date: Wed, 7 Feb 2018 17:29:46 -0500
+Subject: [PATCH 46/48] tracing: Use the ring-buffer nesting to allow synthetic
+ events to be traced
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+Synthetic events can be done within the recording of other events. Notify
+the ring buffer via ring_buffer_nest_start() and ring_buffer_nest_end() that
+this is intended, so it does not block them via its recursion protection.
+
+Signed-off-by: Steven Rostedt (VMware)
+(cherry picked from commit 92c571543120ffed5e725f5b57b9de0b535e9d0a)
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ kernel/trace/trace_events_hist.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -640,6 +640,7 @@ static notrace void trace_event_raw_even
+ struct trace_event_file *trace_file = __data;
+ struct synth_trace_event *entry;
+ struct trace_event_buffer fbuffer;
++ struct ring_buffer *buffer;
+ struct synth_event *event;
+ unsigned int i, n_u64;
+ int fields_size = 0;
+@@ -651,10 +652,17 @@ static notrace void trace_event_raw_even
+
+ fields_size = event->n_u64 * sizeof(u64);
+
++ /*
++ * Avoid ring buffer recursion detection, as this event
++ * is being performed within another event.
++ */
++ buffer = trace_file->tr->trace_buffer.buffer;
++ ring_buffer_nest_start(buffer);
++
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + fields_size);
+ if (!entry)
+- return;
++ goto out;
+
+ for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+ if (event->fields[i]->is_string) {
+@@ -670,6 +678,8 @@ static notrace void trace_event_raw_even
+ }
+
+ trace_event_buffer_commit(&fbuffer);
++out:
++ ring_buffer_nest_end(buffer);
+ }
+
+ static void free_synth_event_print_fmt(struct trace_event_call *call)
diff --git a/debian/patches/features/all/rt/0047-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch b/debian/patches/features/all/rt/0047-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch
new file mode 100644
index 000000000..76e1d9797
--- /dev/null
+++ b/debian/patches/features/all/rt/0047-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch
@@ -0,0 +1,34 @@
+From: Tom Zanussi
+Date: Mon, 15 Jan 2018 20:52:10 -0600
+Subject: [PATCH 47/48] tracing: Add inter-event blurb to HIST_TRIGGERS config
+ option
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+So that users know that inter-event tracing is supported as part of
+the HIST_TRIGGERS option, include text to that effect in the help
+text.
+ +Link: http://lkml.kernel.org/r/a38e24231d8d980be636b56d35814570acfd167a.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit 02942764c4fd12caeb29868822b7744fa91a9ad0) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/Kconfig | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/kernel/trace/Kconfig ++++ b/kernel/trace/Kconfig +@@ -606,7 +606,10 @@ config HIST_TRIGGERS + event activity as an initial guide for further investigation + using more advanced tools. + +- See Documentation/trace/events.txt. ++ Inter-event tracing of quantities such as latencies is also ++ supported using hist triggers under this option. ++ ++ See Documentation/trace/histogram.txt. + If in doubt, say N. + + config MMIOTRACE_TEST diff --git a/debian/patches/features/all/rt/0048-selftests-ftrace-Add-inter-event-hist-triggers-testc.patch b/debian/patches/features/all/rt/0048-selftests-ftrace-Add-inter-event-hist-triggers-testc.patch new file mode 100644 index 000000000..26f66720d --- /dev/null +++ b/debian/patches/features/all/rt/0048-selftests-ftrace-Add-inter-event-hist-triggers-testc.patch @@ -0,0 +1,443 @@ +From: Rajvi Jingar +Date: Mon, 15 Jan 2018 20:52:11 -0600 +Subject: [PATCH 48/48] selftests: ftrace: Add inter-event hist triggers + testcases +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + + This adds inter-event hist triggers testcases which covers following: + - create/remove synthetic event + - disable histogram for synthetic event + - extended error support + - field variable support + - histogram variables + - histogram trigger onmatch action + - histogram trigger onmax action + - histogram trigger onmatch-onmax action + - simple expression support + - combined histogram + + Here is the test result. 
+ === Ftrace unit tests === + [1] event trigger - test extended error support [PASS] + [2] event trigger - test field variable support [PASS] + [3] event trigger - test inter-event combined histogram trigger [PASS] + [4] event trigger - test inter-event histogram trigger onmatch action [PASS] + [5] event trigger - test inter-event histogram trigger onmatch-onmax action [PASS] + [6] event trigger - test inter-event histogram trigger onmax action [PASS] + [7] event trigger - test synthetic event create remove [PASS] + +Link: http://lkml.kernel.org/r/e07ef1e72f7bf0f84dc87c9b736d6dc91b4b0b49.1516069914.git.tom.zanussi@linux.intel.com + +Signed-off-by: Rajvi Jingar +Signed-off-by: Tom Zanussi +Signed-off-by: Steven Rostedt (VMware) +(cherry picked from commit fb08b656dc9caee4a097bc4d8e050e2ead59bc24) +Signed-off-by: Sebastian Andrzej Siewior +--- + tools/testing/selftests/ftrace/test.d/functions | 7 + + tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc | 39 ++++++ + tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc | 54 +++++++++ + tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc | 58 ++++++++++ + tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc | 50 ++++++++ + tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc | 50 ++++++++ + tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc | 48 ++++++++ + tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc | 54 +++++++++ + 8 files changed, 360 insertions(+) + create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc + create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc + create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc + create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc + create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc + create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc + create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc + +--- a/tools/testing/selftests/ftrace/test.d/functions ++++ b/tools/testing/selftests/ftrace/test.d/functions +@@ -59,6 +59,13 @@ disable_events() { + echo 0 > events/enable + } + ++clear_synthetic_events() { # reset all current synthetic events ++ grep -v ^# synthetic_events | ++ while read line; do ++ echo "!$line" >> synthetic_events ++ done ++} ++ + initialize_ftrace() { # Reset ftrace to initial-state + # As the initial state, ftrace will be set to nop tracer, + # no events, no triggers, no filters, no function filters, +--- /dev/null ++++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc +@@ -0,0 +1,39 @@ ++#!/bin/sh ++# description: event trigger - test extended error support ++ ++ ++do_reset() { ++ reset_trigger ++ echo > set_event ++ clear_trace ++} ++ ++fail() { #msg ++ do_reset ++ echo $1 ++ exit_fail ++} ++ ++if [ ! -f set_event ]; then ++ echo "event tracing is not supported" ++ exit_unsupported ++fi ++ ++if [ ! 
-f synthetic_events ]; then ++ echo "synthetic event is not supported" ++ exit_unsupported ++fi ++ ++reset_tracer ++do_reset ++ ++echo "Test extended error support" ++echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger ++echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null ++if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then ++ fail "Failed to generate extended error in histogram" ++fi ++ ++do_reset ++ ++exit 0 +--- /dev/null ++++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc +@@ -0,0 +1,54 @@ ++#!/bin/sh ++# description: event trigger - test field variable support ++ ++do_reset() { ++ reset_trigger ++ echo > set_event ++ clear_trace ++} ++ ++fail() { #msg ++ do_reset ++ echo $1 ++ exit_fail ++} ++ ++if [ ! -f set_event ]; then ++ echo "event tracing is not supported" ++ exit_unsupported ++fi ++ ++if [ ! -f synthetic_events ]; then ++ echo "synthetic event is not supported" ++ exit_unsupported ++fi ++ ++clear_synthetic_events ++reset_tracer ++do_reset ++ ++echo "Test field variable support" ++ ++echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events ++echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger ++echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger ++echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger ++ ++ping localhost -c 3 ++if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then ++ fail "Failed to create inter-event histogram" ++fi ++ ++if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then ++ fail "Failed to create histogram with field variable" ++fi ++ ++echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger ++ ++if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then ++ fail "Failed to remove histogram with field variable" ++fi ++ ++do_reset ++ ++exit 0 +--- /dev/null ++++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +@@ -0,0 +1,58 @@ ++#!/bin/sh ++# description: event trigger - test inter-event combined histogram trigger ++ ++do_reset() { ++ reset_trigger ++ echo > set_event ++ clear_trace ++} ++ ++fail() { #msg ++ do_reset ++ echo $1 ++ exit_fail ++} ++ ++if [ ! -f set_event ]; then ++ echo "event tracing is not supported" ++ exit_unsupported ++fi ++ ++if [ ! -f synthetic_events ]; then ++ echo "synthetic event is not supported" ++ exit_unsupported ++fi ++ ++reset_tracer ++do_reset ++clear_synthetic_events ++ ++echo "Test create synthetic event" ++ ++echo 'waking_latency u64 lat pid_t pid' > synthetic_events ++if [ ! 
-d events/synthetic/waking_latency ]; then ++ fail "Failed to create waking_latency synthetic event" ++fi ++ ++echo "Test combined histogram" ++ ++echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger ++echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger ++echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger ++ ++echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events ++echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger ++echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger ++ ++echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events ++echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger ++echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger ++ ++ping localhost -c 3 ++if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then ++ fail "Failed to create combined histogram" ++fi ++ ++do_reset ++ ++exit 0 +--- /dev/null ++++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc +@@ -0,0 +1,50 @@ ++#!/bin/sh ++# description: event trigger - test inter-event histogram trigger onmatch action ++ ++do_reset() { ++ reset_trigger ++ echo > set_event ++ clear_trace ++} ++ ++fail() { #msg ++ do_reset ++ echo $1 ++ exit_fail ++} ++ ++if [ ! -f set_event ]; then ++ echo "event tracing is not supported" ++ exit_unsupported ++fi ++ ++if [ ! -f synthetic_events ]; then ++ echo "synthetic event is not supported" ++ exit_unsupported ++fi ++ ++clear_synthetic_events ++reset_tracer ++do_reset ++ ++echo "Test create synthetic event" ++ ++echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events ++if [ ! -d events/synthetic/wakeup_latency ]; then ++ fail "Failed to create wakeup_latency synthetic event" ++fi ++ ++echo "Test create histogram for synthetic event" ++echo "Test histogram variables,simple expression support and onmatch action" ++ ++echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger ++echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger ++echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger ++ping localhost -c 5 ++if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then ++ fail "Failed to create onmatch action inter-event histogram" ++fi ++ ++do_reset ++ ++exit 0 +--- /dev/null ++++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc +@@ -0,0 +1,50 @@ ++#!/bin/sh ++# description: event trigger - test inter-event histogram trigger onmatch-onmax action ++ ++do_reset() { ++ reset_trigger ++ echo > set_event ++ clear_trace ++} ++ ++fail() { #msg ++ do_reset ++ echo $1 ++ exit_fail ++} ++ ++if [ ! -f set_event ]; then ++ echo "event tracing is not supported" ++ exit_unsupported ++fi ++ ++if [ ! 
-f synthetic_events ]; then
++ echo "synthetic event is not supported"
++ exit_unsupported
++fi
++
++clear_synthetic_events
++reset_tracer
++do_reset
++
++echo "Test create synthetic event"
++
++echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
++if [ ! -d events/synthetic/wakeup_latency ]; then
++ fail "Failed to create wakeup_latency synthetic event"
++fi
++
++echo "Test create histogram for synthetic event"
++echo "Test histogram variables, simple expression support and onmatch-onmax action"
++
++echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
++echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
++echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
++ping localhost -c 5
++if ! grep -q "ping" events/synthetic/wakeup_latency/hist || ! grep -q "max:" events/sched/sched_switch/hist; then
++ fail "Failed to create onmatch-onmax action inter-event histogram"
++fi
++
++do_reset
++
++exit 0
+--- /dev/null
++++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
+@@ -0,0 +1,48 @@
++#!/bin/sh
++# description: event trigger - test inter-event histogram trigger onmax action
++
++do_reset() {
++ reset_trigger
++ echo > set_event
++ clear_trace
++}
++
++fail() { #msg
++ do_reset
++ echo $1
++ exit_fail
++}
++
++if [ ! -f set_event ]; then
++ echo "event tracing is not supported"
++ exit_unsupported
++fi
++
++if [ ! -f synthetic_events ]; then
++ echo "synthetic event is not supported"
++ exit_unsupported
++fi
++
++clear_synthetic_events
++reset_tracer
++do_reset
++
++echo "Test create synthetic event"
++
++echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
++if [ ! -d events/synthetic/wakeup_latency ]; then
++ fail "Failed to create wakeup_latency synthetic event"
++fi
++
++echo "Test onmax action"
++
++echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
++echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
++ping localhost -c 3
++if ! grep -q "max:" events/sched/sched_switch/hist; then
++ fail "Failed to create onmax action inter-event histogram"
++fi
++
++do_reset
++
++exit 0
+--- /dev/null
++++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
+@@ -0,0 +1,54 @@
++#!/bin/sh
++# description: event trigger - test synthetic event create remove
++do_reset() {
++ reset_trigger
++ echo > set_event
++ clear_trace
++}
++
++fail() { #msg
++ do_reset
++ echo $1
++ exit_fail
++}
++
++if [ ! -f set_event ]; then
++ echo "event tracing is not supported"
++ exit_unsupported
++fi
++
++if [ ! -f synthetic_events ]; then
++ echo "synthetic event is not supported"
++ exit_unsupported
++fi
++
++clear_synthetic_events
++reset_tracer
++do_reset
++
++echo "Test create synthetic event"
++
++echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
++if [ !
-d events/synthetic/wakeup_latency ]; then ++ fail "Failed to create wakeup_latency synthetic event" ++fi ++ ++reset_trigger ++ ++echo "Test create synthetic event with an error" ++echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null ++if [ -d events/synthetic/wakeup_latency ]; then ++ fail "Created wakeup_latency synthetic event with an invalid format" ++fi ++ ++reset_trigger ++ ++echo "Test remove synthetic event" ++echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events ++if [ -d events/synthetic/wakeup_latency ]; then ++ fail "Failed to delete wakeup_latency synthetic event" ++fi ++ ++do_reset ++ ++exit 0 diff --git a/debian/patches/features/all/rt/ACPICA-Convert-acpi_gbl_hardware-lock-back-to-an-acp.patch b/debian/patches/features/all/rt/ACPICA-Convert-acpi_gbl_hardware-lock-back-to-an-acp.patch new file mode 100644 index 000000000..9c9a18e66 --- /dev/null +++ b/debian/patches/features/all/rt/ACPICA-Convert-acpi_gbl_hardware-lock-back-to-an-acp.patch @@ -0,0 +1,121 @@ +From: Steven Rostedt +Date: Wed, 25 Apr 2018 16:01:37 +0200 +Subject: [PATCH] ACPICA: Convert acpi_gbl_hardware lock back to an + acpi_raw_spinlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +We hit the following bug with -RT: + +|BUG: scheduling while atomic: swapper/7/0/0x00000002 +|Pid: 0, comm: swapper/7 Not tainted 3.6.11-rt28.19.el6rt.x86_64.debug #1 +|Call Trace: +| rt_spin_lock+0x16/0x40 +| __schedule_bug+0x67/0x90 +| __schedule+0x793/0x7a0 +| acpi_os_acquire_lock+0x1f/0x23 +| acpi_write_bit_register+0x33/0xb0 +| rt_spin_lock_slowlock+0xe5/0x2f0 +| acpi_idle_enter_bm+0x8a/0x28e +… +As the acpi code disables interrupts in acpi_idle_enter_bm, and calls +code that grabs the acpi lock, it causes issues as the lock is currently +in RT a sleeping lock. + +The lock was converted from a raw to a sleeping lock due to some +previous issues, and tests that showed it didn't seem to matter. +Unfortunately, it did matter for one of our boxes. + +This patch converts the lock back to a raw lock. I've run this code on a +few of my own machines, one being my laptop that uses the acpi quite +extensively. I've been able to suspend and resume without issues. + +[ tglx: Made the change exclusive for acpi_gbl_hardware_lock ] + +Signed-off-by: Steven Rostedt +Cc: John Kacur +Cc: Clark Williams +Link: http://lkml.kernel.org/r/1360765565.23152.5.camel@gandalf.local.home +Signed-off-by: Thomas Gleixner +[bigeasy: shorten the backtrace, use the type acpi_raw_spinlock incl. 
+ accessor] +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/acpi/acpica/acglobal.h | 2 +- + drivers/acpi/acpica/hwregs.c | 4 ++-- + drivers/acpi/acpica/hwxface.c | 4 ++-- + drivers/acpi/acpica/utmutex.c | 4 ++-- + 4 files changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/acpi/acpica/acglobal.h ++++ b/drivers/acpi/acpica/acglobal.h +@@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pen + * interrupt level + */ + ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ +-ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ ++ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ + ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); + + /* Mutex for _OSI support */ +--- a/drivers/acpi/acpica/hwregs.c ++++ b/drivers/acpi/acpica/hwregs.c +@@ -426,14 +426,14 @@ acpi_status acpi_hw_clear_acpi_status(vo + ACPI_BITMASK_ALL_FIXED_STATUS, + ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); + +- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); ++ lock_flags = acpi_os_acquire_raw_lock(acpi_gbl_hardware_lock); + + /* Clear the fixed events in PM1 A/B */ + + status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, + ACPI_BITMASK_ALL_FIXED_STATUS); + +- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); ++ acpi_os_release_raw_lock(acpi_gbl_hardware_lock, lock_flags); + + if (ACPI_FAILURE(status)) { + goto exit; +--- a/drivers/acpi/acpica/hwxface.c ++++ b/drivers/acpi/acpica/hwxface.c +@@ -261,7 +261,7 @@ acpi_status acpi_write_bit_register(u32 + return_ACPI_STATUS(AE_BAD_PARAMETER); + } + +- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); ++ lock_flags = acpi_os_acquire_raw_lock(acpi_gbl_hardware_lock); + + /* + * At this point, we know that the parent register is one of the +@@ -322,7 +322,7 @@ acpi_status acpi_write_bit_register(u32 + + unlock_and_exit: + +- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); ++ acpi_os_release_raw_lock(acpi_gbl_hardware_lock, lock_flags); + return_ACPI_STATUS(status); + } + +--- a/drivers/acpi/acpica/utmutex.c ++++ b/drivers/acpi/acpica/utmutex.c +@@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(voi + return_ACPI_STATUS (status); + } + +- status = acpi_os_create_lock (&acpi_gbl_hardware_lock); ++ status = acpi_os_create_raw_lock(&acpi_gbl_hardware_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } +@@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void) + /* Delete the spinlocks */ + + acpi_os_delete_lock(acpi_gbl_gpe_lock); +- acpi_os_delete_lock(acpi_gbl_hardware_lock); ++ acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); + acpi_os_delete_lock(acpi_gbl_reference_count_lock); + + /* Delete the reader/writer lock */ diff --git a/debian/patches/features/all/rt/ACPICA-provide-abstraction-for-raw_spinlock_t.patch b/debian/patches/features/all/rt/ACPICA-provide-abstraction-for-raw_spinlock_t.patch new file mode 100644 index 000000000..cc05d85d1 --- /dev/null +++ b/debian/patches/features/all/rt/ACPICA-provide-abstraction-for-raw_spinlock_t.patch @@ -0,0 +1,123 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 25 Apr 2018 15:19:42 +0200 +Subject: [PATCH] ACPICA: provide abstraction for raw_spinlock_t +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Provide a new lock type acpi_raw_spinlock which is implemented as +raw_spinlock_t on Linux. 
This type should be used in code which covers +small areas of code and disables interrupts only for short time even on +a realtime OS. +There is a fallback to spinlock_t if an OS does not provide an +implementation for acpi_raw_spinlock. + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/acpi/acpiosxf.h | 21 +++++++++++++++++++++ + include/acpi/actypes.h | 4 ++++ + include/acpi/platform/aclinux.h | 5 +++++ + include/acpi/platform/aclinuxex.h | 30 ++++++++++++++++++++++++++++++ + 4 files changed, 60 insertions(+) + +--- a/include/acpi/acpiosxf.h ++++ b/include/acpi/acpiosxf.h +@@ -132,6 +132,27 @@ void acpi_os_release_lock(acpi_spinlock + #endif + + /* ++ * RAW spinlock primitives. If the OS does not provide them, fallback to ++ * spinlock primitives ++ */ ++#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_raw_lock ++# define acpi_os_create_raw_lock(out_handle) acpi_os_create_lock(out_handle) ++#endif ++ ++#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_delete_raw_lock ++# define acpi_os_delete_raw_lock(handle) acpi_os_delete_lock(handle) ++#endif ++ ++#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_raw_lock ++# define acpi_os_acquire_raw_lock(handle) acpi_os_acquire_lock(handle) ++#endif ++ ++#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_release_raw_lock ++# define acpi_os_release_raw_lock(handle, flags) \ ++ acpi_os_release_lock(handle, flags) ++#endif ++ ++/* + * Semaphore primitives + */ + #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_semaphore +--- a/include/acpi/actypes.h ++++ b/include/acpi/actypes.h +@@ -279,6 +279,10 @@ typedef u64 acpi_physical_address; + #define acpi_spinlock void * + #endif + ++#ifndef acpi_raw_spinlock ++#define acpi_raw_spinlock acpi_spinlock ++#endif ++ + #ifndef acpi_semaphore + #define acpi_semaphore void * + #endif +--- a/include/acpi/platform/aclinux.h ++++ b/include/acpi/platform/aclinux.h +@@ -134,6 +134,7 @@ + + #define acpi_cache_t struct kmem_cache + #define acpi_spinlock spinlock_t * ++#define acpi_raw_spinlock raw_spinlock_t * + #define acpi_cpu_flags unsigned long + + /* Use native linux version of acpi_os_allocate_zeroed */ +@@ -151,6 +152,10 @@ + #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_object + #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id + #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock ++#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_raw_lock ++#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_delete_raw_lock ++#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_raw_lock ++#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_release_raw_lock + + /* + * OSL interfaces used by debugger/disassembler +--- a/include/acpi/platform/aclinuxex.h ++++ b/include/acpi/platform/aclinuxex.h +@@ -124,6 +124,36 @@ static inline acpi_thread_id acpi_os_get + lock ? AE_OK : AE_NO_MEMORY; \ + }) + ++ ++#define acpi_os_create_raw_lock(__handle) \ ++ ({ \ ++ raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ ++ if (lock) { \ ++ *(__handle) = lock; \ ++ raw_spin_lock_init(*(__handle)); \ ++ } \ ++ lock ? 
AE_OK : AE_NO_MEMORY; \ ++ }) ++ ++static inline acpi_cpu_flags acpi_os_acquire_raw_lock(acpi_raw_spinlock lockp) ++{ ++ acpi_cpu_flags flags; ++ ++ raw_spin_lock_irqsave(lockp, flags); ++ return flags; ++} ++ ++static inline void acpi_os_release_raw_lock(acpi_raw_spinlock lockp, ++ acpi_cpu_flags flags) ++{ ++ raw_spin_unlock_irqrestore(lockp, flags); ++} ++ ++static inline void acpi_os_delete_raw_lock(acpi_raw_spinlock handle) ++{ ++ ACPI_FREE(handle); ++} ++ + static inline u8 acpi_os_readable(void *pointer, acpi_size length) + { + return TRUE; diff --git a/debian/patches/features/all/rt/ALSA-pcm-Hide-local_irq_disable-enable-and-local_irq.patch b/debian/patches/features/all/rt/ALSA-pcm-Hide-local_irq_disable-enable-and-local_irq.patch new file mode 100644 index 000000000..5303df395 --- /dev/null +++ b/debian/patches/features/all/rt/ALSA-pcm-Hide-local_irq_disable-enable-and-local_irq.patch @@ -0,0 +1,152 @@ +From: Anna-Maria Gleixner +Date: Thu, 29 Mar 2018 17:09:27 +0200 +Subject: [PATCH] ALSA: pcm: Hide local_irq_disable/enable() and + local_irqsave/restore() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The snd_pcm_stream_lock_irq*() functions decouple disabling interrupts +from the actual locking process. This does not work as expected if the +locking primitives are replaced like on preempt-rt. + +Provide one function for locking which uses correct locking primitives. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + sound/core/pcm_native.c | 85 ++++++++++++++++++++++++++++++++---------------- + 1 file changed, 57 insertions(+), 28 deletions(-) + +--- a/sound/core/pcm_native.c ++++ b/sound/core/pcm_native.c +@@ -99,6 +99,57 @@ static inline void down_write_nonblock(s + cond_resched(); + } + ++#define PCM_LOCK_DEFAULT 0 ++#define PCM_LOCK_IRQ 1 ++#define PCM_LOCK_IRQSAVE 2 ++ ++static unsigned long __snd_pcm_stream_lock_mode(struct snd_pcm_substream *substream, ++ unsigned int mode) ++{ ++ unsigned long flags = 0; ++ if (substream->pcm->nonatomic) { ++ down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING); ++ mutex_lock(&substream->self_group.mutex); ++ } else { ++ switch (mode) { ++ case PCM_LOCK_DEFAULT: ++ read_lock(&snd_pcm_link_rwlock); ++ break; ++ case PCM_LOCK_IRQ: ++ read_lock_irq(&snd_pcm_link_rwlock); ++ break; ++ case PCM_LOCK_IRQSAVE: ++ read_lock_irqsave(&snd_pcm_link_rwlock, flags); ++ break; ++ } ++ spin_lock(&substream->self_group.lock); ++ } ++ return flags; ++} ++ ++static void __snd_pcm_stream_unlock_mode(struct snd_pcm_substream *substream, ++ unsigned int mode, unsigned long flags) ++{ ++ if (substream->pcm->nonatomic) { ++ mutex_unlock(&substream->self_group.mutex); ++ up_read(&snd_pcm_link_rwsem); ++ } else { ++ spin_unlock(&substream->self_group.lock); ++ ++ switch (mode) { ++ case PCM_LOCK_DEFAULT: ++ read_unlock(&snd_pcm_link_rwlock); ++ break; ++ case PCM_LOCK_IRQ: ++ read_unlock_irq(&snd_pcm_link_rwlock); ++ break; ++ case PCM_LOCK_IRQSAVE: ++ read_unlock_irqrestore(&snd_pcm_link_rwlock, flags); ++ break; ++ } ++ } ++} ++ + /** + * snd_pcm_stream_lock - Lock the PCM stream + * @substream: PCM substream +@@ -109,13 +160,7 @@ static inline void down_write_nonblock(s + */ + void snd_pcm_stream_lock(struct snd_pcm_substream *substream) + { +- if (substream->pcm->nonatomic) { +- down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING); +- mutex_lock(&substream->self_group.mutex); +- } else { +- read_lock(&snd_pcm_link_rwlock); +- 
spin_lock(&substream->self_group.lock); +- } ++ __snd_pcm_stream_lock_mode(substream, PCM_LOCK_DEFAULT); + } + EXPORT_SYMBOL_GPL(snd_pcm_stream_lock); + +@@ -127,13 +172,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_lock); + */ + void snd_pcm_stream_unlock(struct snd_pcm_substream *substream) + { +- if (substream->pcm->nonatomic) { +- mutex_unlock(&substream->self_group.mutex); +- up_read(&snd_pcm_link_rwsem); +- } else { +- spin_unlock(&substream->self_group.lock); +- read_unlock(&snd_pcm_link_rwlock); +- } ++ __snd_pcm_stream_unlock_mode(substream, PCM_LOCK_DEFAULT, 0); + } + EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); + +@@ -147,9 +186,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock) + */ + void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) + { +- if (!substream->pcm->nonatomic) +- local_irq_disable(); +- snd_pcm_stream_lock(substream); ++ __snd_pcm_stream_lock_mode(substream, PCM_LOCK_IRQ); + } + EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); + +@@ -161,19 +198,13 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_ir + */ + void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream) + { +- snd_pcm_stream_unlock(substream); +- if (!substream->pcm->nonatomic) +- local_irq_enable(); ++ __snd_pcm_stream_unlock_mode(substream, PCM_LOCK_IRQ, 0); + } + EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); + + unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) + { +- unsigned long flags = 0; +- if (!substream->pcm->nonatomic) +- local_irq_save(flags); +- snd_pcm_stream_lock(substream); +- return flags; ++ return __snd_pcm_stream_lock_mode(substream, PCM_LOCK_IRQSAVE); + } + EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave); + +@@ -187,9 +218,7 @@ EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_i + void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, + unsigned long flags) + { +- snd_pcm_stream_unlock(substream); +- if (!substream->pcm->nonatomic) +- local_irq_restore(flags); ++ __snd_pcm_stream_unlock_mode(substream, PCM_LOCK_IRQSAVE, flags); + } + EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); + diff --git a/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch b/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch index 19ccb4ced..1c31c4bc4 100644 --- a/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch +++ b/debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch @@ -1,7 +1,7 @@ From: "Yadi.hu" Date: Wed, 10 Dec 2014 10:32:09 +0800 Subject: ARM: enable irq in translation/section permission fault handlers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Probably happens on all ARM, with CONFIG_PREEMPT_RT_FULL @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c -@@ -434,6 +434,9 @@ do_translation_fault(unsigned long addr, +@@ -433,6 +433,9 @@ do_translation_fault(unsigned long addr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (user_mode(regs)) goto bad_area; -@@ -501,6 +504,9 @@ do_translation_fault(unsigned long addr, +@@ -500,6 +503,9 @@ do_translation_fault(unsigned long addr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { diff --git 
a/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch b/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch index f498f70e9..ca2851654 100644 --- a/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch +++ b/debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 21 Mar 2013 19:01:05 +0100 Subject: printk: Drop the logbuf_lock more often -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The lock is hold with irgs off. The latency drops 500us+ on my arm bugs with a "full" buffer after executing "dmesg" on the shell. @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1407,6 +1407,8 @@ static int syslog_print_all(char __user +@@ -1411,6 +1411,8 @@ static int syslog_print_all(char __user { char *text; int len = 0; @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) -@@ -1418,6 +1420,14 @@ static int syslog_print_all(char __user +@@ -1422,6 +1424,14 @@ static int syslog_print_all(char __user u64 seq; u32 idx; @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. -@@ -1430,6 +1440,14 @@ static int syslog_print_all(char __user +@@ -1434,6 +1444,14 @@ static int syslog_print_all(char __user len += msg_print_text(msg, true, NULL, 0); idx = log_next(idx); seq++; @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* move first record forward until length fits into the buffer */ -@@ -1441,6 +1459,14 @@ static int syslog_print_all(char __user +@@ -1445,6 +1463,14 @@ static int syslog_print_all(char __user len -= msg_print_text(msg, true, NULL, 0); idx = log_next(idx); seq++; @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* last message fitting into this dump */ -@@ -1479,6 +1505,7 @@ static int syslog_print_all(char __user +@@ -1483,6 +1509,7 @@ static int syslog_print_all(char __user clear_seq = log_next_seq; clear_idx = log_next_idx; } diff --git a/debian/patches/features/all/rt/IB-ipoib-replace-local_irq_disable-with-proper-locki.patch b/debian/patches/features/all/rt/IB-ipoib-replace-local_irq_disable-with-proper-locki.patch new file mode 100644 index 000000000..b347226bb --- /dev/null +++ b/debian/patches/features/all/rt/IB-ipoib-replace-local_irq_disable-with-proper-locki.patch @@ -0,0 +1,74 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 11 Apr 2018 13:34:26 +0200 +Subject: [PATCH] IB/ipoib: replace local_irq_disable() with proper locking +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Commit 78bfe0b5b67f ("IPoIB: Take dev->xmit_lock around mc_list accesses") +introduced xmit_lock lock in ipoib_mcast_restart_task() and commit +932ff279a43a ("[NET]: Add netif_tx_lock") preserved the locking order while +dev->xmit_lock has been replaced with a helper. The netif_tx_lock should +not be acquired with disabled interrupts because it is meant to be a BH +disabling lock. + +The priv->lock is always acquired with interrupts disabled. The only +place where netif_addr_lock() and priv->lock nest ist +ipoib_mcast_restart_task(). 
By reversing the lock order and taking +netif_addr lock with bottom halfs disabled it is possible to get rid of +the local_irq_save() completely. + +This requires to take priv->lock with spin_lock_irq() inside the netif_addr +locked section. It's safe to do so because the caller is either a worker +function or __ipoib_ib_dev_flush() which are both calling with interrupts +enabled. + +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +@@ -886,7 +886,6 @@ void ipoib_mcast_restart_task(struct wor + struct netdev_hw_addr *ha; + struct ipoib_mcast *mcast, *tmcast; + LIST_HEAD(remove_list); +- unsigned long flags; + struct ib_sa_mcmember_rec rec; + + if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) +@@ -898,9 +897,8 @@ void ipoib_mcast_restart_task(struct wor + + ipoib_dbg_mcast(priv, "restarting multicast task\n"); + +- local_irq_save(flags); +- netif_addr_lock(dev); +- spin_lock(&priv->lock); ++ netif_addr_lock_bh(dev); ++ spin_lock_irq(&priv->lock); + + /* + * Unfortunately, the networking core only gives us a list of all of +@@ -978,9 +976,8 @@ void ipoib_mcast_restart_task(struct wor + } + } + +- spin_unlock(&priv->lock); +- netif_addr_unlock(dev); +- local_irq_restore(flags); ++ spin_unlock_irq(&priv->lock); ++ netif_addr_unlock_bh(dev); + + ipoib_mcast_remove_list(&remove_list); + +@@ -988,9 +985,9 @@ void ipoib_mcast_restart_task(struct wor + * Double check that we are still up + */ + if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { +- spin_lock_irqsave(&priv->lock, flags); ++ spin_lock_irq(&priv->lock); + __ipoib_mcast_schedule_join_thread(priv, NULL, 0); +- spin_unlock_irqrestore(&priv->lock, flags); ++ spin_unlock_irq(&priv->lock); + } + } + diff --git a/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch b/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch index f51a9972b..c5a70e959 100644 --- a/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch +++ b/debian/patches/features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Thu, 11 Feb 2016 11:54:01 -0600 Subject: KVM: arm/arm64: downgrade preempt_disable()d region to migrate_disable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating the vgic and timer states to prevent the calling task from migrating to @@ -23,27 +23,27 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c -@@ -650,7 +650,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -678,7 +678,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v * involves poking the GIC, which must be done in a * non-preemptible context. 
*/ - preempt_disable(); + migrate_disable(); - kvm_pmu_flush_hwstate(vcpu); - -@@ -687,7 +687,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v - kvm_pmu_sync_hwstate(vcpu); - kvm_timer_sync_hwstate(vcpu); + /* Flush FP/SIMD state that can't survive guest entry/exit */ + kvm_fpsimd_flush_cpu_state(); +@@ -729,7 +729,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); + local_irq_enable(); - preempt_enable(); + migrate_enable(); continue; } -@@ -742,7 +742,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v - - kvm_vgic_sync_hwstate(vcpu); +@@ -803,7 +803,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, run, ret); - preempt_enable(); + migrate_enable(); diff --git a/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch b/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch index 4edbf930e..ca19c395a 100644 --- a/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch +++ b/debian/patches/features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch @@ -5,7 +5,7 @@ Cc: Anna Schumaker , linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org, tglx@linutronix.de Subject: NFSv4: replace seqcount_t with a seqlock_t -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me because it maps to preempt_disable() in -RT which I can't have at this @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c -@@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(st +@@ -151,11 +151,11 @@ static int nfs_delegation_claim_opens(st sp = state->owner; /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c -@@ -2638,7 +2638,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2778,7 +2778,7 @@ static int _nfs4_open_and_get_state(stru unsigned int seq; int ret; @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = _nfs4_proc_open(opendata); if (ret != 0) -@@ -2676,7 +2676,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2816,7 +2816,7 @@ static int _nfs4_open_and_get_state(stru if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior out: --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c -@@ -494,7 +494,7 @@ nfs4_alloc_state_owner(struct nfs_server +@@ -502,7 +502,7 @@ nfs4_alloc_state_owner(struct nfs_server nfs4_init_seqid_counter(&sp->so_seqid); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); @@ -87,7 +87,7 @@ Signed-off-by: Sebastian Andrzej Siewior mutex_init(&sp->so_delegreturn_mutex); return sp; } -@@ -1516,8 +1516,12 @@ static int nfs4_reclaim_open_state(struc +@@ -1554,8 +1554,12 @@ static int nfs4_reclaim_open_state(struc * recovering after a network partition or a reboot from a * server that doesn't support a grace period. 
*/ @@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) -@@ -1586,14 +1590,20 @@ static int nfs4_reclaim_open_state(struc +@@ -1624,14 +1628,20 @@ static int nfs4_reclaim_open_state(struc spin_lock(&sp->so_lock); goto restart; } diff --git a/debian/patches/features/all/rt/RCU-skip-the-schedule-in-RCU-section-warning-on-UP-t.patch b/debian/patches/features/all/rt/RCU-skip-the-schedule-in-RCU-section-warning-on-UP-t.patch new file mode 100644 index 000000000..b96978c4b --- /dev/null +++ b/debian/patches/features/all/rt/RCU-skip-the-schedule-in-RCU-section-warning-on-UP-t.patch @@ -0,0 +1,121 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 16 Feb 2018 11:45:13 +0100 +Subject: [PATCH] RCU: skip the "schedule() in RCU section" warning on UP, + too +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +In "RCU: we need to skip that warning but only on sleeping locks" we +skipped a warning on SMP systems in case we schedule out in a RCU +section while attempt to obtain a sleeping lock. This is also required +on UP systems. +In order to do so, I introduce a tiny version of migrate_disable() + +_enable() which only update the counters which we then can check against +on RT && !SMP. + +Cc: stable-rt@vger.kernel.org +Reported-by: Grygorii Strashko +Tested-by: Grygorii Strashko +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/preempt.h | 9 +++++++++ + include/linux/sched.h | 6 ++++++ + kernel/rcu/tree_plugin.h | 2 +- + kernel/sched/core.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 61 insertions(+), 1 deletion(-) + +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -211,6 +211,15 @@ extern void migrate_enable(void); + + int __migrate_disabled(struct task_struct *p); + ++#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) ++ ++extern void migrate_disable(void); ++extern void migrate_enable(void); ++static inline int __migrate_disabled(struct task_struct *p) ++{ ++ return 0; ++} ++ + #else + #define migrate_disable() barrier() + #define migrate_enable() barrier() +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -604,6 +604,12 @@ struct task_struct { + # ifdef CONFIG_SCHED_DEBUG + int migrate_disable_atomic; + # endif ++ ++#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) ++ int migrate_disable; ++# ifdef CONFIG_SCHED_DEBUG ++ int migrate_disable_atomic; ++# endif + #endif + + #ifdef CONFIG_PREEMPT_RCU +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -328,7 +328,7 @@ static void rcu_preempt_note_context_swi + int mg_counter = 0; + + lockdep_assert_irqs_disabled(); +-#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) ++#if defined(CONFIG_PREEMPT_RT_BASE) + mg_counter = t->migrate_disable; + #endif + WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !mg_counter); +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -7278,4 +7278,49 @@ void migrate_enable(void) + preempt_enable(); + } + EXPORT_SYMBOL(migrate_enable); ++ ++#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) ++void migrate_disable(void) ++{ ++ struct task_struct *p = current; ++ ++ if (in_atomic() || irqs_disabled()) { ++#ifdef CONFIG_SCHED_DEBUG ++ p->migrate_disable_atomic++; ++#endif ++ return; ++ } ++#ifdef CONFIG_SCHED_DEBUG ++ if (unlikely(p->migrate_disable_atomic)) { ++ tracing_off(); ++ WARN_ON_ONCE(1); ++ } ++#endif ++ 
++ p->migrate_disable++;
++}
++EXPORT_SYMBOL(migrate_disable);
++
++void migrate_enable(void)
++{
++ struct task_struct *p = current;
++
++ if (in_atomic() || irqs_disabled()) {
++#ifdef CONFIG_SCHED_DEBUG
++ p->migrate_disable_atomic--;
++#endif
++ return;
++ }
++
++#ifdef CONFIG_SCHED_DEBUG
++ if (unlikely(p->migrate_disable_atomic)) {
++ tracing_off();
++ WARN_ON_ONCE(1);
++ }
++#endif
++
++ WARN_ON_ONCE(p->migrate_disable <= 0);
++ p->migrate_disable--;
++}
++EXPORT_SYMBOL(migrate_enable);
+ #endif
diff --git a/debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch b/debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch
index 5eb1ecf08..75f2eb64d 100644
--- a/debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch
+++ b/debian/patches/features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch
@@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior
 Date: Thu, 21 Sep 2017 14:25:13 +0200
 Subject: [PATCH] RCU: we need to skip that warning but only on sleeping locks
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
 
 This check is okay for upstream. On RT we trigger this while blocking
 on sleeping lock. In this case, it is okay to schedule() within a RCU
@@ -19,13 +19,13 @@ Signed-off-by: Sebastian Andrzej Siewior
 --- a/kernel/rcu/tree_plugin.h
 +++ b/kernel/rcu/tree_plugin.h
-@@ -323,9 +323,13 @@ static void rcu_preempt_note_context_swi
+@@ -325,9 +325,13 @@ static void rcu_preempt_note_context_swi
  struct task_struct *t = current;
  struct rcu_data *rdp;
  struct rcu_node *rnp;
 + int mg_counter = 0;
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n");
+ lockdep_assert_irqs_disabled();
 - WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
 +#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
 + mg_counter = t->migrate_disable;
diff --git a/debian/patches/features/all/rt/Revert-mm-vmstat.c-fix-vmstat_update-preemption-BUG.patch b/debian/patches/features/all/rt/Revert-mm-vmstat.c-fix-vmstat_update-preemption-BUG.patch
new file mode 100644
index 000000000..e23b864bb
--- /dev/null
+++ b/debian/patches/features/all/rt/Revert-mm-vmstat.c-fix-vmstat_update-preemption-BUG.patch
@@ -0,0 +1,50 @@
+From: Sebastian Andrzej Siewior
+Date: Wed, 11 Apr 2018 11:27:44 +0200
+Subject: [PATCH] Revert mm/vmstat.c: fix vmstat_update() preemption BUG
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+This patch reverts commit c7f26ccfb2c3 ("mm/vmstat.c: fix
+vmstat_update() preemption BUG").
+Steven saw a "using smp_processor_id() in preemptible" message and
+added a preempt_disable() section around it to keep it quiet. This is
+not the right thing to do; it does not fix the real problem.
+
+vmstat_update() is invoked by a kworker on a specific CPU. This worker
+is bound to this CPU. The name of the worker was "kworker/1:1" so it
+should have been a worker which was bound to CPU1. A worker which can
+run on any CPU would have a `u' before the first digit.
+smp_processor_id() can be used in a preempt-enabled region as long as
+the task is bound to a single CPU which is the case here. If it could
+run on an arbitrary CPU then this is the problem we have and should seek
+to resolve.
+Not only this smp_processor_id() must not be migrated to another CPU but +also refresh_cpu_vm_stats() which might access wrong per-CPU variables. +Not to mention that other code relies on the fact that such a worker +runs on one specific CPU only. + +Therefore I revert that commit and we should look instead what broke the +affinity mask of the kworker. + +Cc: Steven J. Hill +Cc: Tejun Heo +Cc: Andrew Morton +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/vmstat.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1839,11 +1839,9 @@ static void vmstat_update(struct work_st + * to occur in the future. Keep on running the + * update worker thread. + */ +- preempt_disable(); + queue_delayed_work_on(smp_processor_id(), mm_percpu_wq, + this_cpu_ptr(&vmstat_work), + round_jiffies_relative(sysctl_stat_interval)); +- preempt_enable(); + } + } + diff --git a/debian/patches/features/all/rt/SCSI-libsas-remove-irq-save-in-sas_ata_qc_issue.patch b/debian/patches/features/all/rt/SCSI-libsas-remove-irq-save-in-sas_ata_qc_issue.patch new file mode 100644 index 000000000..1e1c3d3b7 --- /dev/null +++ b/debian/patches/features/all/rt/SCSI-libsas-remove-irq-save-in-sas_ata_qc_issue.patch @@ -0,0 +1,47 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 12 Apr 2018 09:16:22 +0200 +Subject: [PATCH] [SCSI] libsas: remove irq save in sas_ata_qc_issue() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Since commit 312d3e56119a ("[SCSI] libsas: remove ata_port.lock +management duties from lldds") the sas_ata_qc_issue() function unlocks +the ata_port.lock and disables interrupts before doing so. +That lock is always taken with disabled interrupts so at this point, the +interrupts are already disabled. There is no need to disable the +interrupts before the unlock operation because they are already +disabled. +Restoring the interrupt state later does not change anything because +they were disabled and remain disabled. Therefore remove the operations +which do not change the behaviour. 
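+
+Sketched for illustration (editorial, not part of the patch; "flags" and
+the surrounding comments are placeholders), the removed pattern amounted
+to:
+
+  /* entered with ap->lock held, taken by the caller via spin_lock_irqsave() */
+  local_irq_save(flags);    /* saves an already-disabled irq state */
+  spin_unlock(ap->lock);    /* plain unlock: does not re-enable irqs */
+  /* ... issue the command, interrupts still disabled ... */
+  spin_lock(ap->lock);
+  local_irq_restore(flags); /* restores "disabled" to "disabled": a no-op pair */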
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/scsi/libsas/sas_ata.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/scsi/libsas/sas_ata.c ++++ b/drivers/scsi/libsas/sas_ata.c +@@ -176,7 +176,6 @@ static void sas_ata_task_done(struct sas + + static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) + { +- unsigned long flags; + struct sas_task *task; + struct scatterlist *sg; + int ret = AC_ERR_SYSTEM; +@@ -190,7 +189,6 @@ static unsigned int sas_ata_qc_issue(str + /* TODO: audit callers to ensure they are ready for qc_issue to + * unconditionally re-enable interrupts + */ +- local_irq_save(flags); + spin_unlock(ap->lock); + + /* If the device fell off, no sense in issuing commands */ +@@ -252,7 +250,6 @@ static unsigned int sas_ata_qc_issue(str + + out: + spin_lock(ap->lock); +- local_irq_restore(flags); + return ret; + } + diff --git a/debian/patches/features/all/rt/SCSI-qla2xxx-remove-irq-save-in-qla2x00_poll.patch b/debian/patches/features/all/rt/SCSI-qla2xxx-remove-irq-save-in-qla2x00_poll.patch new file mode 100644 index 000000000..4bc67706b --- /dev/null +++ b/debian/patches/features/all/rt/SCSI-qla2xxx-remove-irq-save-in-qla2x00_poll.patch @@ -0,0 +1,40 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 12 Apr 2018 09:55:25 +0200 +Subject: [PATCH] [SCSI] qla2xxx: remove irq save in qla2x00_poll() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +In commit d2ba5675d899 ("[SCSI] qla2xxx: Disable local-interrupts while +polling for RISC status.") added a local_irq_disable() before invoking +the ->intr_handler callback. The function, which was used in this +callback, did not disable interrupts while acquiring the spin_lock so a +deadlock was possible and this change was one possible solution. + +The function in question was qla2300_intr_handler() and is using +spin_lock_irqsave() since commit 43fac4d97a1a ("[SCSI] qla2xxx: Resolve +a performance issue in interrupt"). +I checked all other ->intr_handler callbacks and all of them use the +irqsave variant so it is safe to remove the local_irq_save() block now. 
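+
+For illustration (an editorial sketch; the handler name is made up and the
+lock follows the pattern described above, so this is not code from the
+patch), each remaining handler protects itself along these lines:
+
+  static irqreturn_t some_isp_intr_handler(int irq, void *dev_id)
+  {
+          struct rsp_que *rsp = dev_id;
+          unsigned long flags;
+
+          spin_lock_irqsave(&rsp->hw->hardware_lock, flags);
+          /* ... process the RISC status / response queue ... */
+          spin_unlock_irqrestore(&rsp->hw->hardware_lock, flags);
+          return IRQ_HANDLED;
+  }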
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/scsi/qla2xxx/qla_inline.h | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_inline.h ++++ b/drivers/scsi/qla2xxx/qla_inline.h +@@ -57,14 +57,12 @@ qla2x00_debounce_register(volatile uint1 + static inline void + qla2x00_poll(struct rsp_que *rsp) + { +- unsigned long flags; + struct qla_hw_data *ha = rsp->hw; +- local_irq_save(flags); ++ + if (IS_P3P_TYPE(ha)) + qla82xx_poll(0, rsp); + else + ha->isp_ops->intr_handler(0, rsp); +- local_irq_restore(flags); + } + + static inline uint8_t * diff --git a/debian/patches/features/all/rt/add_migrate_disable.patch b/debian/patches/features/all/rt/add_migrate_disable.patch index f33a098c8..be97196d6 100644 --- a/debian/patches/features/all/rt/add_migrate_disable.patch +++ b/debian/patches/features/all/rt/add_migrate_disable.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Sat, 27 May 2017 19:02:06 +0200 Subject: kernel/sched/core: add migrate_disable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz --- include/linux/preempt.h | 23 ++++++++ @@ -52,7 +52,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 #ifdef MODULE --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -580,6 +580,13 @@ struct task_struct { +@@ -592,6 +592,13 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; @@ -80,7 +80,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 * boot command line: --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1022,7 +1022,15 @@ void set_cpus_allowed_common(struct task +@@ -1033,7 +1033,15 @@ void set_cpus_allowed_common(struct task p->nr_cpus_allowed = cpumask_weight(new_mask); } @@ -97,7 +97,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 { struct rq *rq = task_rq(p); bool queued, running; -@@ -1051,6 +1059,20 @@ void do_set_cpus_allowed(struct task_str +@@ -1062,6 +1070,20 @@ void do_set_cpus_allowed(struct task_str set_curr_task(rq, p); } @@ -118,7 +118,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 /* * Change a given task's CPU affinity. Migrate the thread to a * proper CPU and schedule it away if the CPU it's executing on -@@ -1109,9 +1131,16 @@ static int __set_cpus_allowed_ptr(struct +@@ -1120,9 +1142,16 @@ static int __set_cpus_allowed_ptr(struct } /* Can the task run on the task's current CPU? 
If so, we're done */ @@ -136,7 +136,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); if (task_running(rq, p) || p->state == TASK_WAKING) { struct migration_arg arg = { p, dest_cpu }; -@@ -6759,3 +6788,100 @@ const u32 sched_prio_to_wmult[40] = { +@@ -7027,3 +7056,100 @@ const u32 sched_prio_to_wmult[40] = { /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; @@ -239,7 +239,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 +#endif --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c -@@ -1017,6 +1017,10 @@ void proc_sched_show_task(struct task_st +@@ -1030,6 +1030,10 @@ void proc_sched_show_task(struct task_st P(dl.runtime); P(dl.deadline); } diff --git a/debian/patches/features/all/rt/alim15x3-move-irq-restore-before-pci_dev_put.patch b/debian/patches/features/all/rt/alim15x3-move-irq-restore-before-pci_dev_put.patch new file mode 100644 index 000000000..f7d4b0ca4 --- /dev/null +++ b/debian/patches/features/all/rt/alim15x3-move-irq-restore-before-pci_dev_put.patch @@ -0,0 +1,34 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 3 Apr 2018 15:13:20 +0200 +Subject: [PATCH] alim15x3: move irq-restore before pci_dev_put() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +init_chipset_ali15x3() initializes the chipset during init with disabled +interrupts. There is no need to keep the interrupts disabled during +pci_dev_put(). +Move the irq-restore before pci_dev_put() is invoked. + +Side note: The same init is performed in +drivers/ata/pata_ali.c::ali_init_chipset() without disabled interrupts. +It looks that the same hardware is supported in the ATA land. Would it +make sense to remove this driver since it is supported in the other +subsystem? + +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/ide/alim15x3.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/ide/alim15x3.c ++++ b/drivers/ide/alim15x3.c +@@ -323,9 +323,9 @@ static int init_chipset_ali15x3(struct p + + pci_write_config_byte(dev, 0x53, tmpbyte); + } ++ local_irq_restore(flags); + pci_dev_put(north); + pci_dev_put(isa_dev); +- local_irq_restore(flags); + return 0; + } + diff --git a/debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch b/debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch index a9643f99f..650ac6ae2 100644 --- a/debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch +++ b/debian/patches/features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 11 Oct 2017 17:43:49 +0200 Subject: apparmor: use a locallock instead preempt_disable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz get_buffers() disables preemption which acts as a lock for the per-CPU variable. 
Since we can't disable preemption here on RT, a local_lock is diff --git a/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch b/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch index 4ee962fa3..56ffb2f13 100644 --- a/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch +++ b/debian/patches/features/all/rt/arch-arm64-Add-lazy-preempt-support.patch @@ -1,7 +1,7 @@ From: Anders Roxell Date: Thu, 14 May 2015 17:52:17 +0200 Subject: arch/arm64: Add lazy preempt support -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz arm64 is missing support for PREEMPT_RT. The main feature which is lacking is support for lazy preemption. The arch-specific entry code, @@ -21,7 +21,7 @@ Signed-off-by: Anders Roxell --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -103,6 +103,7 @@ config ARM64 +@@ -123,6 +123,7 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -39,7 +39,7 @@ Signed-off-by: Anders Roxell }; #define INIT_THREAD_INFO(tsk) \ -@@ -82,6 +83,7 @@ void arch_setup_new_exec(void); +@@ -82,6 +83,7 @@ void arch_release_task_struct(struct tas #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ @@ -47,7 +47,7 @@ Signed-off-by: Anders Roxell #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 -@@ -97,6 +99,7 @@ void arch_setup_new_exec(void); +@@ -99,6 +101,7 @@ void arch_release_task_struct(struct tas #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) @@ -55,7 +55,7 @@ Signed-off-by: Anders Roxell #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -108,8 +111,9 @@ void arch_setup_new_exec(void); +@@ -111,8 +114,9 @@ void arch_release_task_struct(struct tas #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ @@ -68,7 +68,7 @@ Signed-off-by: Anders Roxell _TIF_NOHZ) --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c -@@ -38,6 +38,7 @@ int main(void) +@@ -40,6 +40,7 @@ int main(void) BLANK(); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); @@ -78,7 +78,7 @@ Signed-off-by: Anders Roxell DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S -@@ -570,11 +570,16 @@ ENDPROC(el1_sync) +@@ -603,11 +603,16 @@ ENDPROC(el1_sync) #ifdef CONFIG_PREEMPT ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count @@ -98,7 +98,7 @@ Signed-off-by: Anders Roxell #endif #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on -@@ -588,6 +593,7 @@ ENDPROC(el1_irq) +@@ -621,6 +626,7 @@ ENDPROC(el1_irq) 1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? 
@@ -108,12 +108,12 @@ Signed-off-by: Anders Roxell --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -755,7 +755,7 @@ asmlinkage void do_notify_resume(struct +@@ -912,7 +912,7 @@ asmlinkage void do_notify_resume(struct /* Check valid user FS if needed */ addr_limit_user_check(); - if (thread_flags & _TIF_NEED_RESCHED) { + if (thread_flags & _TIF_NEED_RESCHED_MASK) { - schedule(); - } else { - local_irq_enable(); + /* Unmask Debug and SError for the next task */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + diff --git a/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch b/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch index beff4f24b..c4cf7d162 100644 --- a/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch +++ b/debian/patches/features/all/rt/arm-convert-boot-lock-to-raw.patch @@ -1,7 +1,7 @@ From: Frank Rowand Date: Mon, 19 Sep 2011 14:51:14 -0700 Subject: arm: Convert arm boot_lock to raw -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The arm boot_lock is used by the secondary processor startup code. The locking task is the idle thread, which has idle->sched_class == &idle_sched_class. @@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c -@@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void) +@@ -224,7 +224,7 @@ static void __iomem *scu_base_addr(void) return (void __iomem *)(S5P_VA_SCU); } @@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner static void exynos_secondary_init(unsigned int cpu) { -@@ -242,8 +242,8 @@ static void exynos_secondary_init(unsign +@@ -237,8 +237,8 @@ static void exynos_secondary_init(unsign /* * Synchronise with the boot thread. */ @@ -52,7 +52,7 @@ Signed-off-by: Thomas Gleixner } int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) -@@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigne +@@ -302,7 +302,7 @@ static int exynos_boot_secondary(unsigne * Set synchronisation state between this boot processor * and the secondary one */ @@ -61,7 +61,7 @@ Signed-off-by: Thomas Gleixner /* * The secondary processor is waiting to be released from -@@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigne +@@ -329,7 +329,7 @@ static int exynos_boot_secondary(unsigne if (timeout == 0) { printk(KERN_ERR "cpu1 power enable failed"); @@ -70,7 +70,7 @@ Signed-off-by: Thomas Gleixner return -ETIMEDOUT; } } -@@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigne +@@ -375,7 +375,7 @@ static int exynos_boot_secondary(unsigne * calibrations, then wait for it to finish */ fail: diff --git a/debian/patches/features/all/rt/arm-disable-NEON-in-kernel-mode.patch b/debian/patches/features/all/rt/arm-disable-NEON-in-kernel-mode.patch index 5521f7f48..c3dad2411 100644 --- a/debian/patches/features/all/rt/arm-disable-NEON-in-kernel-mode.patch +++ b/debian/patches/features/all/rt/arm-disable-NEON-in-kernel-mode.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 1 Dec 2017 10:42:03 +0100 Subject: [PATCH] arm*: disable NEON in kernel mode -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz NEON in kernel mode is used by the crypto algorithms and raid6 code. 
While the raid6 code looks okay, the crypto algorithms do not: NEON @@ -15,13 +15,13 @@ Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior --- arch/arm/Kconfig | 2 +- - arch/arm64/crypto/Kconfig | 20 ++++++++++---------- + arch/arm64/crypto/Kconfig | 26 +++++++++++++------------- arch/arm64/crypto/crc32-ce-glue.c | 3 ++- - 3 files changed, 13 insertions(+), 12 deletions(-) + 3 files changed, 16 insertions(+), 15 deletions(-) --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -2164,7 +2164,7 @@ config NEON +@@ -2166,7 +2166,7 @@ config NEON config KERNEL_MODE_NEON bool "Support for NEON in kernel mode" @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig -@@ -19,19 +19,19 @@ config CRYPTO_SHA512_ARM64 +@@ -19,37 +19,37 @@ config CRYPTO_SHA512_ARM64 config CRYPTO_SHA1_ARM64_CE tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" @@ -48,6 +48,27 @@ Signed-off-by: Sebastian Andrzej Siewior select CRYPTO_HASH select CRYPTO_SHA256_ARM64 + config CRYPTO_SHA512_ARM64_CE + tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)" +- depends on KERNEL_MODE_NEON ++ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE + select CRYPTO_HASH + select CRYPTO_SHA512_ARM64 + + config CRYPTO_SHA3_ARM64 + tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)" +- depends on KERNEL_MODE_NEON ++ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE + select CRYPTO_HASH + select CRYPTO_SHA3 + + config CRYPTO_SM3_ARM64_CE + tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)" +- depends on KERNEL_MODE_NEON ++ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE + select CRYPTO_HASH + select CRYPTO_SM3 + config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON @@ -55,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior select CRYPTO_HASH select CRYPTO_GF128MUL select CRYPTO_AES -@@ -39,7 +39,7 @@ config CRYPTO_GHASH_ARM64_CE +@@ -57,7 +57,7 @@ config CRYPTO_GHASH_ARM64_CE config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" @@ -64,7 +85,7 @@ Signed-off-by: Sebastian Andrzej Siewior select CRYPTO_HASH config CRYPTO_CRC32_ARM64_CE -@@ -53,13 +53,13 @@ config CRYPTO_AES_ARM64 +@@ -71,13 +71,13 @@ config CRYPTO_AES_ARM64 config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" @@ -80,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior select CRYPTO_ALGAPI select CRYPTO_AES_ARM64_CE select CRYPTO_AES_ARM64 -@@ -67,7 +67,7 @@ config CRYPTO_AES_ARM64_CE_CCM +@@ -85,7 +85,7 @@ config CRYPTO_AES_ARM64_CE_CCM config CRYPTO_AES_ARM64_CE_BLK tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" @@ -89,7 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64_CE select CRYPTO_AES_ARM64 -@@ -75,7 +75,7 @@ config CRYPTO_AES_ARM64_CE_BLK +@@ -93,7 +93,7 @@ config CRYPTO_AES_ARM64_CE_BLK config CRYPTO_AES_ARM64_NEON_BLK tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" @@ -98,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64 select CRYPTO_AES -@@ -83,13 +83,13 @@ config CRYPTO_AES_ARM64_NEON_BLK +@@ -101,13 +101,13 @@ config CRYPTO_AES_ARM64_NEON_BLK config CRYPTO_CHACHA20_NEON tristate "NEON accelerated ChaCha20 symmetric cipher" @@ -116,7 +137,7 @@ Signed-off-by: Sebastian Andrzej Siewior select CRYPTO_AES_ARM64 --- a/arch/arm64/crypto/crc32-ce-glue.c +++ 
b/arch/arm64/crypto/crc32-ce-glue.c -@@ -206,7 +206,8 @@ static struct shash_alg crc32_pmull_algs +@@ -208,7 +208,8 @@ static struct shash_alg crc32_pmull_algs static int __init crc32_pmull_mod_init(void) { diff --git a/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch b/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch index 148b9fa94..8f9875086 100644 --- a/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch +++ b/debian/patches/features/all/rt/arm-enable-highmem-for-rt.patch @@ -1,7 +1,7 @@ Subject: arm: Enable highmem for rt From: Thomas Gleixner Date: Wed, 13 Feb 2013 11:03:11 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz fixup highmem for ARM. diff --git a/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch b/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch index a6b22f0be..f4a829c2c 100644 --- a/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch +++ b/debian/patches/features/all/rt/arm-highmem-flush-tlb-on-unmap.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 11 Mar 2013 21:37:27 +0100 Subject: arm/highmem: Flush tlb on unmap -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The tlb should be flushed on unmap and thus make the mapping entry invalid. This is only done in the non-debug case which does not look diff --git a/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch b/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch index 5e97cac72..e2ae846ec 100644 --- a/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch +++ b/debian/patches/features/all/rt/arm-include-definition-for-cpumask_t.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 22 Dec 2016 17:28:33 +0100 Subject: [PATCH] arm: include definition for cpumask_t -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz This definition gets pulled in by other files. With the (later) split of RCU and spinlock.h it won't compile anymore. 
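[Editorial note] The hygiene rule behind the cpumask_t fix above, include what you use, generalizes: a header that names cpumask_t but relies on some other header to pull in its definition compiles only until the indirect include chain is reorganized, which is exactly what the RT split of RCU and spinlock.h does. A minimal sketch of the robust form (hypothetical header, not the ARM file):

    /* demo.h -- hypothetical header, for illustration only */
    #ifndef _DEMO_H
    #define _DEMO_H

    #include <linux/cpumask.h>      /* explicit: cpumask_t is used below */

    struct demo_state {
            cpumask_t allowed;      /* would break if the include above
                                     * only arrived transitively */
    };

    #endif /* _DEMO_H */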
diff --git a/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch b/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch index 9ba0859a9..a9f426d17 100644 --- a/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch +++ b/debian/patches/features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch @@ -1,7 +1,7 @@ From: Yang Shi Date: Thu, 10 Nov 2016 16:17:55 -0800 Subject: [PATCH] arm: kprobe: replace patch_lock to raw lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When running kprobe on -rt kernel, the below bug is caught: diff --git a/debian/patches/features/all/rt/arm-preempt-lazy-support.patch b/debian/patches/features/all/rt/arm-preempt-lazy-support.patch index 89d41db4b..d287cc029 100644 --- a/debian/patches/features/all/rt/arm-preempt-lazy-support.patch +++ b/debian/patches/features/all/rt/arm-preempt-lazy-support.patch @@ -1,7 +1,7 @@ Subject: arm: Add support for lazy preemption From: Thomas Gleixner Date: Wed, 31 Oct 2012 12:04:11 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Implement the arm pieces for lazy preempt. @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -85,6 +85,7 @@ config ARM +@@ -88,6 +88,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ -@@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(stru +@@ -139,7 +140,8 @@ extern int vfp_restore_user_hwstate(stru #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner #define TIF_NOHZ 12 /* in adaptive nohz mode */ #define TIF_USING_IWMMXT 17 -@@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(stru +@@ -149,6 +151,7 @@ extern int vfp_restore_user_hwstate(stru #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(stru +@@ -164,7 +167,8 @@ extern int vfp_restore_user_hwstate(stru * Change these and you break ASM code in entry-common.S */ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner #endif /* __ASM_ARM_THREAD_INFO_H */ --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c -@@ -65,6 +65,7 @@ int main(void) +@@ -67,6 +67,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S -@@ -220,11 +220,18 @@ ENDPROC(__dabt_svc) 
+@@ -216,11 +216,18 @@ ENDPROC(__dabt_svc) #ifdef CONFIG_PREEMPT ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner #endif svc_exit r5, irq = 1 @ return from exception -@@ -239,8 +246,14 @@ ENDPROC(__irq_svc) +@@ -235,8 +242,14 @@ ENDPROC(__irq_svc) 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED @@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner __und_fault: --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S -@@ -53,7 +53,9 @@ saved_pc .req lr +@@ -54,7 +54,9 @@ saved_pc .req lr cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing @@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner bne fast_work_pending -@@ -83,8 +85,11 @@ ENDPROC(ret_fast_syscall) +@@ -84,8 +86,11 @@ ENDPROC(ret_fast_syscall) cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing @@ -140,7 +140,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c -@@ -615,7 +615,8 @@ do_work_pending(struct pt_regs *regs, un +@@ -638,7 +638,8 @@ do_work_pending(struct pt_regs *regs, un */ trace_hardirqs_off(); do { diff --git a/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch b/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch index 5ceb310e8..14655268a 100644 --- a/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch +++ b/debian/patches/features/all/rt/arm-unwind-use_raw_lock.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 20 Sep 2013 14:31:54 +0200 Subject: arm/unwind: use a raw_spin_lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Mostly unwind is done with irqs enabled however SLUB may call it with irqs disabled while creating a new SLUB cache. diff --git a/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch b/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch index 6f9c4dd22..e9bd6a4e6 100644 --- a/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch +++ b/debian/patches/features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch @@ -1,7 +1,7 @@ Subject: arm64/xen: Make XEN depend on !RT From: Thomas Gleixner Date: Mon, 12 Oct 2015 11:18:40 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz It's not ready and probably never will be, unless xen folks have a look at it. 
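[Editorial note] Gating a feature at configuration time is the recurring pattern in this series for subsystems that are not RT-clean: the Kconfig symbol simply cannot be selected on an RT kernel, so all of its code compiles away. The C-visible effect can be sketched with IS_ENABLED() (hypothetical helper; assumes the dependency reads roughly "depends on ... && !PREEMPT_RT_FULL"):

    #include <linux/kconfig.h>
    #include <linux/types.h>

    /* Hypothetical helper, for illustration only. */
    static inline bool demo_feature_available(void)
    {
            /*
             * With "!PREEMPT_RT_FULL" in the Kconfig dependency,
             * CONFIG_XEN is never set on an RT kernel, so this folds
             * to a compile-time constant and dependent code vanishes.
             */
            return IS_ENABLED(CONFIG_XEN) &&
                   !IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
    }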
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -774,7 +774,7 @@ config XEN_DOM0 +@@ -846,7 +846,7 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM64" diff --git a/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch b/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch index 92bdf1fc8..d30357daf 100644 --- a/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch +++ b/debian/patches/features/all/rt/at91_dont_enable_disable_clock.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 09 Mar 2016 10:51:06 +0100 Subject: arm: at91: do not disable/enable clocks in a row -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Currently the driver will disable the clock and enable it one line later if it is switching from periodic mode into one shot. diff --git a/debian/patches/features/all/rt/block-Remove-redundant-WARN_ON.patch b/debian/patches/features/all/rt/block-Remove-redundant-WARN_ON.patch new file mode 100644 index 000000000..08acbbc09 --- /dev/null +++ b/debian/patches/features/all/rt/block-Remove-redundant-WARN_ON.patch @@ -0,0 +1,28 @@ +From: Anna-Maria Gleixner +Date: Wed, 11 Apr 2018 15:05:43 +0200 +Subject: [PATCH] block: Remove redundant WARN_ON() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Commit 2fff8a924d4c ("block: Check locking assumptions at runtime") added a +lockdep_assert_held(q->queue_lock) which makes the WARN_ON() redundant +because lockdep will detect and warn about context violations. + +The unconditional WARN_ON() does not provide real additional value, so it +can be removed. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-core.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -288,7 +288,6 @@ EXPORT_SYMBOL(blk_start_queue_async); + void blk_start_queue(struct request_queue *q) + { + lockdep_assert_held(q->queue_lock); +- WARN_ON(!in_interrupt() && !irqs_disabled()); + WARN_ON_ONCE(q->mq_ops); + + queue_flag_clear(QUEUE_FLAG_STOPPED, q); diff --git a/debian/patches/features/all/rt/block-avoid-disabling-interrupts-during-kmap_atomic.patch b/debian/patches/features/all/rt/block-avoid-disabling-interrupts-during-kmap_atomic.patch new file mode 100644 index 000000000..4b9ba26fb --- /dev/null +++ b/debian/patches/features/all/rt/block-avoid-disabling-interrupts-during-kmap_atomic.patch @@ -0,0 +1,39 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 10 Apr 2018 17:32:57 +0200 +Subject: [PATCH] block: don't disable interrupts during kmap_atomic() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +bounce_copy_vec() disables interrupts around kmap_atomic(). This is a +leftover from the old kmap_atomic() implementation which relied on fixed +mapping slots, so the caller had to make sure that the same slot could not +be reused from an interrupting context. + +kmap_atomic() was changed to dynamic slots long ago and commit 1ec9c5ddc17a +("include/linux/highmem.h: remove the second argument of k[un]map_atomic()") +removed the slot assignements, but the callers were not checked for now +redundant interrupt disabling. + +Remove the conditional interrupt disable. 
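[Editorial note] The resulting idiom is short enough to show. kmap_atomic() has disabled preemption itself since the switch to dynamically stacked mapping slots, so a caller only adds interrupt latency by wrapping it in local_irq_save(). A minimal sketch with a hypothetical helper name:

    #include <linux/highmem.h>
    #include <linux/string.h>

    /* Hypothetical helper: copy a buffer into a (possibly highmem) page. */
    static void demo_copy_to_page(struct page *page, unsigned int off,
                                  const void *from, unsigned int len)
    {
            void *vto = kmap_atomic(page);  /* disables preemption itself */

            memcpy(vto + off, from, len);
            kunmap_atomic(vto);             /* no local_irq_save() needed */
    }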
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + block/bounce.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/block/bounce.c ++++ b/block/bounce.c +@@ -63,14 +63,11 @@ static __init int init_emergency_pool(vo + */ + static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) + { +- unsigned long flags; + unsigned char *vto; + +- local_irq_save(flags); + vto = kmap_atomic(to->bv_page); + memcpy(vto + to->bv_offset, vfrom, to->bv_len); + kunmap_atomic(vto); +- local_irq_restore(flags); + } + + #else /* CONFIG_HIGHMEM */ diff --git a/debian/patches/features/all/rt/block-blk-mq-move-blk_queue_usage_counter_release-in.patch b/debian/patches/features/all/rt/block-blk-mq-move-blk_queue_usage_counter_release-in.patch new file mode 100644 index 000000000..033248c7b --- /dev/null +++ b/debian/patches/features/all/rt/block-blk-mq-move-blk_queue_usage_counter_release-in.patch @@ -0,0 +1,112 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 13 Mar 2018 13:49:16 +0100 +Subject: [PATCH] block: blk-mq: move blk_queue_usage_counter_release() + into process context +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914 +| in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6 +| 5 locks held by kworker/u257:6/255: +| #0: ("events_unbound"){.+.+.+}, at: [] process_one_work+0x171/0x5e0 +| #1: ((&entry->work)){+.+.+.}, at: [] process_one_work+0x171/0x5e0 +| #2: (&shost->scan_mutex){+.+.+.}, at: [] __scsi_add_device+0xa3/0x130 [scsi_mod] +| #3: (&set->tag_list_lock){+.+...}, at: [] blk_mq_init_queue+0x96a/0xa50 +| #4: (rcu_read_lock_sched){......}, at: [] percpu_ref_kill_and_confirm+0x1d/0x120 +| Preemption disabled at:[] blk_mq_freeze_queue_start+0x56/0x70 +| +| CPU: 2 PID: 255 Comm: kworker/u257:6 Not tainted 3.18.7-rt0+ #1 +| Workqueue: events_unbound async_run_entry_fn +| 0000000000000003 ffff8800bc29f998 ffffffff815b3a12 0000000000000000 +| 0000000000000000 ffff8800bc29f9b8 ffffffff8109aa16 ffff8800bc29fa28 +| ffff8800bc5d1bc8 ffff8800bc29f9e8 ffffffff815b8dd4 ffff880000000000 +| Call Trace: +| [] dump_stack+0x4f/0x7c +| [] __might_sleep+0x116/0x190 +| [] rt_spin_lock+0x24/0x60 +| [] __wake_up+0x29/0x60 +| [] blk_mq_usage_counter_release+0x1e/0x20 +| [] percpu_ref_kill_and_confirm+0x106/0x120 +| [] blk_mq_freeze_queue_start+0x56/0x70 +| [] blk_mq_update_tag_set_depth+0x40/0xd0 +| [] blk_mq_init_queue+0x98c/0xa50 +| [] scsi_mq_alloc_queue+0x20/0x60 [scsi_mod] +| [] scsi_alloc_sdev+0x2f5/0x370 [scsi_mod] +| [] scsi_probe_and_add_lun+0x9e4/0xdd0 [scsi_mod] +| [] __scsi_add_device+0x126/0x130 [scsi_mod] +| [] ata_scsi_scan_host+0xaf/0x200 [libata] +| [] async_port_probe+0x46/0x60 [libata] +| [] async_run_entry_fn+0x3b/0xf0 +| [] process_one_work+0x201/0x5e0 + +percpu_ref_kill_and_confirm() invokes blk_mq_usage_counter_release() in +a rcu-sched region. swait based wake queue can't be used due to +wake_up_all() usage and disabled interrupts in !RT configs (as reported +by Corey Minyard). +The wq_has_sleeper() check has been suggested by Peter Zijlstra. 
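[Editorial note] The general pattern here: a percpu_ref release callback runs in atomic (RCU-sched) context, and on RT the wait-queue lock inside wake_up_all() may sleep, so the wakeup is bounced to process context, and only when wq_has_sleeper() says someone is actually waiting. Sketched below with a regular work_struct instead of the RT series' swork, and hypothetical structure names:

    #include <linux/kernel.h>
    #include <linux/percpu-refcount.h>
    #include <linux/wait.h>
    #include <linux/workqueue.h>

    struct demo_queue {                     /* hypothetical */
            struct percpu_ref       usage;
            wait_queue_head_t       freeze_wq;
            struct work_struct      wake_work;
    };

    static void demo_wake_fn(struct work_struct *work)
    {
            struct demo_queue *q = container_of(work, struct demo_queue,
                                                wake_work);

            wake_up_all(&q->freeze_wq);     /* safe: process context */
    }

    static void demo_usage_release(struct percpu_ref *ref)
    {
            struct demo_queue *q = container_of(ref, struct demo_queue,
                                                usage);

            /* Atomic (RCU-sched) context: defer, and only if needed. */
            if (wq_has_sleeper(&q->freeze_wq))
                    schedule_work(&q->wake_work);
    }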
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-core.c | 14 +++++++++++++- + include/linux/blkdev.h | 2 ++ + 2 files changed, 15 insertions(+), 1 deletion(-) + +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -876,12 +876,21 @@ void blk_queue_exit(struct request_queue + percpu_ref_put(&q->q_usage_counter); + } + ++static void blk_queue_usage_counter_release_swork(struct swork_event *sev) ++{ ++ struct request_queue *q = ++ container_of(sev, struct request_queue, mq_pcpu_wake); ++ ++ wake_up_all(&q->mq_freeze_wq); ++} ++ + static void blk_queue_usage_counter_release(struct percpu_ref *ref) + { + struct request_queue *q = + container_of(ref, struct request_queue, q_usage_counter); + +- wake_up_all(&q->mq_freeze_wq); ++ if (wq_has_sleeper(&q->mq_freeze_wq)) ++ swork_queue(&q->mq_pcpu_wake); + } + + static void blk_rq_timed_out_timer(struct timer_list *t) +@@ -958,6 +967,7 @@ struct request_queue *blk_alloc_queue_no + __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + + init_waitqueue_head(&q->mq_freeze_wq); ++ INIT_SWORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_swork); + + /* + * Init percpu_ref in atomic mode so that it's faster to shutdown. +@@ -3838,6 +3848,8 @@ int __init blk_dev_init(void) + if (!kblockd_workqueue) + panic("Failed to create kblockd\n"); + ++ BUG_ON(swork_get()); ++ + request_cachep = kmem_cache_create("blkdev_requests", + sizeof(struct request), 0, SLAB_PANIC, NULL); + +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -653,6 +654,7 @@ struct request_queue { + #endif + struct rcu_head rcu_head; + wait_queue_head_t mq_freeze_wq; ++ struct swork_event mq_pcpu_wake; + struct percpu_ref q_usage_counter; + struct list_head all_q_node; + diff --git a/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch b/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch index 5f1fe70e9..d64533dc1 100644 --- a/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch +++ b/debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Jan 2015 15:10:08 +0100 Subject: block/mq: don't complete requests via IPI -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The IPI runs in hardirq context and there are sleeping locks. This patch moves the completion into a workqueue. 
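[Editorial note] The mechanical shape of the change: instead of pushing the completion back to the submitting CPU with an IPI, which executes in hard-irq context, each request carries a work item and the completion runs from a kworker. A sketch with a hypothetical request type, not the blk-mq structures (the real patch initializes the work item once, at request-init time):

    #include <linux/kernel.h>
    #include <linux/workqueue.h>

    struct demo_request {                   /* hypothetical */
            struct work_struct      work;
            void                    (*softirq_done)(struct demo_request *rq);
    };

    static void demo_complete_work(struct work_struct *work)
    {
            struct demo_request *rq = container_of(work, struct demo_request,
                                                   work);

            rq->softirq_done(rq);           /* process context, may sleep */
    }

    /* Called where the IPI used to be sent. */
    static void demo_complete_remote(struct demo_request *rq)
    {
            INIT_WORK(&rq->work, demo_complete_work);
            schedule_work(&rq->work);
    }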
@@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-core.c +++ b/block/blk-core.c -@@ -116,6 +116,9 @@ void blk_rq_init(struct request_queue *q +@@ -117,6 +117,9 @@ void blk_rq_init(struct request_queue *q INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->timeout_list); @@ -28,9 +28,9 @@ Signed-off-by: Sebastian Andrzej Siewior rq->__sector = (sector_t) -1; --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -318,6 +318,9 @@ static struct request *blk_mq_rq_ctx_ini - /* tag was already set */ +@@ -311,6 +311,9 @@ static struct request *blk_mq_rq_ctx_ini rq->extra_len = 0; + rq->__deadline = 0; +#ifdef CONFIG_PREEMPT_RT_FULL + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); @@ -38,7 +38,7 @@ Signed-off-by: Sebastian Andrzej Siewior INIT_LIST_HEAD(&rq->timeout_list); rq->timeout = 0; -@@ -512,12 +515,24 @@ void blk_mq_end_request(struct request * +@@ -518,12 +521,24 @@ void blk_mq_end_request(struct request * } EXPORT_SYMBOL(blk_mq_end_request); @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void __blk_mq_complete_request(struct request *rq) { -@@ -542,10 +557,18 @@ static void __blk_mq_complete_request(st +@@ -551,10 +566,18 @@ static void __blk_mq_complete_request(st shared = cpus_share_cache(cpu, ctx->cpu); if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior } --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h -@@ -226,7 +226,7 @@ static inline u16 blk_mq_unique_tag_to_t +@@ -245,7 +245,7 @@ static inline u16 blk_mq_unique_tag_to_t return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } @@ -95,13 +95,13 @@ Signed-off-by: Sebastian Andrzej Siewior void blk_mq_end_request(struct request *rq, blk_status_t error); --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -134,6 +134,9 @@ typedef __u32 __bitwise req_flags_t; +@@ -142,6 +142,9 @@ typedef __u32 __bitwise req_flags_t; */ struct request { - struct list_head queuelist; + struct request_queue *q; +#ifdef CONFIG_PREEMPT_RT_FULL + struct work_struct work; +#endif - union { - struct __call_single_data csd; - u64 fifo_time; + struct blk_mq_ctx *mq_ctx; + + int cpu; diff --git a/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch b/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch index f6248f072..bb355ad73 100644 --- a/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch +++ b/debian/patches/features/all/rt/block-mq-drop-preempt-disable.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: block/mq: do not invoke preempt_disable() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz preempt_disable() and get_cpu() don't play well together with the sleeping locks it tries to allocate later. 
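[Editorial note] get_cpu() is preempt_disable() plus the CPU number, so any sleeping lock taken before the matching put_cpu() trips the RT might_sleep() check. get_cpu_light(), which this series introduces, pins the task with migrate_disable() instead: the CPU number stays stable, but the section remains preemptible. A sketch of the idiom (hypothetical per-CPU data; lock initialization elided):

    #include <linux/percpu.h>
    #include <linux/spinlock.h>

    struct demo_ctx {
            spinlock_t lock;        /* a sleeping lock on RT */
            unsigned long hits;
    };

    static DEFINE_PER_CPU(struct demo_ctx, demo_ctxs);

    static void demo_touch_percpu(void)
    {
            /* get_cpu_light() is provided by the RT series: it does
             * migrate_disable() + smp_processor_id(), so the task stays
             * on this CPU but remains preemptible. */
            int cpu = get_cpu_light();
            struct demo_ctx *ctx = per_cpu_ptr(&demo_ctxs, cpu);

            spin_lock(&ctx->lock);  /* fine on RT: preemption still on */
            ctx->hits++;
            spin_unlock(&ctx->lock);
            put_cpu_light();
    }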
@@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -537,7 +537,7 @@ static void __blk_mq_complete_request(st +@@ -546,7 +546,7 @@ static void __blk_mq_complete_request(st return; } @@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) shared = cpus_share_cache(cpu, ctx->cpu); -@@ -549,7 +549,7 @@ static void __blk_mq_complete_request(st +@@ -558,7 +558,7 @@ static void __blk_mq_complete_request(st } else { rq->q->softirq_done_fn(rq); } @@ -31,8 +31,8 @@ Signed-off-by: Sebastian Andrzej Siewior + put_cpu_light(); } - /** -@@ -1197,14 +1197,14 @@ static void __blk_mq_delay_run_hw_queue( + static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) +@@ -1405,14 +1405,14 @@ static void __blk_mq_delay_run_hw_queue( return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { @@ -49,4 +49,4 @@ Signed-off-by: Sebastian Andrzej Siewior + put_cpu_light(); } - kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), + kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, diff --git a/debian/patches/features/all/rt/block-mq-use-cpu_light.patch b/debian/patches/features/all/rt/block-mq-use-cpu_light.patch index 89a3fbb67..ddfc89585 100644 --- a/debian/patches/features/all/rt/block-mq-use-cpu_light.patch +++ b/debian/patches/features/all/rt/block-mq-use-cpu_light.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 9 Apr 2014 10:37:23 +0200 Subject: block: mq: use cpu_light() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz there is a might sleep splat because get_cpu() disables preemption and later we grab a lock. As a workaround for this we use get_cpu_light(). @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-mq.h +++ b/block/blk-mq.h -@@ -98,12 +98,12 @@ static inline struct blk_mq_ctx *__blk_m +@@ -147,12 +147,12 @@ static inline struct blk_mq_ctx *__blk_m */ static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) { diff --git a/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch b/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch index 3628f801f..93c2eb83e 100644 --- a/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch +++ b/debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch @@ -1,40 +1,38 @@ Subject: block: Shorten interrupt disabled regions From: Thomas Gleixner Date: Wed, 22 Jun 2011 19:47:02 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz -Moving the blk_sched_flush_plug() call out of the interrupt/preempt -disabled region in the scheduler allows us to replace +Commit 9c40cef2b799 ("sched: Move blk_schedule_flush_plug() out of +__schedule()") moved the blk_schedule_flush_plug() call out of the +interrupt/preempt disabled region in the scheduler. This allows to replace local_irq_save/restore(flags) by local_irq_disable/enable() in -blk_flush_plug(). +blk_flush_plug_list(). -Now instead of doing this we disable interrupts explicitely when we -lock the request_queue and reenable them when we drop the lock. 
That -allows interrupts to be handled when the plug list contains requests -for more than one queue. - -Aside of that this change makes the scope of the irq disabled region -more obvious. The current code confused the hell out of me when -looking at: +But it makes more sense to disable interrupts explicitly when the request +queue is locked end reenable them when the request to is unlocked. This +shortens the interrupt disabled section which is important when the plug +list contains requests for more than one queue. The comment which claims +that disabling interrupts around the loop is misleading as the called +functions can reenable interrupts unconditionally anyway and obfuscates the +scope badly: local_irq_save(flags); spin_lock(q->queue_lock); ... queue_unplugged(q...); scsi_request_fn(); - spin_unlock(q->queue_lock); - spin_lock(shost->host_lock); - spin_unlock_irq(shost->host_lock); + spin_unlock_irq(q->queue_lock); -------------------^^^ ???? spin_lock_irq(q->queue_lock); - spin_unlock(q->lock); + spin_unlock(q->queue_lock); local_irq_restore(flags); -Also add a comment to __blk_run_queue() documenting that -q->request_fn() can drop q->queue_lock and reenable interrupts, but -must return with q->queue_lock held and interrupts disabled. +Aside of that the detached interrupt disabling is a constant pain for +PREEMPT_RT as it requires patching and special casing when RT is enabled +while with the spin_*_irq() variants this happens automatically. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra @@ -48,7 +46,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de --- a/block/blk-core.c +++ b/block/blk-core.c -@@ -3291,7 +3291,7 @@ static void queue_unplugged(struct reque +@@ -3520,7 +3520,7 @@ static void queue_unplugged(struct reque blk_run_queue_async(q); else __blk_run_queue(q); @@ -57,7 +55,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de } static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) -@@ -3339,7 +3339,6 @@ EXPORT_SYMBOL(blk_check_plugged); +@@ -3568,7 +3568,6 @@ EXPORT_SYMBOL(blk_check_plugged); void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; @@ -65,7 +63,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de struct request *rq; LIST_HEAD(list); unsigned int depth; -@@ -3359,11 +3358,6 @@ void blk_flush_plug_list(struct blk_plug +@@ -3588,11 +3587,6 @@ void blk_flush_plug_list(struct blk_plug q = NULL; depth = 0; @@ -77,7 +75,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de while (!list_empty(&list)) { rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); -@@ -3376,7 +3370,7 @@ void blk_flush_plug_list(struct blk_plug +@@ -3605,7 +3599,7 @@ void blk_flush_plug_list(struct blk_plug queue_unplugged(q, depth, from_schedule); q = rq->q; depth = 0; @@ -86,7 +84,7 @@ Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de } /* -@@ -3403,8 +3397,6 @@ void blk_flush_plug_list(struct blk_plug +@@ -3632,8 +3626,6 @@ void blk_flush_plug_list(struct blk_plug */ if (q) queue_unplugged(q, depth, from_schedule); diff --git a/debian/patches/features/all/rt/block-use-cpu-chill.patch b/debian/patches/features/all/rt/block-use-cpu-chill.patch index 683efd128..a1d83f7c7 100644 --- a/debian/patches/features/all/rt/block-use-cpu-chill.patch +++ b/debian/patches/features/all/rt/block-use-cpu-chill.patch @@ -1,7 +1,7 @@ Subject: block: Use cpu_chill() for retry loops From: Thomas Gleixner Date: Thu, 20 Dec 2012 
18:28:26 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. Steven also observed a live lock when there was a diff --git a/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch b/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch index 69cee582e..03bfa36b2 100644 --- a/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch +++ b/debian/patches/features/all/rt/cgroups-use-simple-wait-in-css_release.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 13 Feb 2015 15:52:24 +0100 Subject: cgroups: use simple wait in css_release() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz To avoid: |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914 @@ -35,15 +35,15 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h -@@ -19,6 +19,7 @@ - #include +@@ -20,6 +20,7 @@ + #include #include #include +#include #ifdef CONFIG_CGROUPS -@@ -152,6 +153,7 @@ struct cgroup_subsys_state { +@@ -153,6 +154,7 @@ struct cgroup_subsys_state { /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; @@ -53,7 +53,7 @@ Signed-off-by: Sebastian Andrzej Siewior * PI: the parent css. Placed here for cache proximity to following --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c -@@ -4498,10 +4498,10 @@ static void css_free_rcu_fn(struct rcu_h +@@ -4582,10 +4582,10 @@ static void css_free_rcu_fn(struct rcu_h queue_work(cgroup_destroy_wq, &css->destroy_work); } @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; -@@ -4552,8 +4552,8 @@ static void css_release(struct percpu_re +@@ -4639,8 +4639,8 @@ static void css_release(struct percpu_re struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); @@ -77,7 +77,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void init_and_link_css(struct cgroup_subsys_state *css, -@@ -5259,6 +5259,7 @@ static int __init cgroup_wq_init(void) +@@ -5359,6 +5359,7 @@ static int __init cgroup_wq_init(void) */ cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); BUG_ON(!cgroup_destroy_wq); diff --git a/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch b/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch index e73faf0ac..61d21c91a 100644 --- a/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch +++ b/debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch @@ -1,7 +1,7 @@ From: Benedikt Spranger Date: Mon, 8 Mar 2010 18:57:04 +0100 Subject: clocksource: TCLIB: Allow higher clock rates for clock events -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz As default the TCLIB uses the 32KiHz base clock rate for clock events. Add a compile time selection to allow higher clock resulution. 
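[Editorial note] A quick calculation shows why the TCLIB base clock matters: a clock-event device cannot place an event more precisely than one counter tick, so the default 32.768 kHz slow clock bounds one-shot timer placement to a roughly 30.5 us grid, while driving the TC block from a faster divided master clock, say 5 MHz, tightens that to 200 ns. A self-contained check (the 5 MHz figure is an assumed example, not a value from the patch):

    #include <stdio.h>

    int main(void)
    {
            const double slow_hz = 32768.0;         /* default slow clock */
            const double fast_hz = 5000000.0;       /* assumed divided MCK */

            printf("tick at 32.768 kHz: %.2f us\n", 1e6 / slow_hz); /* ~30.52 */
            printf("tick at 5 MHz:      %.2f ns\n", 1e9 / fast_hz); /* 200.00 */
            return 0;
    }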
diff --git a/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch b/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch index 655660fe6..c19b57dca 100644 --- a/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch +++ b/debian/patches/features/all/rt/completion-use-simple-wait-queues.patch @@ -1,7 +1,7 @@ Subject: completion: Use simple wait queues From: Thomas Gleixner Date: Fri, 11 Jan 2013 11:23:51 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Completions have no long lasting callbacks and therefor do not need the complex waitqueue variant. Use simple waitqueues which reduces the @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 2 - drivers/usb/gadget/function/f_fs.c | 2 - drivers/usb/gadget/legacy/inode.c | 4 +- - include/linux/completion.h | 10 ++--- + include/linux/completion.h | 8 ++-- include/linux/suspend.h | 6 +++ include/linux/swait.h | 1 kernel/power/hibernate.c | 7 ++++ @@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner kernel/sched/completion.c | 34 ++++++++++---------- kernel/sched/core.c | 10 ++++- kernel/sched/swait.c | 20 +++++++++++ - 11 files changed, 72 insertions(+), 28 deletions(-) + 11 files changed, 71 insertions(+), 27 deletions(-) --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner break; --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c -@@ -1610,7 +1610,7 @@ static void ffs_data_put(struct ffs_data +@@ -1608,7 +1608,7 @@ static void ffs_data_put(struct ffs_data pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || @@ -46,7 +46,7 @@ Signed-off-by: Thomas Gleixner kfree(ffs->dev_name); --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c -@@ -347,7 +347,7 @@ ep_io (struct ep_data *epdata, void *buf +@@ -343,7 +343,7 @@ ep_io (struct ep_data *epdata, void *buf spin_unlock_irq (&epdata->dev->lock); if (likely (value == 0)) { @@ -55,7 +55,7 @@ Signed-off-by: Thomas Gleixner if (value != 0) { spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { -@@ -356,7 +356,7 @@ ep_io (struct ep_data *epdata, void *buf +@@ -352,7 +352,7 @@ ep_io (struct ep_data *epdata, void *buf usb_ep_dequeue (epdata->ep, epdata->req); spin_unlock_irq (&epdata->dev->lock); @@ -72,33 +72,28 @@ Signed-off-by: Thomas Gleixner -#include +#include - #ifdef CONFIG_LOCKDEP_COMPLETIONS - #include - #endif -@@ -28,7 +28,7 @@ + + /* + * struct completion - structure used to maintain state for a "completion" +@@ -25,7 +25,7 @@ */ struct completion { unsigned int done; - wait_queue_head_t wait; + struct swait_queue_head wait; - #ifdef CONFIG_LOCKDEP_COMPLETIONS - struct lockdep_map_cross map; - #endif -@@ -67,11 +67,11 @@ static inline void complete_release_comm + }; + + #define init_completion_map(x, m) __init_completion(x) +@@ -34,7 +34,7 @@ static inline void complete_acquire(stru + static inline void complete_release(struct completion *x) {} - #ifdef CONFIG_LOCKDEP_COMPLETIONS - #define COMPLETION_INITIALIZER(work) \ -- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ -+ { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ - STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } - #else #define 
COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } - #endif - #define COMPLETION_INITIALIZER_ONSTACK(work) \ -@@ -117,7 +117,7 @@ static inline void complete_release_comm + #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ + (*({ init_completion_map(&(work), &(map)); &(work); })) +@@ -85,7 +85,7 @@ static inline void complete_release(stru static inline void __init_completion(struct completion *x) { x->done = 0; @@ -191,17 +186,13 @@ Signed-off-by: Thomas Gleixner EXPORT_SYMBOL(pm_suspend); --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c -@@ -32,7 +32,7 @@ void complete(struct completion *x) +@@ -32,12 +32,12 @@ void complete(struct completion *x) { unsigned long flags; - spin_lock_irqsave(&x->wait.lock, flags); + raw_spin_lock_irqsave(&x->wait.lock, flags); - /* - * Perform commit of crossrelease here. -@@ -41,8 +41,8 @@ void complete(struct completion *x) - if (x->done != UINT_MAX) x->done++; - __wake_up_locked(&x->wait, TASK_NORMAL, 1); @@ -211,7 +202,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(complete); -@@ -66,10 +66,10 @@ void complete_all(struct completion *x) +@@ -61,10 +61,10 @@ void complete_all(struct completion *x) { unsigned long flags; @@ -225,7 +216,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(complete_all); -@@ -78,20 +78,20 @@ do_wait_for_common(struct completion *x, +@@ -73,20 +73,20 @@ do_wait_for_common(struct completion *x, long (*action)(long), long timeout, int state) { if (!x->done) { @@ -251,7 +242,7 @@ Signed-off-by: Thomas Gleixner if (!x->done) return timeout; } -@@ -108,9 +108,9 @@ static inline long __sched +@@ -103,9 +103,9 @@ static inline long __sched complete_acquire(x); @@ -263,7 +254,7 @@ Signed-off-by: Thomas Gleixner complete_release(x); -@@ -299,12 +299,12 @@ bool try_wait_for_completion(struct comp +@@ -294,12 +294,12 @@ bool try_wait_for_completion(struct comp if (!READ_ONCE(x->done)) return 0; @@ -278,7 +269,7 @@ Signed-off-by: Thomas Gleixner return ret; } EXPORT_SYMBOL(try_wait_for_completion); -@@ -330,8 +330,8 @@ bool completion_done(struct completion * +@@ -325,8 +325,8 @@ bool completion_done(struct completion * * otherwise we can end up freeing the completion before complete() * is done referencing it. */ @@ -291,7 +282,7 @@ Signed-off-by: Thomas Gleixner EXPORT_SYMBOL(completion_done); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6816,7 +6816,10 @@ void migrate_disable(void) +@@ -7075,7 +7075,10 @@ void migrate_disable(void) return; } #ifdef CONFIG_SCHED_DEBUG @@ -303,7 +294,7 @@ Signed-off-by: Thomas Gleixner #endif if (p->migrate_disable) { -@@ -6846,7 +6849,10 @@ void migrate_enable(void) +@@ -7105,7 +7108,10 @@ void migrate_enable(void) } #ifdef CONFIG_SCHED_DEBUG diff --git a/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch b/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch index 4e7af0931..a1c85ded2 100644 --- a/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch +++ b/debian/patches/features/all/rt/cond-resched-lock-rt-tweak.patch @@ -1,7 +1,7 @@ Subject: sched: Use the proper LOCK_OFFSET for cond_resched() From: Thomas Gleixner Date: Sun, 17 Jul 2011 22:51:33 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz RT does not increment preempt count when a 'sleeping' spinlock is locked. 
Update PREEMPT_LOCK_OFFSET for that case. diff --git a/debian/patches/features/all/rt/cond-resched-softirq-rt.patch b/debian/patches/features/all/rt/cond-resched-softirq-rt.patch index 22ee28042..39efb546f 100644 --- a/debian/patches/features/all/rt/cond-resched-softirq-rt.patch +++ b/debian/patches/features/all/rt/cond-resched-softirq-rt.patch @@ -1,7 +1,7 @@ Subject: sched: Take RT softirq semantics into account in cond_resched() From: Thomas Gleixner Date: Thu, 14 Jul 2011 09:56:44 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The softirq semantics work different on -RT. There is no SOFTIRQ_MASK in the preemption counter which leads to the BUG_ON() statement in @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1606,12 +1606,16 @@ extern int __cond_resched_lock(spinlock_ +@@ -1625,12 +1625,16 @@ extern int __cond_resched_lock(spinlock_ __cond_resched_lock(lock); \ }) @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner { --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4945,6 +4945,7 @@ int __cond_resched_lock(spinlock_t *lock +@@ -5020,6 +5020,7 @@ int __cond_resched_lock(spinlock_t *lock } EXPORT_SYMBOL(__cond_resched_lock); @@ -43,7 +43,7 @@ Signed-off-by: Thomas Gleixner int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); -@@ -4958,6 +4959,7 @@ int __sched __cond_resched_softirq(void) +@@ -5033,6 +5034,7 @@ int __sched __cond_resched_softirq(void) return 0; } EXPORT_SYMBOL(__cond_resched_softirq); diff --git a/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch b/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch index 08db6486c..1fc5b02a8 100644 --- a/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch +++ b/debian/patches/features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch @@ -2,7 +2,7 @@ From: Mike Galbraith Date: Sun, 16 Oct 2016 05:11:54 +0200 Subject: [PATCH] connector/cn_proc: Protect send_msg() with a local lock on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931 |in_atomic(): 1, irqs_disabled(): 0, pid: 31807, name: sleep diff --git a/debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch b/debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch index 8ebfed1d0..d95e2d0ec 100644 --- a/debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch +++ b/debian/patches/features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch @@ -1,7 +1,7 @@ Subject: cpu/hotplug: Implement CPU pinning From: Thomas Gleixner Date: Wed, 19 Jul 2017 17:31:20 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Signed-off-by: Thomas Gleixner --- @@ -11,7 +11,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -589,6 +589,7 @@ struct task_struct { +@@ -602,6 +602,7 @@ struct task_struct { #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) int migrate_disable; int 
migrate_disable_update; diff --git a/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch b/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch index 8963c1a5b..3aa578eda 100644 --- a/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch +++ b/debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 9 Apr 2015 15:23:01 +0200 Subject: cpufreq: drop K8's driver from beeing selected -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Ralf posted a picture of a backtrace from diff --git a/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch b/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch index 7a81573e6..237698e20 100644 --- a/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch +++ b/debian/patches/features/all/rt/cpumask-disable-offstack-on-rt.patch @@ -1,7 +1,7 @@ Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT From: Thomas Gleixner Date: Wed, 14 Dec 2011 01:03:49 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz There are "valid" GFP_ATOMIC allocations such as @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -934,7 +934,7 @@ config IOMMU_HELPER +@@ -945,7 +945,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL diff --git a/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch b/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch index 76674c7fc..6e20c59ac 100644 --- a/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch +++ b/debian/patches/features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Sun, 8 Jan 2017 09:32:25 +0100 Subject: [PATCH] cpuset: Convert callback_lock to raw_spinlock_t -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The two commits below add up to a cpuset might_sleep() splat for RT: @@ -60,7 +60,7 @@ Signed-off-by: Sebastian Andrzej Siewior static struct workqueue_struct *cpuset_migrate_mm_wq; -@@ -926,9 +926,9 @@ static void update_cpumasks_hier(struct +@@ -921,9 +921,9 @@ static void update_cpumasks_hier(struct continue; rcu_read_unlock(); @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); -@@ -993,9 +993,9 @@ static int update_cpumask(struct cpuset +@@ -988,9 +988,9 @@ static int update_cpumask(struct cpuset if (retval < 0) return retval; @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* use trialcs->cpus_allowed as a temp variable */ update_cpumasks_hier(cs, trialcs->cpus_allowed); -@@ -1179,9 +1179,9 @@ static void update_nodemasks_hier(struct +@@ -1174,9 +1174,9 @@ static void update_nodemasks_hier(struct continue; rcu_read_unlock(); @@ -96,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); -@@ -1249,9 +1249,9 @@ static int update_nodemask(struct cpuset +@@ -1244,9 +1244,9 @@ static int update_nodemask(struct cpuset if (retval < 0) goto done; @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); -@@ -1342,9 +1342,9 @@ static int update_flag(cpuset_flagbits_t +@@ -1337,9 +1337,9 @@ static int update_flag(cpuset_flagbits_t spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); -@@ -1759,7 +1759,7 @@ static int cpuset_common_seq_show(struct +@@ -1754,7 +1754,7 @@ static int cpuset_common_seq_show(struct cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; @@ -129,7 +129,7 @@ Signed-off-by: Sebastian Andrzej Siewior switch (type) { case FILE_CPULIST: -@@ -1778,7 +1778,7 @@ static int cpuset_common_seq_show(struct +@@ -1773,7 +1773,7 @@ static int cpuset_common_seq_show(struct ret = -EINVAL; } @@ -138,7 +138,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -1993,12 +1993,12 @@ static int cpuset_css_online(struct cgro +@@ -1988,12 +1988,12 @@ static int cpuset_css_online(struct cgro cpuset_inc(); @@ -153,7 +153,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; -@@ -2025,12 +2025,12 @@ static int cpuset_css_online(struct cgro +@@ -2020,12 +2020,12 @@ static int cpuset_css_online(struct cgro } rcu_read_unlock(); @@ -168,7 +168,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: mutex_unlock(&cpuset_mutex); return 0; -@@ -2069,7 +2069,7 @@ static void cpuset_css_free(struct cgrou +@@ -2064,7 +2064,7 @@ static void cpuset_css_free(struct cgrou static void cpuset_bind(struct cgroup_subsys_state *root_css) { mutex_lock(&cpuset_mutex); @@ -177,7 +177,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); -@@ -2080,7 +2080,7 @@ static void cpuset_bind(struct cgroup_su +@@ -2075,7 +2075,7 @@ static void cpuset_bind(struct cgroup_su top_cpuset.mems_allowed = top_cpuset.effective_mems; } @@ -186,7 +186,7 @@ Signed-off-by: Sebastian Andrzej Siewior mutex_unlock(&cpuset_mutex); } -@@ -2178,12 +2178,12 @@ hotplug_update_tasks_legacy(struct cpuse +@@ -2173,12 +2173,12 @@ hotplug_update_tasks_legacy(struct cpuse { bool is_empty; @@ -201,7 +201,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, -@@ -2220,10 +2220,10 @@ hotplug_update_tasks(struct cpuset *cs, +@@ -2215,10 +2215,10 @@ hotplug_update_tasks(struct cpuset *cs, if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (cpus_updated) update_tasks_cpumask(cs); -@@ -2316,21 +2316,21 @@ static void cpuset_hotplug_workfn(struct +@@ -2311,21 +2311,21 @@ static void cpuset_hotplug_workfn(struct /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { @@ -240,7 +240,7 @@ Signed-off-by: Sebastian Andrzej Siewior update_tasks_nodemask(&top_cpuset); } -@@ -2429,11 +2429,11 @@ void cpuset_cpus_allowed(struct task_str +@@ -2424,11 +2424,11 @@ void cpuset_cpus_allowed(struct task_str { unsigned long flags; @@ -254,7 
+254,7 @@ Signed-off-by: Sebastian Andrzej Siewior } void cpuset_cpus_allowed_fallback(struct task_struct *tsk) -@@ -2481,11 +2481,11 @@ nodemask_t cpuset_mems_allowed(struct ta +@@ -2476,11 +2476,11 @@ nodemask_t cpuset_mems_allowed(struct ta nodemask_t mask; unsigned long flags; @@ -268,7 +268,7 @@ Signed-off-by: Sebastian Andrzej Siewior return mask; } -@@ -2577,14 +2577,14 @@ bool __cpuset_node_allowed(int node, gfp +@@ -2572,14 +2572,14 @@ bool __cpuset_node_allowed(int node, gfp return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ diff --git a/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch b/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch index 8455a6f35..97e5da5b0 100644 --- a/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch +++ b/debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 21 Feb 2014 17:24:04 +0100 Subject: crypto: Reduce preempt disabled regions, more algos -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Don Estabrook reported | kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100() @@ -53,7 +53,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; -@@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_de +@@ -73,7 +73,7 @@ static int ecb_crypt(struct blkcipher_de u8 *wsrc = walk->src.virt.addr; u8 *wdst = walk->dst.virt.addr; @@ -62,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { -@@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_de +@@ -102,10 +102,9 @@ static int ecb_crypt(struct blkcipher_de } while (nbytes >= bsize); done: @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior return err; } -@@ -227,7 +226,7 @@ static unsigned int __cbc_decrypt(struct +@@ -226,7 +225,7 @@ static unsigned int __cbc_decrypt(struct static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -83,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct blkcipher_walk walk; int err; -@@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_ +@@ -235,12 +234,11 @@ static int cbc_decrypt(struct blkcipher_ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes)) { @@ -98,7 +98,7 @@ Signed-off-by: Sebastian Andrzej Siewior return err; } -@@ -310,7 +308,7 @@ static unsigned int __ctr_crypt(struct b +@@ -309,7 +307,7 @@ static unsigned int __ctr_crypt(struct b static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct blkcipher_walk walk; int err; -@@ -319,13 +317,12 @@ static int ctr_crypt(struct blkcipher_de +@@ -318,13 +316,12 @@ static int ctr_crypt(struct blkcipher_de desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { diff --git a/debian/patches/features/all/rt/crypto-limit-more-FPU-enabled-sections.patch b/debian/patches/features/all/rt/crypto-limit-more-FPU-enabled-sections.patch index 
e6e6f6464..7be453d8f 100644 --- a/debian/patches/features/all/rt/crypto-limit-more-FPU-enabled-sections.patch +++ b/debian/patches/features/all/rt/crypto-limit-more-FPU-enabled-sections.patch @@ -4,7 +4,7 @@ Subject: [PATCH] crypto: limit more FPU-enabled sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Those crypto drivers use SSE/AVX/… for their crypto work and in order to do so in kernel they need to enable the "FPU" in kernel mode which diff --git a/debian/patches/features/all/rt/d_delete-get-rid-of-trylock-loop.patch b/debian/patches/features/all/rt/d_delete-get-rid-of-trylock-loop.patch new file mode 100644 index 000000000..f3751323b --- /dev/null +++ b/debian/patches/features/all/rt/d_delete-get-rid-of-trylock-loop.patch @@ -0,0 +1,60 @@ +From: Al Viro +Date: Fri, 23 Feb 2018 21:02:31 -0500 +Subject: [PATCH] d_delete(): get rid of trylock loop +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit c19457f0aed7fae73bb40e68ffcc72f36e3966a5 + +just grab ->i_lock first; we have a positive dentry, nothing's going +to happen to inode + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 28 +++++++++------------------- + 1 file changed, 9 insertions(+), 19 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2377,32 +2377,22 @@ EXPORT_SYMBOL(d_hash_and_lookup); + + void d_delete(struct dentry * dentry) + { +- struct inode *inode; +- int isdir = 0; ++ struct inode *inode = dentry->d_inode; ++ int isdir = d_is_dir(dentry); ++ ++ spin_lock(&inode->i_lock); ++ spin_lock(&dentry->d_lock); + /* + * Are we the only user? + */ +-again: +- spin_lock(&dentry->d_lock); +- inode = dentry->d_inode; +- isdir = S_ISDIR(inode->i_mode); + if (dentry->d_lockref.count == 1) { +- if (!spin_trylock(&inode->i_lock)) { +- spin_unlock(&dentry->d_lock); +- cpu_relax(); +- goto again; +- } + dentry->d_flags &= ~DCACHE_CANT_MOUNT; + dentry_unlink_inode(dentry); +- fsnotify_nameremove(dentry, isdir); +- return; +- } +- +- if (!d_unhashed(dentry)) ++ } else { + __d_drop(dentry); +- +- spin_unlock(&dentry->d_lock); +- ++ spin_unlock(&dentry->d_lock); ++ spin_unlock(&inode->i_lock); ++ } + fsnotify_nameremove(dentry, isdir); + } + EXPORT_SYMBOL(d_delete); diff --git a/debian/patches/features/all/rt/debugobjects-rt.patch b/debian/patches/features/all/rt/debugobjects-rt.patch index 34cde2755..a65ceddb7 100644 --- a/debian/patches/features/all/rt/debugobjects-rt.patch +++ b/debian/patches/features/all/rt/debugobjects-rt.patch @@ -1,7 +1,7 @@ Subject: debugobjects: Make RT aware From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:41:35 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Avoid filling the pool / allocating memory with irqs off(). 
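Both the d_delete() rework above and the cpu_chill() conversions later in this series attack the same anti-pattern: a trylock retry loop, which on RT can spin indefinitely against a preempted lock holder. A minimal userspace sketch of the before and after shapes, with pthread mutexes standing in for i_lock and d_lock (illustration only, not the kernel code):

    #include <pthread.h>
    #include <sched.h>

    static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t d_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Old shape: grab d_lock, then trylock i_lock; on failure drop
     * everything and retry. If the i_lock holder has been preempted,
     * this can spin for a long time, on RT potentially forever against
     * a lower-priority task. */
    static void delete_trylock_loop(void)
    {
            for (;;) {
                    pthread_mutex_lock(&d_lock);
                    if (pthread_mutex_trylock(&i_lock) == 0)
                            break;
                    pthread_mutex_unlock(&d_lock);
                    sched_yield();  /* the cpu_relax()/cpu_chill() slot */
            }
            /* ... unlink ... */
            pthread_mutex_unlock(&i_lock);
            pthread_mutex_unlock(&d_lock);
    }

    /* New shape: one fixed order (i_lock before d_lock), plain blocking
     * acquisition, no retry loop at all. */
    static void delete_ordered(void)
    {
            pthread_mutex_lock(&i_lock);
            pthread_mutex_lock(&d_lock);
            /* ... unlink ... */
            pthread_mutex_unlock(&d_lock);
            pthread_mutex_unlock(&i_lock);
    }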
diff --git a/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch b/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch index ddc3b9526..5d3442aaf 100644 --- a/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch +++ b/debian/patches/features/all/rt/delayacct-use-raw_spinlocks.patch @@ -1,7 +1,9 @@ From: Sebastian Andrzej Siewior Date: Sat, 20 May 2017 12:32:23 +0200 Subject: [PATCH] delayacct: use raw_spinlocks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +upstream commit 02acc80d19edb0d5684c997b2004ad19f9f5236e try_to_wake_up() might invoke delayacct_blkio_end() while holding the pi_lock. The lock is only held for a short amount of time so it should diff --git a/debian/patches/features/all/rt/dm-rq-remove-BUG_ON-irqs_disabled-check.patch b/debian/patches/features/all/rt/dm-rq-remove-BUG_ON-irqs_disabled-check.patch new file mode 100644 index 000000000..c5567abc3 --- /dev/null +++ b/debian/patches/features/all/rt/dm-rq-remove-BUG_ON-irqs_disabled-check.patch @@ -0,0 +1,31 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 27 Mar 2018 16:24:15 +0200 +Subject: [PATCH] dm rq: remove BUG_ON(!irqs_disabled) check +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +In commit 052189a2ec95 ("dm: remove superfluous irq disablement in +dm_request_fn") the spin_lock_irq() was replaced with spin_lock() + a +check for disabled interrupts. Later the locking part was removed in +commit 2eb6e1e3aa87 ("dm: submit stacked requests in irq enabled +context") but the BUG_ON() check remained. + +Since the original purpose for the "are-irqs-off" check is gone (the +->queue_lock has been removed) remove it. + +Cc: Keith Busch +Cc: Mike Snitzer +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/md/dm-rq.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/md/dm-rq.c ++++ b/drivers/md/dm-rq.c +@@ -688,7 +688,6 @@ static void dm_old_request_fn(struct req + /* Establish tio->ti before queuing work (map_tio_request) */ + tio->ti = ti; + kthread_queue_work(&md->kworker, &tio->work); +- BUG_ON(!irqs_disabled()); + } + } + diff --git a/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch b/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch index 04b59d47a..a76ef6652 100644 --- a/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch +++ b/debian/patches/features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch @@ -2,7 +2,7 @@ From: Mike Galbraith Date: Thu, 31 Mar 2016 04:08:28 +0200 Subject: [PATCH] drivers/block/zram: Replace bit spinlocks with rtmutex for -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz They're nondeterministic, and lead to ___might_sleep() splats in -rt. OTOH, they're a lot less wasteful than an rtmutex per page. 
@@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -756,6 +756,30 @@ static DEVICE_ATTR_RO(io_stat); +@@ -748,6 +748,30 @@ static DEVICE_ATTR_RO(io_stat); static DEVICE_ATTR_RO(mm_stat); static DEVICE_ATTR_RO(debug_stat); @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void zram_slot_lock(struct zram *zram, u32 index) { bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); -@@ -765,6 +789,7 @@ static void zram_slot_unlock(struct zram +@@ -757,6 +781,7 @@ static void zram_slot_unlock(struct zram { bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); } @@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void zram_meta_free(struct zram *zram, u64 disksize) { -@@ -794,6 +819,7 @@ static bool zram_meta_alloc(struct zram +@@ -786,6 +811,7 @@ static bool zram_meta_alloc(struct zram return false; } diff --git a/debian/patches/features/all/rt/drivers-md-raid5-Do-not-disable-irq-on-release_inact.patch b/debian/patches/features/all/rt/drivers-md-raid5-Do-not-disable-irq-on-release_inact.patch new file mode 100644 index 000000000..133b143f1 --- /dev/null +++ b/debian/patches/features/all/rt/drivers-md-raid5-Do-not-disable-irq-on-release_inact.patch @@ -0,0 +1,31 @@ +From: Anna-Maria Gleixner +Date: Wed, 4 Apr 2018 11:43:59 +0200 +Subject: [PATCH] drivers/md/raid5: Do not disable irq on + release_inactive_stripe_list() call +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +There is no need to invoke release_inactive_stripe_list() with interrupts +disabled. All call sites, except raid5_release_stripe(), unlock +->device_lock and enable interrupts before invoking the function. + +Make it consistent. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/md/raid5.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -414,9 +414,8 @@ void raid5_release_stripe(struct stripe_ + INIT_LIST_HEAD(&list); + hash = sh->hash_lock_index; + do_release_stripe(conf, sh, &list); +- spin_unlock(&conf->device_lock); ++ spin_unlock_irqrestore(&conf->device_lock, flags); + release_inactive_stripe_list(conf, &list, hash); +- local_irq_restore(flags); + } + } + diff --git a/debian/patches/features/all/rt/drivers-md-raid5-Use-irqsave-variant-of-atomic_dec_a.patch b/debian/patches/features/all/rt/drivers-md-raid5-Use-irqsave-variant-of-atomic_dec_a.patch new file mode 100644 index 000000000..5351683de --- /dev/null +++ b/debian/patches/features/all/rt/drivers-md-raid5-Use-irqsave-variant-of-atomic_dec_a.patch @@ -0,0 +1,37 @@ +From: Anna-Maria Gleixner +Date: Wed, 4 Apr 2018 11:43:58 +0200 +Subject: [PATCH] drivers/md/raid5: Use irqsave variant of + atomic_dec_and_lock() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The irqsave variant of atomic_dec_and_lock handles irqsave/restore when +taking/releasing the spin lock. With this variant the call of +local_irq_save is no longer required. 
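The helper the raid5 patch introduces, atomic_dec_and_lock_irqsave(), folds the irqsave into the dec-and-lock operation: decrement the counter and, only if it would reach zero, take the lock with interrupts saved. The generic fallback looks roughly like this (a simplified sketch of lib/dec_and_lock.c, which may differ in detail):

    int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
                                     unsigned long *flags)
    {
            /* Fast path: drop the count unless it would reach zero. */
            if (atomic_add_unless(atomic, -1, 1))
                    return 0;

            /* Slow path: take the lock, irqs saved, re-check under it. */
            spin_lock_irqsave(lock, *flags);
            if (atomic_dec_and_test(atomic))
                    return 1;  /* caller now holds the lock, irqs off */
            spin_unlock_irqrestore(lock, *flags);
            return 0;
    }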
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/md/raid5.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -409,16 +409,15 @@ void raid5_release_stripe(struct stripe_ + md_wakeup_thread(conf->mddev->thread); + return; + slow_path: +- local_irq_save(flags); + /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ +- if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { ++ if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) { + INIT_LIST_HEAD(&list); + hash = sh->hash_lock_index; + do_release_stripe(conf, sh, &list); + spin_unlock(&conf->device_lock); + release_inactive_stripe_list(conf, &list, hash); ++ local_irq_restore(flags); + } +- local_irq_restore(flags); + } + + static inline void remove_hash(struct stripe_head *sh) diff --git a/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch b/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch index 8b2dcbb67..6863e835b 100644 --- a/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch +++ b/debian/patches/features/all/rt/drivers-net-8139-disable-irq-nosync.patch @@ -1,7 +1,9 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:24 -0500 Subject: drivers/net: Use disable_irq_nosync() in 8139too -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +upstream commit af3e0fcf78879f718c5f73df0814951bd7057d34 Use disable_irq_nosync() instead of disable_irq() as this might be called in atomic context with netpoll. diff --git a/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch b/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch index 2d671366f..a2f0b4a64 100644 --- a/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch +++ b/debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:30 -0500 Subject: drivers: random: Reduce preempt disabled region -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz No need to keep preemption disabled across the whole function. 
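The change to add_timer_randomness() is the usual critical-section shrink: the timestamp sampling tolerates races, so only the update of shared pool state needs serialization. Schematically (types and helper names here are hypothetical):

    static void add_timer_randomness_sketch(struct entropy_pool *pool)
    {
            struct {
                    unsigned long jiffies;
                    unsigned long cycles;
            } sample;

            /* Racy sampling is acceptable: a stale or torn value yields
             * slightly different entropy, never corrupted state. */
            sample.jiffies = jiffies;
            sample.cycles  = random_get_entropy();

            /* Only the write into shared pool state is serialized. */
            spin_lock(&pool->lock);
            mix_pool_bytes(pool, &sample, sizeof(sample));
            spin_unlock(&pool->lock);
    }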
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/char/random.c +++ b/drivers/char/random.c -@@ -1017,8 +1017,6 @@ static void add_timer_randomness(struct +@@ -1122,8 +1122,6 @@ static void add_timer_randomness(struct } sample; long delta, delta2, delta3; @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner sample.jiffies = jiffies; sample.cycles = random_get_entropy(); sample.num = num; -@@ -1059,7 +1057,6 @@ static void add_timer_randomness(struct +@@ -1164,7 +1162,6 @@ static void add_timer_randomness(struct */ credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); } diff --git a/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch b/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch index 680c27ad0..7116db145 100644 --- a/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch +++ b/debian/patches/features/all/rt/drivers-tty-fix-omap-lock-crap.patch @@ -1,7 +1,7 @@ Subject: tty/serial/omap: Make the locking RT aware From: Thomas Gleixner Date: Thu, 28 Jul 2011 13:32:57 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c -@@ -1311,13 +1311,10 @@ serial_omap_console_write(struct console +@@ -1307,13 +1307,10 @@ serial_omap_console_write(struct console pm_runtime_get_sync(up->dev); @@ -31,7 +31,7 @@ Signed-off-by: Thomas Gleixner /* * First save the IER then disable the interrupts -@@ -1346,8 +1343,7 @@ serial_omap_console_write(struct console +@@ -1342,8 +1339,7 @@ serial_omap_console_write(struct console pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); if (locked) diff --git a/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch b/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch index 72d5fb522..07488d0a3 100644 --- a/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch +++ b/debian/patches/features/all/rt/drivers-tty-pl011-irq-disable-madness.patch @@ -1,7 +1,7 @@ Subject: tty/serial/pl011: Make the locking work on RT From: Thomas Gleixner Date: Tue, 08 Jan 2013 21:36:51 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and non-RT. 
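Both serial consoles (omap above, pl011 below) converge on the same idiom: during an oops the port lock is only trylocked, so a CPU that died while holding it cannot deadlock the panic path; otherwise it is taken normally, and since the lock is a sleeping spinlock on RT, no local_irq_save() wrapper is needed around it. The resulting shape, approximately:

    static void console_write_sketch(struct uart_port *port,
                                     const char *s, unsigned int count)
    {
            unsigned long flags;
            int locked = 1;

            if (port->sysrq || oops_in_progress)
                    locked = spin_trylock_irqsave(&port->lock, flags);
            else
                    spin_lock_irqsave(&port->lock, flags);

            /* ... push s[0..count) out through the UART ... */

            if (locked)
                    spin_unlock_irqrestore(&port->lock, flags);
    }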
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2220,13 +2220,19 @@ pl011_console_write(struct console *co, +@@ -2200,13 +2200,19 @@ pl011_console_write(struct console *co, clk_enable(uap->clk); @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner /* * First save the CR then disable the interrupts -@@ -2252,8 +2258,7 @@ pl011_console_write(struct console *co, +@@ -2232,8 +2238,7 @@ pl011_console_write(struct console *co, pl011_write(old_cr, uap, REG_CR); if (locked) diff --git a/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch b/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch index 7e9b9fc50..eb23e3fde 100644 --- a/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch +++ b/debian/patches/features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch @@ -2,7 +2,7 @@ From: Mike Galbraith Date: Thu, 20 Oct 2016 11:15:22 +0200 Subject: [PATCH] drivers/zram: Don't disable preemption in zcomp_stream_get/put() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz In v4.7, the driver switched to percpu compression streams, disabling preemption via get/put_cpu_ptr(). Use a per-zcomp_strm lock here. We @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* dynamic per-device compression frontend */ --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -871,6 +871,7 @@ static int __zram_bvec_read(struct zram +@@ -863,6 +863,7 @@ static int __zram_bvec_read(struct zram unsigned long handle; unsigned int size; void *src, *dst; @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (zram_wb_enabled(zram)) { zram_slot_lock(zram, index); -@@ -905,6 +906,7 @@ static int __zram_bvec_read(struct zram +@@ -897,6 +898,7 @@ static int __zram_bvec_read(struct zram size = zram_get_obj_size(zram, index); @@ -79,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { dst = kmap_atomic(page); -@@ -912,14 +914,13 @@ static int __zram_bvec_read(struct zram +@@ -904,14 +906,13 @@ static int __zram_bvec_read(struct zram kunmap_atomic(dst); ret = 0; } else { diff --git a/debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch b/debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch index 0ce81e465..163eb3334 100644 --- a/debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch +++ b/debian/patches/features/all/rt/drivers-zram-fix-zcomp_stream_get-smp_processor_id-u.patch @@ -2,7 +2,7 @@ From: Mike Galbraith Date: Wed, 23 Aug 2017 11:57:29 +0200 Subject: [PATCH] drivers/zram: fix zcomp_stream_get() smp_processor_id() use in preemptible code -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Use get_local_ptr() instead this_cpu_ptr() to avoid a warning regarding smp_processor_id() in preemptible code. 
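The underlying bug is this_cpu_ptr() in preemptible code: the task can migrate right after the per-CPU pointer is computed. get_local_ptr() is an RT-tree primitive (not mainline) that pins the task to its CPU without disabling preemption; the per-stream lock from the previous patch then provides the serialization. Approximately, with field names simplified:

    static struct zcomp_strm *zcomp_stream_get_sketch(struct zcomp *comp)
    {
            struct zcomp_strm *zstrm;

            /* pin to this CPU, but stay preemptible */
            zstrm = *get_local_ptr(comp->stream);
            /* serialize with a real lock rather than preempt_disable() */
            spin_lock(&zstrm->zcomp_lock);
            return zstrm;
    }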
diff --git a/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch b/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch index 4f8297eaf..bb2d8714a 100644 --- a/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch +++ b/debian/patches/features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend().patch @@ -1,7 +1,7 @@ Subject: drm,i915: Use local_lock/unlock_irq() in intel_pipe_update_start/end() From: Mike Galbraith Date: Sat, 27 Feb 2016 09:01:42 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz [ 8.014039] BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:918 @@ -57,8 +57,8 @@ Cc: Sebastian Andrzej Siewior Cc: linux-rt-users Signed-off-by: Thomas Gleixner --- - drivers/gpu/drm/i915/intel_sprite.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) + drivers/gpu/drm/i915/intel_sprite.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -70,16 +70,16 @@ Signed-off-by: Thomas Gleixner #include "intel_drv.h" #include "intel_frontbuffer.h" #include -@@ -67,7 +68,7 @@ int intel_usecs_to_scanlines(const struc - } - +@@ -74,6 +75,8 @@ int intel_usecs_to_scanlines(const struc #define VBLANK_EVASION_TIME_US 100 -- + #endif + +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); ++ /** * intel_pipe_update_start() - start update of a set of display registers - * @crtc: the crtc of which the registers are going to be updated -@@ -102,7 +103,7 @@ void intel_pipe_update_start(struct inte + * @new_crtc_state: the new crtc state +@@ -107,7 +110,7 @@ void intel_pipe_update_start(const struc VBLANK_EVASION_TIME_US); max = vblank_start - 1; @@ -88,7 +88,7 @@ Signed-off-by: Thomas Gleixner if (min <= 0 || max <= 0) return; -@@ -132,11 +133,11 @@ void intel_pipe_update_start(struct inte +@@ -137,11 +140,11 @@ void intel_pipe_update_start(const struc break; } @@ -102,8 +102,8 @@ Signed-off-by: Thomas Gleixner } finish_wait(wq, &wait); -@@ -201,7 +202,7 @@ void intel_pipe_update_end(struct intel_ - crtc->base.state->event = NULL; +@@ -206,7 +209,7 @@ void intel_pipe_update_end(struct intel_ + new_crtc_state->base.event = NULL; } - local_irq_enable(); diff --git a/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch b/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch index 243f62d92..a5fd2851b 100644 --- a/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch +++ b/debian/patches/features/all/rt/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch @@ -1,7 +1,7 @@ Subject: drm,radeon,i915: Use preempt_disable/enable_rt() where recommended From: Mike Galbraith Date: Sat, 27 Feb 2016 08:09:11 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz DRM folks identified the spots, so use them. 
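The pipe-update patch above relies on the RT tree's local-lock primitives from include/linux/locallock.h, which are not mainline at this point. On a non-RT build they collapse to plain interrupt disabling, so behavior is unchanged; on RT the lock is a real per-CPU sleeping lock that can be blocked on instead of hard-disabling interrupts. A rough rendering of the mapping (simplified, details differ in the real header):

    #ifdef CONFIG_PREEMPT_RT_BASE
    /* DEFINE_LOCAL_IRQ_LOCK(lvar) creates a per-CPU structure holding a
     * sleeping spinlock; local_lock_irq(lvar) acquires the current CPU's
     * instance, serializing against other users without turning irqs off. */
    #else
    #define DEFINE_LOCAL_IRQ_LOCK(lvar)   /* no storage needed */
    #define local_lock_irq(lvar)          local_irq_disable()
    #define local_unlock_irq(lvar)        local_irq_enable()
    #endif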
@@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c -@@ -867,6 +867,7 @@ static bool i915_get_crtc_scanoutpos(str +@@ -936,6 +936,7 @@ static bool i915_get_crtc_scanoutpos(str spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner /* Get optional system timestamp before query. */ if (stime) -@@ -918,6 +919,7 @@ static bool i915_get_crtc_scanoutpos(str +@@ -987,6 +988,7 @@ static bool i915_get_crtc_scanoutpos(str *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c -@@ -1839,6 +1839,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1834,6 +1834,7 @@ int radeon_get_crtc_scanoutpos(struct dr struct radeon_device *rdev = dev->dev_private; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner /* Get optional system timestamp before query. */ if (stime) -@@ -1931,6 +1932,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1926,6 +1927,7 @@ int radeon_get_crtc_scanoutpos(struct dr *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ diff --git a/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch b/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch index d52a9ec81..6f239c8b3 100644 --- a/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch +++ b/debian/patches/features/all/rt/epoll-use-get-cpu-light.patch @@ -1,7 +1,7 @@ Subject: fs/epoll: Do not disable preemption on RT From: Thomas Gleixner Date: Fri, 08 Jul 2011 16:35:35 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz ep_call_nested() takes a sleeping lock so we can't disable preemption. 
The light version is enough since ep_call_nested() doesn't mind beeing @@ -14,8 +14,8 @@ Signed-off-by: Thomas Gleixner --- a/fs/eventpoll.c +++ b/fs/eventpoll.c -@@ -587,12 +587,12 @@ static int ep_poll_wakeup_proc(void *pri - */ +@@ -563,12 +563,12 @@ static int ep_poll_wakeup_proc(void *pri + static void ep_poll_safewake(wait_queue_head_t *wq) { - int this_cpu = get_cpu(); @@ -28,4 +28,4 @@ Signed-off-by: Thomas Gleixner + put_cpu_light(); } - static void ep_remove_wait_queue(struct eppoll_entry *pwq) + #else diff --git a/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch b/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch index 06d274734..94e60bf8e 100644 --- a/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch +++ b/debian/patches/features/all/rt/fs-aio-simple-simple-work.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 16 Feb 2015 18:49:10 +0100 Subject: fs/aio: simple simple work -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768 |in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2 @@ -25,8 +25,8 @@ Reported-By: Mike Galbraith Suggested-by: Benjamin LaHaise Signed-off-by: Sebastian Andrzej Siewior --- - fs/aio.c | 24 +++++++++++++++++------- - 1 file changed, 17 insertions(+), 7 deletions(-) + fs/aio.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) --- a/fs/aio.c +++ b/fs/aio.c @@ -38,16 +38,15 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -115,7 +116,7 @@ struct kioctx { - struct page **ring_pages; - long nr_pages; +@@ -117,6 +118,7 @@ struct kioctx { -- struct work_struct free_work; -+ struct swork_event free_work; + struct rcu_head free_rcu; + struct work_struct free_work; /* see free_ioctx() */ ++ struct swork_event free_swork; /* see free_ioctx() */ /* * signals when all in-flight requests are done -@@ -258,6 +259,7 @@ static int __init aio_setup(void) +@@ -259,6 +261,7 @@ static int __init aio_setup(void) .mount = aio_mount, .kill_sb = kill_anon_super, }; @@ -55,30 +54,7 @@ Signed-off-by: Sebastian Andrzej Siewior aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); -@@ -588,9 +590,9 @@ static int kiocb_cancel(struct aio_kiocb - return cancel(&kiocb->common); - } - --static void free_ioctx(struct work_struct *work) -+static void free_ioctx(struct swork_event *sev) - { -- struct kioctx *ctx = container_of(work, struct kioctx, free_work); -+ struct kioctx *ctx = container_of(sev, struct kioctx, free_work); - - pr_debug("freeing %p\n", ctx); - -@@ -609,8 +611,8 @@ static void free_ioctx_reqs(struct percp - if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) - complete(&ctx->rq_wait->comp); - -- INIT_WORK(&ctx->free_work, free_ioctx); -- schedule_work(&ctx->free_work); -+ INIT_SWORK(&ctx->free_work, free_ioctx); -+ swork_queue(&ctx->free_work); - } - - /* -@@ -618,9 +620,9 @@ static void free_ioctx_reqs(struct percp +@@ -633,9 +636,9 @@ static void free_ioctx_reqs(struct percp * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. 
*/ @@ -86,11 +62,11 @@ Signed-off-by: Sebastian Andrzej Siewior +static void free_ioctx_users_work(struct swork_event *sev) { - struct kioctx *ctx = container_of(ref, struct kioctx, users); -+ struct kioctx *ctx = container_of(sev, struct kioctx, free_work); ++ struct kioctx *ctx = container_of(sev, struct kioctx, free_swork); struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); -@@ -639,6 +641,14 @@ static void free_ioctx_users(struct perc +@@ -654,6 +657,14 @@ static void free_ioctx_users(struct perc percpu_ref_put(&ctx->reqs); } @@ -98,8 +74,8 @@ Signed-off-by: Sebastian Andrzej Siewior +{ + struct kioctx *ctx = container_of(ref, struct kioctx, users); + -+ INIT_SWORK(&ctx->free_work, free_ioctx_users_work); -+ swork_queue(&ctx->free_work); ++ INIT_SWORK(&ctx->free_swork, free_ioctx_users_work); ++ swork_queue(&ctx->free_swork); +} + static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) diff --git a/debian/patches/features/all/rt/fs-dcache-Move-dentry_kill-below-lock_parent.patch b/debian/patches/features/all/rt/fs-dcache-Move-dentry_kill-below-lock_parent.patch new file mode 100644 index 000000000..08e8d708a --- /dev/null +++ b/debian/patches/features/all/rt/fs-dcache-Move-dentry_kill-below-lock_parent.patch @@ -0,0 +1,97 @@ +From: John Ogness +Date: Fri, 23 Feb 2018 00:50:21 +0100 +Subject: [PATCH] fs/dcache: Move dentry_kill() below lock_parent() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit c1d0c1a2b51e86124b7ba8ff9054698e2036d8e7 + +A subsequent patch will modify dentry_kill() to call lock_parent(). +Move the dentry_kill() implementation "as is" below lock_parent() +first. This will help simplify the review of the subsequent patch +with dentry_kill() changes. + +Signed-off-by: John Ogness +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 62 ++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 31 insertions(+), 31 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -588,37 +588,6 @@ static void __dentry_kill(struct dentry + dentry_free(dentry); + } + +-/* +- * Finish off a dentry we've decided to kill. +- * dentry->d_lock must be held, returns with it unlocked. +- * Returns dentry requiring refcount drop, or NULL if we're done. +- */ +-static struct dentry *dentry_kill(struct dentry *dentry) +- __releases(dentry->d_lock) +-{ +- struct inode *inode = dentry->d_inode; +- struct dentry *parent = NULL; +- +- if (inode && unlikely(!spin_trylock(&inode->i_lock))) +- goto failed; +- +- if (!IS_ROOT(dentry)) { +- parent = dentry->d_parent; +- if (unlikely(!spin_trylock(&parent->d_lock))) { +- if (inode) +- spin_unlock(&inode->i_lock); +- goto failed; +- } +- } +- +- __dentry_kill(dentry); +- return parent; +- +-failed: +- spin_unlock(&dentry->d_lock); +- return dentry; /* try again with same dentry */ +-} +- + static inline struct dentry *lock_parent(struct dentry *dentry) + { + struct dentry *parent = dentry->d_parent; +@@ -659,6 +628,37 @@ static inline struct dentry *lock_parent + } + + /* ++ * Finish off a dentry we've decided to kill. ++ * dentry->d_lock must be held, returns with it unlocked. ++ * Returns dentry requiring refcount drop, or NULL if we're done. 
++ */ ++static struct dentry *dentry_kill(struct dentry *dentry) ++ __releases(dentry->d_lock) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct dentry *parent = NULL; ++ ++ if (inode && unlikely(!spin_trylock(&inode->i_lock))) ++ goto failed; ++ ++ if (!IS_ROOT(dentry)) { ++ parent = dentry->d_parent; ++ if (unlikely(!spin_trylock(&parent->d_lock))) { ++ if (inode) ++ spin_unlock(&inode->i_lock); ++ goto failed; ++ } ++ } ++ ++ __dentry_kill(dentry); ++ return parent; ++ ++failed: ++ spin_unlock(&dentry->d_lock); ++ return dentry; /* try again with same dentry */ ++} ++ ++/* + * Try to do a lockless dput(), and return whether that was successful. + * + * If unsuccessful, we return false, having already taken the dentry lock. diff --git a/debian/patches/features/all/rt/fs-dcache-Remove-stale-comment-from-dentry_kill.patch b/debian/patches/features/all/rt/fs-dcache-Remove-stale-comment-from-dentry_kill.patch new file mode 100644 index 000000000..a2906190d --- /dev/null +++ b/debian/patches/features/all/rt/fs-dcache-Remove-stale-comment-from-dentry_kill.patch @@ -0,0 +1,28 @@ +From: John Ogness +Date: Fri, 23 Feb 2018 00:50:20 +0100 +Subject: [PATCH] fs/dcache: Remove stale comment from dentry_kill() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 06080d100d921848b35196850ec17310469e06ba + +Commit 0d98439ea3c6 ("vfs: use lockred "dead" flag to mark unrecoverably +dead dentries") removed the `ref' parameter in dentry_kill() but its +documentation remained. Remove it. + +Signed-off-by: John Ogness +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -591,7 +591,6 @@ static void __dentry_kill(struct dentry + /* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. +- * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ + static struct dentry *dentry_kill(struct dentry *dentry) diff --git a/debian/patches/features/all/rt/fs-dcache-bring-back-explicit-INIT_HLIST_BL_HEAD-in.patch b/debian/patches/features/all/rt/fs-dcache-bring-back-explicit-INIT_HLIST_BL_HEAD-in.patch new file mode 100644 index 000000000..e2a747c48 --- /dev/null +++ b/debian/patches/features/all/rt/fs-dcache-bring-back-explicit-INIT_HLIST_BL_HEAD-in.patch @@ -0,0 +1,54 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 13 Sep 2017 12:32:34 +0200 +Subject: [PATCH] fs/dcache: bring back explicit INIT_HLIST_BL_HEAD init +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Commit 3d375d78593c ("mm: update callers to use HASH_ZERO flag") removed +INIT_HLIST_BL_HEAD and uses the ZERO flag instead for the init. However +on RT we have also a spinlock which needs an init call so we can't use +that. + +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -3125,6 +3125,8 @@ static int __init set_dhash_entries(char + + static void __init dcache_init_early(void) + { ++ unsigned int loop; ++ + /* If hashes are distributed across NUMA nodes, defer + * hash allocation until vmalloc space is available. 
+ */ +@@ -3141,11 +3143,16 @@ static void __init dcache_init_early(voi + NULL, + 0, + 0); ++ ++ for (loop = 0; loop < (1U << d_hash_shift); loop++) ++ INIT_HLIST_BL_HEAD(dentry_hashtable + loop); ++ + d_hash_shift = 32 - d_hash_shift; + } + + static void __init dcache_init(void) + { ++ unsigned int loop; + /* + * A constructor could be added for stable state like the lists, + * but it is probably not worth it because of the cache nature +@@ -3169,6 +3176,10 @@ static void __init dcache_init(void) + NULL, + 0, + 0); ++ ++ for (loop = 0; loop < (1U << d_hash_shift); loop++) ++ INIT_HLIST_BL_HEAD(dentry_hashtable + loop); ++ + d_hash_shift = 32 - d_hash_shift; + } + diff --git a/debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch b/debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch index 817f6efb0..def701ec1 100644 --- a/debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch +++ b/debian/patches/features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 20 Oct 2017 11:29:53 +0200 Subject: [PATCH] fs/dcache: disable preemption on i_dir_seq's write side -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz i_dir_seq is an opencoded seqcounter. Based on the code it looks like we could have two writers in parallel despite the fact that the d_lock is @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2405,9 +2405,10 @@ EXPORT_SYMBOL(d_rehash); +@@ -2440,9 +2440,10 @@ EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior return n; cpu_relax(); } -@@ -2415,7 +2416,8 @@ static inline unsigned start_dir_add(str +@@ -2450,7 +2451,8 @@ static inline unsigned start_dir_add(str static inline void end_dir_add(struct inode *dir, unsigned n) { @@ -47,27 +47,27 @@ Signed-off-by: Sebastian Andrzej Siewior } static void d_wait_lookup(struct dentry *dentry) -@@ -2448,7 +2450,7 @@ struct dentry *d_alloc_parallel(struct d +@@ -2483,7 +2485,7 @@ struct dentry *d_alloc_parallel(struct d retry: rcu_read_lock(); -- seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; -+ seq = smp_load_acquire(&parent->d_inode->__i_dir_seq) & ~1; +- seq = smp_load_acquire(&parent->d_inode->i_dir_seq); ++ seq = smp_load_acquire(&parent->d_inode->__i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { -@@ -2470,7 +2472,7 @@ struct dentry *d_alloc_parallel(struct d - goto retry; +@@ -2511,7 +2513,7 @@ struct dentry *d_alloc_parallel(struct d } + hlist_bl_lock(b); -- if (unlikely(parent->d_inode->i_dir_seq != seq)) { -+ if (unlikely(parent->d_inode->__i_dir_seq != seq)) { +- if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { ++ if (unlikely(READ_ONCE(parent->d_inode->__i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; --- a/fs/inode.c +++ b/fs/inode.c -@@ -154,7 +154,7 @@ int inode_init_always(struct super_block +@@ -155,7 +155,7 @@ int inode_init_always(struct super_block inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_link = NULL; diff --git a/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch 
b/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch index 64f0c4207..f4e5c101f 100644 --- a/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch +++ b/debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch @@ -1,7 +1,7 @@ Subject: fs: dcache: Use cpu_chill() in trylock loops From: Thomas Gleixner Date: Wed, 07 Mar 2012 21:00:34 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. Use cpu_chill() instead of cpu_relax() to let the system @@ -12,9 +12,8 @@ Signed-off-by: Thomas Gleixner --- fs/autofs4/autofs_i.h | 1 + fs/autofs4/expire.c | 2 +- - fs/dcache.c | 20 ++++++++++++++++---- fs/namespace.c | 3 ++- - 4 files changed, 20 insertions(+), 6 deletions(-) + 3 files changed, 4 insertions(+), 2 deletions(-) --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -37,56 +36,6 @@ Signed-off-by: Thomas Gleixner goto relock; } spin_unlock(&p->d_lock); ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -784,6 +785,8 @@ static inline bool fast_dput(struct dent - */ - void dput(struct dentry *dentry) - { -+ struct dentry *parent; -+ - if (unlikely(!dentry)) - return; - -@@ -820,9 +823,18 @@ void dput(struct dentry *dentry) - return; - - kill_it: -- dentry = dentry_kill(dentry); -- if (dentry) { -- cond_resched(); -+ parent = dentry_kill(dentry); -+ if (parent) { -+ int r; -+ -+ if (parent == dentry) { -+ /* the task with the highest priority won't schedule */ -+ r = cond_resched(); -+ if (!r) -+ cpu_chill(); -+ } else { -+ dentry = parent; -+ } - goto repeat; - } - } -@@ -2360,7 +2372,7 @@ void d_delete(struct dentry * dentry) - if (dentry->d_lockref.count == 1) { - if (!spin_trylock(&inode->i_lock)) { - spin_unlock(&dentry->d_lock); -- cpu_relax(); -+ cpu_chill(); - goto again; - } - dentry->d_flags &= ~DCACHE_CANT_MOUNT; --- a/fs/namespace.c +++ b/fs/namespace.c @@ -14,6 +14,7 @@ @@ -99,7 +48,7 @@ Signed-off-by: Thomas Gleixner #include @@ -355,7 +356,7 @@ int __mnt_want_write(struct vfsmount *m) smp_mb(); - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { + while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { preempt_enable(); - cpu_relax(); + cpu_chill(); diff --git a/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch b/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch index 900716914..1adf2ff50 100644 --- a/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch +++ b/debian/patches/features/all/rt/fs-dcache-use-swait_queue-instead-of-waitqueue.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 14 Sep 2016 14:35:49 +0200 Subject: [PATCH] fs/dcache: use swait_queue instead of waitqueue -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz __d_lookup_done() invokes wake_up_all() while holding a hlist_bl_lock() which disables preemption. As a workaround convert it to swait. 
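The distinction that matters for the swait conversion: a classic wait_queue_head_t embeds a spinlock_t and wake_up_all() may run arbitrary wake callbacks, both of which can sleep on RT and therefore must not run under the hlist_bl bit lock, while a simple waitqueue keeps a raw spinlock and performs only bare task wakeups. Usage is nearly parallel (schematic, context omitted):

    /* classic waitqueue: internal spinlock_t, custom wake callbacks allowed */
    static DECLARE_WAIT_QUEUE_HEAD(wq);

    wait_event(wq, condition);
    wake_up_all(&wq);              /* can sleep on RT */

    /* simple waitqueue: raw lock inside, bare task wakeups only */
    static DECLARE_SWAIT_QUEUE_HEAD(swq);

    swait_event(swq, condition);
    swake_up_all(&swq);            /* takes only raw locks internally */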
@@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2434,21 +2434,24 @@ static inline void end_dir_add(struct in +@@ -2457,21 +2457,24 @@ static inline void end_dir_add(struct in static void d_wait_lookup(struct dentry *dentry) { @@ -70,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); -@@ -2557,7 +2560,7 @@ void __d_lookup_done(struct dentry *dent +@@ -2586,7 +2589,7 @@ void __d_lookup_done(struct dentry *dent hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); @@ -92,7 +92,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/fs/namei.c +++ b/fs/namei.c -@@ -1628,7 +1628,7 @@ static struct dentry *lookup_slow(const +@@ -1601,7 +1601,7 @@ static struct dentry *lookup_slow(const { struct dentry *dentry = ERR_PTR(-ENOENT), *old; struct inode *inode = dir->d_inode; @@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior inode_lock_shared(inode); /* Don't go there if it's already dead */ -@@ -3101,7 +3101,7 @@ static int lookup_open(struct nameidata +@@ -3095,7 +3095,7 @@ static int lookup_open(struct nameidata struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior return -ENOENT; --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c -@@ -452,7 +452,7 @@ static +@@ -445,7 +445,7 @@ static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { struct qstr filename = QSTR_INIT(entry->name, entry->len); @@ -121,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct dentry *dentry; struct dentry *alias; struct inode *dir = d_inode(parent); -@@ -1443,7 +1443,7 @@ int nfs_atomic_open(struct inode *dir, s +@@ -1436,7 +1436,7 @@ int nfs_atomic_open(struct inode *dir, s struct file *file, unsigned open_flags, umode_t mode, int *opened) { @@ -152,7 +152,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_lock(&dentry->d_lock); --- a/fs/proc/base.c +++ b/fs/proc/base.c -@@ -1878,7 +1878,7 @@ bool proc_fill_cache(struct file *file, +@@ -1879,7 +1879,7 @@ bool proc_fill_cache(struct file *file, child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -174,7 +174,7 @@ Signed-off-by: Sebastian Andrzej Siewior return false; --- a/include/linux/dcache.h +++ b/include/linux/dcache.h -@@ -107,7 +107,7 @@ struct dentry { +@@ -105,7 +105,7 @@ struct dentry { union { struct list_head d_lru; /* LRU list */ @@ -183,8 +183,8 @@ Signed-off-by: Sebastian Andrzej Siewior }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ -@@ -237,7 +237,7 @@ extern void d_set_d_op(struct dentry *de - extern struct dentry * d_alloc(struct dentry *, const struct qstr *); +@@ -237,7 +237,7 @@ extern struct dentry * d_alloc(struct de + extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, - wait_queue_head_t *); diff --git a/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch b/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch index eb620a5ce..532450255 100644 --- a/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch +++ b/debian/patches/features/all/rt/fs-jbd-replace-bh_state-lock.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 18 Mar 2011 10:11:25 +0100 Subject: fs: jbd/jbd2: Make state lock and 
journal head lock rt safe -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz bit_spin_locks break under RT. diff --git a/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch b/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch index 3210942c0..640136785 100644 --- a/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch +++ b/debian/patches/features/all/rt/fs-namespace-preemption-fix.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sun, 19 Jul 2009 08:44:27 -0500 Subject: fs: namespace preemption fix -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On RT we cannot loop with preemption disabled here as mnt_make_readonly() might have been preempted. We can safely enable @@ -20,8 +20,8 @@ Signed-off-by: Thomas Gleixner * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); -- while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) -+ while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { +- while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) ++ while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { + preempt_enable(); cpu_relax(); + preempt_disable(); diff --git a/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch b/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch index 22d22d13d..0b815a32d 100644 --- a/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch +++ b/debian/patches/features/all/rt/fs-nfs-turn-rmdir_sem-into-a-semaphore.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 15 Sep 2016 10:51:27 +0200 Subject: [PATCH] fs/nfs: turn rmdir_sem into a semaphore -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The RW semaphore had a reader side which used the _non_owner version because it most likely took the reader lock in one thread and released it @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c -@@ -1763,7 +1763,11 @@ int nfs_rmdir(struct inode *dir, struct +@@ -1756,7 +1756,11 @@ int nfs_rmdir(struct inode *dir, struct trace_nfs_rmdir_enter(dir, dentry); if (d_really_is_positive(dentry)) { @@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ switch (error) { -@@ -1773,7 +1777,11 @@ int nfs_rmdir(struct inode *dir, struct +@@ -1766,7 +1770,11 @@ int nfs_rmdir(struct inode *dir, struct case -ENOENT: nfs_dentry_handle_enoent(dentry); } @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior trace_nfs_rmdir_exit(dir, dentry, error); --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c -@@ -2014,7 +2014,11 @@ static void init_once(void *foo) +@@ -2039,7 +2039,11 @@ static void init_once(void *foo) atomic_long_set(&nfsi->nrequests, 0); atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior * point dentry is definitely not a root, so we won't need --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h -@@ -162,7 +162,11 @@ struct nfs_inode { +@@ -163,7 +163,11 @@ struct 
nfs_inode { /* Readers: in-flight sillydelete RPC calls */ /* Writers: rmdir */ diff --git a/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch b/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch index d71d879e9..6b5e92593 100644 --- a/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch +++ b/debian/patches/features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 18 Mar 2011 09:18:52 +0100 Subject: buffer_head: Replace bh_uptodate_lock for -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Wrap the bit_spin_lock calls into a separate inline and add the RT replacements with a real spinlock. @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/fs/buffer.c +++ b/fs/buffer.c -@@ -302,8 +302,7 @@ static void end_buffer_async_read(struct +@@ -274,8 +274,7 @@ static void end_buffer_async_read(struct * decide that the page is now completely done. */ first = page_buffers(page); @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; -@@ -316,8 +315,7 @@ static void end_buffer_async_read(struct +@@ -288,8 +287,7 @@ static void end_buffer_async_read(struct } tmp = tmp->b_this_page; } while (tmp != bh); @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner /* * If none of the buffers had errors and they are all -@@ -329,9 +327,7 @@ static void end_buffer_async_read(struct +@@ -301,9 +299,7 @@ static void end_buffer_async_read(struct return; still_busy: @@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffe +@@ -330,8 +326,7 @@ void end_buffer_async_write(struct buffe } first = page_buffers(page); @@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner clear_buffer_async_write(bh); unlock_buffer(bh); -@@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffe +@@ -343,15 +338,12 @@ void end_buffer_async_write(struct buffe } tmp = tmp->b_this_page; } @@ -76,7 +76,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(end_buffer_async_write); -@@ -3417,6 +3409,7 @@ struct buffer_head *alloc_buffer_head(gf +@@ -3372,6 +3364,7 @@ struct buffer_head *alloc_buffer_head(gf struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); @@ -108,7 +108,7 @@ Signed-off-by: Thomas Gleixner if (data_page) --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c -@@ -108,8 +108,7 @@ static void ntfs_end_buffer_async_read(s +@@ -106,8 +106,7 @@ static void ntfs_end_buffer_async_read(s "0x%llx.", (unsigned long long)bh->b_blocknr); } first = page_buffers(page); @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; -@@ -124,8 +123,7 @@ static void ntfs_end_buffer_async_read(s +@@ -122,8 +121,7 @@ static void ntfs_end_buffer_async_read(s } tmp = tmp->b_this_page; } while (tmp != bh); @@ -128,7 +128,7 @@ Signed-off-by: Thomas Gleixner /* * If none of the buffers had errors then we can set the page uptodate, * but we first have to perform the post read mst fixups, if the -@@ -160,9 +158,7 @@ static void ntfs_end_buffer_async_read(s +@@ -156,9 +154,7 @@ static void ntfs_end_buffer_async_read(s unlock_page(page); return; still_busy: diff --git a/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch 
b/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch index 8e78b61e6..dbddc5d63 100644 --- a/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch +++ b/debian/patches/features/all/rt/ftrace-Fix-trace-header-alignment.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Sun, 16 Oct 2016 05:08:30 +0200 Subject: [PATCH] ftrace: Fix trace header alignment -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Line up helper arrows to the right column. @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -3343,17 +3343,17 @@ get_total_entries(struct trace_buffer *b +@@ -3338,17 +3338,17 @@ get_total_entries(struct trace_buffer *b static void print_lat_help_header(struct seq_file *m) { diff --git a/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch b/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch index 872629885..88a217d3e 100644 --- a/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch +++ b/debian/patches/features/all/rt/ftrace-migrate-disable-tracing.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:56:42 +0200 Subject: trace: Add migrate-disabled counter to tracing output -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Signed-off-by: Thomas Gleixner --- @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL_GPL(tracing_generic_entry_update); -@@ -3344,9 +3346,10 @@ static void print_lat_help_header(struct +@@ -3339,9 +3341,10 @@ static void print_lat_help_header(struct "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" diff --git a/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch b/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch index 5ecd4ec24..07d03cd96 100644 --- a/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch +++ b/debian/patches/features/all/rt/futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 1 Mar 2013 11:17:42 +0100 Subject: futex: Ensure lock/unlock symetry versus pi_lock and hash bucket lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz In exit_pi_state_list() we have the following locking construct: @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -936,7 +936,9 @@ void exit_pi_state_list(struct task_stru +@@ -918,7 +918,9 @@ void exit_pi_state_list(struct task_stru if (head->next != next) { /* retain curr->pi_lock for the loop invariant */ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); diff --git a/debian/patches/features/all/rt/futex-requeue-pi-fix.patch b/debian/patches/features/all/rt/futex-requeue-pi-fix.patch index ebaa24f10..dec67b4fd 100644 --- a/debian/patches/features/all/rt/futex-requeue-pi-fix.patch +++ b/debian/patches/features/all/rt/futex-requeue-pi-fix.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: futex: Fix bug on when a 
requeued RT task times out -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Requeue with timeout causes a bug with PREEMPT_RT_FULL. @@ -104,7 +104,7 @@ Signed-off-by: Thomas Gleixner RT_MUTEX_FULL_CHAINWALK); --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -130,6 +130,7 @@ enum rtmutex_chainwalk { +@@ -131,6 +131,7 @@ enum rtmutex_chainwalk { * PI-futex support (proxy locking functions, etc.): */ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) diff --git a/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch b/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch index 30346cd01..fd8c94c5e 100644 --- a/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch +++ b/debian/patches/features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Wed, 8 Mar 2017 14:23:35 +0100 Subject: [PATCH] futex: workaround migrate_disable/enable in different context -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz migrate_disable()/migrate_enable() takes a different path in atomic() vs !atomic() context. These little hacks ensure that we don't underflow / overflow @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2816,9 +2816,18 @@ static int futex_lock_pi(u32 __user *uad +@@ -2798,9 +2798,18 @@ static int futex_lock_pi(u32 __user *uad * lock handoff sequence. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (ret) { if (ret == 1) -@@ -2965,11 +2974,21 @@ static int futex_unlock_pi(u32 __user *u +@@ -2947,11 +2956,21 @@ static int futex_unlock_pi(u32 __user *u * observed. 
*/ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); diff --git a/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch b/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch index f96270578..32c60361e 100644 --- a/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch +++ b/debian/patches/features/all/rt/genirq-disable-irqpoll-on-rt.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:57 -0500 Subject: genirq: Disable irqpoll on -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Creates long latencies for no value @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c -@@ -445,6 +445,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir +@@ -444,6 +444,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir static int __init irqfixup_setup(char *str) { @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); -@@ -457,6 +461,10 @@ module_param(irqfixup, int, 0644); +@@ -456,6 +460,10 @@ module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { diff --git a/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch b/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch index c0b1342c1..7ce2c959d 100644 --- a/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch +++ b/debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 21 Aug 2013 17:48:46 +0200 Subject: genirq: Do not invoke the affinity callback via a workqueue on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Joe Korty reported, that __irq_set_affinity_locked() schedules a workqueue while holding a rawlock which results in a might_sleep() @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -226,7 +226,12 @@ int irq_set_affinity_locked(struct irq_d +@@ -227,7 +227,12 @@ int irq_set_affinity_locked(struct irq_d if (desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior } irqd_set(data, IRQD_AFFINITY_SET); -@@ -264,10 +269,8 @@ int irq_set_affinity_hint(unsigned int i +@@ -265,10 +270,8 @@ int irq_set_affinity_hint(unsigned int i } EXPORT_SYMBOL_GPL(irq_set_affinity_hint); @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct irq_desc *desc = irq_to_desc(notify->irq); cpumask_var_t cpumask; unsigned long flags; -@@ -289,6 +292,35 @@ static void irq_affinity_notify(struct w +@@ -290,6 +293,35 @@ static void irq_affinity_notify(struct w kref_put(¬ify->kref, notify->release); } @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * irq_set_affinity_notifier - control notification of IRQ affinity changes * @irq: Interrupt for which to enable/disable notification -@@ -317,7 +349,12 @@ irq_set_affinity_notifier(unsigned int i +@@ -318,7 +350,12 @@ irq_set_affinity_notifier(unsigned int i if (notify) { notify->irq = irq; kref_init(¬ify->kref); diff --git 
a/debian/patches/features/all/rt/genirq-force-threading.patch b/debian/patches/features/all/rt/genirq-force-threading.patch index 3e0e6875c..a868f014f 100644 --- a/debian/patches/features/all/rt/genirq-force-threading.patch +++ b/debian/patches/features/all/rt/genirq-force-threading.patch @@ -1,7 +1,7 @@ Subject: genirq: Force interrupt thread on RT From: Thomas Gleixner Date: Sun, 03 Apr 2011 11:57:29 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Force threaded_irqs and optimize the code (force_irqthreads) in regard to this. @@ -37,9 +37,9 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_IRQ_FORCED_THREADING +# ifndef CONFIG_PREEMPT_RT_BASE __read_mostly bool force_irqthreads; + EXPORT_SYMBOL_GPL(force_irqthreads); - static int __init setup_forced_irqthreads(char *arg) -@@ -32,6 +33,7 @@ static int __init setup_forced_irqthread +@@ -33,6 +34,7 @@ static int __init setup_forced_irqthread return 0; } early_param("threadirqs", setup_forced_irqthreads); diff --git a/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch b/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch index a5348fa0d..260946647 100644 --- a/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch +++ b/debian/patches/features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Thu, 11 Feb 2016 11:54:00 -0600 Subject: genirq: update irq_set_irqchip_state documentation -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On -rt kernels, the use of migrate_disable()/migrate_enable() is sufficient to guarantee a task isn't moved to another CPU. Update the @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -2202,7 +2202,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) +@@ -2218,7 +2218,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. * diff --git a/debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch b/debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch index f99750bbd..71f13443f 100644 --- a/debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch +++ b/debian/patches/features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 4 Aug 2017 18:31:00 +0200 Subject: [PATCH] hotplug: duct-tape RT-rwlock usage for non-RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz This type is only available on -RT. We need to craft something for non-RT. 
Since the only migrate_disable() user is -RT only, there is no diff --git a/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch b/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch index 46bc88450..85b6af252 100644 --- a/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch +++ b/debian/patches/features/all/rt/hotplug-light-get-online-cpus.patch @@ -1,7 +1,7 @@ Subject: hotplug: Lightweight get online cpus From: Thomas Gleixner Date: Wed, 15 Jun 2011 12:36:06 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz get_online_cpus() is a heavy weight function which involves a global mutex. migrate_disable() wants a simpler construct which prevents only @@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/cpu.h +++ b/include/linux/cpu.h -@@ -116,6 +116,8 @@ extern void cpu_hotplug_disable(void); +@@ -106,6 +106,8 @@ extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner #else /* CONFIG_HOTPLUG_CPU */ -@@ -126,6 +128,9 @@ static inline void cpus_read_unlock(void +@@ -116,6 +118,9 @@ static inline void cpus_read_unlock(void static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner void cpus_read_lock(void) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6914,6 +6914,7 @@ void migrate_disable(void) +@@ -7173,6 +7173,7 @@ void migrate_disable(void) } preempt_disable(); @@ -73,7 +73,7 @@ Signed-off-by: Thomas Gleixner migrate_disable_update_cpus_allowed(p); p->migrate_disable = 1; -@@ -6979,12 +6980,15 @@ void migrate_enable(void) +@@ -7238,12 +7239,15 @@ void migrate_enable(void) arg.task = p; arg.dest_cpu = dest_cpu; diff --git a/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch b/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch index 96d005406..4ad99523c 100644 --- a/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch +++ b/debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch @@ -1,7 +1,7 @@ From: Yang Shi Date: Mon, 16 Sep 2013 14:09:19 -0700 Subject: hrtimer: Move schedule_work call to helper thread -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When run ltp leapsec_timer test, the following call trace is caught: @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -715,6 +715,29 @@ static void hrtimer_switch_to_hres(void) +@@ -714,6 +714,29 @@ static void hrtimer_switch_to_hres(void) retrigger_next_event(NULL); } @@ -82,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void clock_was_set_work(struct work_struct *work) { clock_was_set(); -@@ -730,6 +753,7 @@ void clock_was_set_delayed(void) +@@ -729,6 +752,7 @@ void clock_was_set_delayed(void) { schedule_work(&hrtimer_work); } diff --git a/debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch 
b/debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch index 3f9a54403..3070af82d 100644 --- a/debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch +++ b/debian/patches/features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 3 Jul 2009 08:44:31 -0500 Subject: hrtimer: by timers by default into the softirq context -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz We can't have hrtimers callbacks running in hardirq context on RT. Therefore the timers are deferred to the softirq context by default. @@ -19,15 +19,15 @@ Signed-off-by: Sebastian Andrzej Siewior kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/rt.c | 4 ++-- - kernel/time/hrtimer.c | 34 ++++++++++++++++++++++++++++++++-- + kernel/time/hrtimer.c | 21 +++++++++++++++++++-- kernel/time/tick-broadcast-hrtimer.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/watchdog.c | 2 +- - 10 files changed, 48 insertions(+), 12 deletions(-) + 10 files changed, 35 insertions(+), 12 deletions(-) --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c -@@ -2098,7 +2098,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc +@@ -2155,7 +2155,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc apic->vcpu = vcpu; hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, @@ -60,7 +60,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/kernel/events/core.c +++ b/kernel/events/core.c -@@ -1042,7 +1042,7 @@ static void __perf_mux_hrtimer_init(stru +@@ -1090,7 +1090,7 @@ static void __perf_mux_hrtimer_init(stru cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); raw_spin_lock_init(&cpuctx->hrtimer_lock); @@ -69,7 +69,7 @@ Signed-off-by: Sebastian Andrzej Siewior timer->function = perf_mux_hrtimer_handler; } -@@ -8705,7 +8705,7 @@ static void perf_swevent_init_hrtimer(st +@@ -8683,7 +8683,7 @@ static void perf_swevent_init_hrtimer(st if (!is_sampling_event(event)) return; @@ -80,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -341,7 +341,7 @@ static void init_rq_hrtick(struct rq *rq +@@ -343,7 +343,7 @@ static void init_rq_hrtick(struct rq *rq rq->hrtick_csd.info = rq; #endif @@ -91,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior #else /* CONFIG_SCHED_HRTICK */ --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -1020,7 +1020,7 @@ void init_dl_task_timer(struct sched_dl_ +@@ -1057,7 +1057,7 @@ void init_dl_task_timer(struct sched_dl_ { struct hrtimer *timer = &dl_se->dl_timer; @@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1122,7 +1122,9 @@ void hrtimer_start_range_ns(struct hrtim +@@ -1119,7 +1119,9 @@ void hrtimer_start_range_ns(struct hrtim * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match. 
*/ @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior base = lock_hrtimer_base(timer, &flags); -@@ -1249,10 +1251,17 @@ static inline int hrtimer_clockid_to_bas +@@ -1246,10 +1248,17 @@ static inline int hrtimer_clockid_to_bas static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { @@ -145,25 +145,7 @@ Signed-off-by: Sebastian Andrzej Siewior memset(timer, 0, sizeof(struct hrtimer)); cpu_base = raw_cpu_ptr(&hrtimer_bases); -@@ -1631,11 +1640,32 @@ static enum hrtimer_restart hrtimer_wake - return HRTIMER_NORESTART; - } - -+#ifdef CONFIG_PREEMPT_RT_FULL -+static bool task_is_realtime(struct task_struct *tsk) -+{ -+ int policy = tsk->policy; -+ -+ if (policy == SCHED_FIFO || policy == SCHED_RR) -+ return true; -+ if (policy == SCHED_DEADLINE) -+ return true; -+ return false; -+} -+#endif -+ - static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, - clockid_t clock_id, +@@ -1633,6 +1642,14 @@ static void __hrtimer_init_sleeper(struc enum hrtimer_mode mode, struct task_struct *task) { @@ -191,7 +173,7 @@ Signed-off-by: Sebastian Andrzej Siewior } --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c -@@ -1254,7 +1254,7 @@ void tick_setup_sched_timer(void) +@@ -1231,7 +1231,7 @@ void tick_setup_sched_timer(void) /* * Emulate tick processing via per-CPU hrtimers: */ @@ -202,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Get the next period (per-CPU) */ --- a/kernel/watchdog.c +++ b/kernel/watchdog.c -@@ -462,7 +462,7 @@ static void watchdog_enable(unsigned int +@@ -463,7 +463,7 @@ static void watchdog_enable(unsigned int * Start the timer first to prevent the NMI watchdog triggering * before the timer has a chance to fire. */ diff --git a/debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch b/debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch index 3a062713f..64e8fe7b5 100644 --- a/debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch +++ b/debian/patches/features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 4 Sep 2017 18:31:50 +0200 Subject: [PATCH] hrtimer: consolidate hrtimer_init() + hrtimer_init_sleeper() calls -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz hrtimer_init_sleeper() calls require a prior initialisation of the hrtimer object with hrtimer_init(). 
Lets make the initialisation of the @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -2800,10 +2800,9 @@ static bool blk_mq_poll_hybrid_sleep(str +@@ -3114,10 +3114,9 @@ static bool blk_mq_poll_hybrid_sleep(str kt = nsecs; mode = HRTIMER_MODE_REL; @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior - hrtimer_init_sleeper(&hs, current); do { - if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) + if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE) break; --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -80,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern int schedule_hrtimeout_range_clock(ktime_t *expires, --- a/include/linux/wait.h +++ b/include/linux/wait.h -@@ -487,8 +487,8 @@ do { \ +@@ -489,8 +489,8 @@ do { \ int __ret = 0; \ struct hrtimer_sleeper __t; \ \ @@ -93,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior current->timer_slack_ns, \ --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2642,10 +2642,9 @@ static int futex_wait(u32 __user *uaddr, +@@ -2624,10 +2624,9 @@ static int futex_wait(u32 __user *uaddr, if (abs_time) { to = &timeout; @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior hrtimer_set_expires_range_ns(&to->timer, *abs_time, current->timer_slack_ns); } -@@ -2744,9 +2743,8 @@ static int futex_lock_pi(u32 __user *uad +@@ -2726,9 +2725,8 @@ static int futex_lock_pi(u32 __user *uad if (time) { to = &timeout; @@ -119,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior hrtimer_set_expires(&to->timer, *time); } -@@ -3162,10 +3160,9 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3144,10 +3142,9 @@ static int futex_wait_requeue_pi(u32 __u if (abs_time) { to = &timeout; @@ -135,7 +135,7 @@ Signed-off-by: Sebastian Andrzej Siewior } --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1603,13 +1603,44 @@ static enum hrtimer_restart hrtimer_wake +@@ -1600,13 +1600,44 @@ static enum hrtimer_restart hrtimer_wake return HRTIMER_NORESTART; } @@ -181,7 +181,7 @@ Signed-off-by: Sebastian Andrzej Siewior int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) { switch(restart->nanosleep.type) { -@@ -1633,8 +1664,6 @@ static int __sched do_nanosleep(struct h +@@ -1630,8 +1661,6 @@ static int __sched do_nanosleep(struct h { struct restart_block *restart; @@ -190,7 +190,7 @@ Signed-off-by: Sebastian Andrzej Siewior do { set_current_state(TASK_INTERRUPTIBLE); hrtimer_start_expires(&t->timer, mode); -@@ -1671,10 +1700,9 @@ static long __sched hrtimer_nanosleep_re +@@ -1668,10 +1697,9 @@ static long __sched hrtimer_nanosleep_re struct hrtimer_sleeper t; int ret; @@ -203,7 +203,7 @@ Signed-off-by: Sebastian Andrzej Siewior ret = do_nanosleep(&t, HRTIMER_MODE_ABS); destroy_hrtimer_on_stack(&t.timer); return ret; -@@ -1692,7 +1720,7 @@ long hrtimer_nanosleep(const struct time +@@ -1689,7 +1717,7 @@ long hrtimer_nanosleep(const struct time if (dl_task(current) || rt_task(current)) slack = 0; @@ -212,7 +212,7 @@ Signed-off-by: Sebastian Andrzej Siewior hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) -@@ -1883,11 +1911,9 @@ schedule_hrtimeout_range_clock(ktime_t * +@@ -1884,11 +1912,9 @@ schedule_hrtimeout_range_clock(ktime_t * return -EINTR; } @@ -227,7 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (likely(t.task)) --- a/net/core/pktgen.c +++ b/net/core/pktgen.c -@@ -2252,7 +2252,8 @@ static void spin(struct pktgen_dev *pkt_ +@@ -2161,7 +2161,8 @@ static void spin(struct pktgen_dev 
*pkt_ s64 remaining; struct hrtimer_sleeper t; @@ -237,7 +237,7 @@ Signed-off-by: Sebastian Andrzej Siewior hrtimer_set_expires(&t.timer, spin_until); remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); -@@ -2267,7 +2268,6 @@ static void spin(struct pktgen_dev *pkt_ +@@ -2176,7 +2177,6 @@ static void spin(struct pktgen_dev *pkt_ } while (ktime_compare(end_time, spin_until) < 0); } else { /* see do_nanosleep */ diff --git a/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch b/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch index e0023f580..f78b537f3 100644 --- a/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch +++ b/debian/patches/features/all/rt/hrtimers-prepare-full-preemption.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:34 -0500 Subject: hrtimers: Prepare full preemption -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Make cancellation of a running callback in softirq context safe against preemption. @@ -11,10 +11,11 @@ Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 13 ++++++++++++- + kernel/time/alarmtimer.c | 2 +- kernel/time/hrtimer.c | 33 ++++++++++++++++++++++++++++++++- kernel/time/itimer.c | 1 + - kernel/time/posix-timers.c | 33 +++++++++++++++++++++++++++++++++ - 4 files changed, 78 insertions(+), 2 deletions(-) + kernel/time/posix-timers.c | 35 +++++++++++++++++++++++++++++++++++ + 5 files changed, 81 insertions(+), 3 deletions(-) --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -59,9 +60,20 @@ Signed-off-by: Thomas Gleixner { return timer->base->running == timer; } +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -436,7 +436,7 @@ int alarm_cancel(struct alarm *alarm) + int ret = alarm_try_to_cancel(alarm); + if (ret >= 0) + return ret; +- cpu_relax(); ++ hrtimer_wait_for_timer(&alarm->timer); + } + } + EXPORT_SYMBOL_GPL(alarm_cancel); --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -926,6 +926,33 @@ u64 hrtimer_forward(struct hrtimer *time +@@ -923,6 +923,33 @@ u64 hrtimer_forward(struct hrtimer *time } EXPORT_SYMBOL_GPL(hrtimer_forward); @@ -95,7 +107,7 @@ Signed-off-by: Thomas Gleixner /* * enqueue_hrtimer - internal function to (re)start a timer * -@@ -1158,7 +1185,7 @@ int hrtimer_cancel(struct hrtimer *timer +@@ -1155,7 +1182,7 @@ int hrtimer_cancel(struct hrtimer *timer if (ret >= 0) return ret; @@ -104,7 +116,7 @@ Signed-off-by: Thomas Gleixner } } EXPORT_SYMBOL_GPL(hrtimer_cancel); -@@ -1431,6 +1458,7 @@ static __latent_entropy void hrtimer_run +@@ -1428,6 +1455,7 @@ static __latent_entropy void hrtimer_run hrtimer_update_softirq_timer(cpu_base, true); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -112,8 +124,8 @@ Signed-off-by: Thomas Gleixner } #ifdef CONFIG_HIGH_RES_TIMERS -@@ -1793,6 +1821,9 @@ int hrtimers_prepare_cpu(unsigned int cp - cpu_base->hres_active = 0; +@@ -1794,6 +1822,9 @@ int hrtimers_prepare_cpu(unsigned int cp + cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; +#ifdef CONFIG_PREEMPT_RT_BASE @@ -134,8 +146,8 @@ Signed-off-by: Thomas Gleixner expires = timeval_to_ktime(value->it_value); --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c -@@ -796,6 +796,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_ - return overrun; +@@ -827,6 +827,22 @@ static void 
common_hrtimer_arm(struct k_ + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } +/* @@ -144,18 +156,20 @@ Signed-off-by: Thomas Gleixner +static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr) +{ +#ifdef CONFIG_PREEMPT_RT_FULL -+ if (kc->timer_set == common_timer_set) ++ if (kc->timer_arm == common_hrtimer_arm) + hrtimer_wait_for_timer(&timr->it.real.timer); ++ else if (kc == &alarm_clock) ++ hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer); + else + /* FIXME: Whacky hack for posix-cpu-timers */ + schedule_timeout(1); +#endif +} + - static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, - bool absolute, bool sigev_none) + static int common_hrtimer_try_to_cancel(struct k_itimer *timr) { -@@ -890,6 +904,7 @@ static int do_timer_settime(timer_t time + return hrtimer_try_to_cancel(&timr->it.real.timer); +@@ -891,6 +907,7 @@ static int do_timer_settime(timer_t time if (!timr) return -EINVAL; @@ -163,7 +177,7 @@ Signed-off-by: Thomas Gleixner kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; -@@ -898,9 +913,12 @@ static int do_timer_settime(timer_t time +@@ -899,9 +916,12 @@ static int do_timer_settime(timer_t time unlock_timer(timr, flag); if (error == TIMER_RETRY) { @@ -176,7 +190,7 @@ Signed-off-by: Thomas Gleixner return error; } -@@ -982,10 +1000,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t +@@ -983,10 +1003,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t if (!timer) return -EINVAL; @@ -192,7 +206,7 @@ Signed-off-by: Thomas Gleixner spin_lock(¤t->sighand->siglock); list_del(&timer->list); -@@ -1011,8 +1034,18 @@ static void itimer_delete(struct k_itime +@@ -1012,8 +1037,18 @@ static void itimer_delete(struct k_itime retry_delete: spin_lock_irqsave(&timer->it_lock, flags); diff --git a/debian/patches/features/all/rt/ide-don-t-disable-interrupts-during-kmap_atomic.patch b/debian/patches/features/all/rt/ide-don-t-disable-interrupts-during-kmap_atomic.patch new file mode 100644 index 000000000..57b6e301e --- /dev/null +++ b/debian/patches/features/all/rt/ide-don-t-disable-interrupts-during-kmap_atomic.patch @@ -0,0 +1,53 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 10 Apr 2018 17:15:40 +0200 +Subject: [PATCH] ide: don't disable interrupts during kmap_atomic() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +ide_pio_bytes() disables interrupts around kmap_atomic(). This is a +leftover from the old kmap_atomic() implementation which relied on fixed +mapping slots, so the caller had to make sure that the same slot could not +be reused from an interrupting context. + +kmap_atomic() was changed to dynamic slots long ago and commit 1ec9c5ddc17a +("include/linux/highmem.h: remove the second argument of k[un]map_atomic()") +removed the slot assignements, but the callers were not checked for now +redundant interrupt disabling. + +Remove the conditional interrupt disable. 
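
For orientation, the pattern the patch converges on is simple to state in isolation. The sketch below is illustrative only (copy_from_page is a made-up helper, not code from the patch): kmap_atomic() itself disables preemption and hands out a dynamic per-CPU slot, so a highmem access needs no local_irq_save() around it at all.

  #include <linux/highmem.h>
  #include <linux/string.h>

  /* Copy @len bytes out of a (possibly highmem) page at @offset.
   * No interrupt disabling: kmap_atomic() handles preemption itself. */
  static void copy_from_page(struct page *page, unsigned int offset,
                             void *dst, unsigned int len)
  {
          char *vaddr = kmap_atomic(page);

          memcpy(dst, vaddr + offset, len);
          kunmap_atomic(vaddr);
  }
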
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/ide/ide-taskfile.c | 8 -------- + 1 file changed, 8 deletions(-) + +--- a/drivers/ide/ide-taskfile.c ++++ b/drivers/ide/ide-taskfile.c +@@ -237,7 +237,6 @@ void ide_pio_bytes(ide_drive_t *drive, s + + while (len) { + unsigned nr_bytes = min(len, cursg->length - cmd->cursg_ofs); +- int page_is_high; + + page = sg_page(cursg); + offset = cursg->offset + cmd->cursg_ofs; +@@ -248,10 +247,6 @@ void ide_pio_bytes(ide_drive_t *drive, s + + nr_bytes = min_t(unsigned, nr_bytes, (PAGE_SIZE - offset)); + +- page_is_high = PageHighMem(page); +- if (page_is_high) +- local_irq_save(flags); +- + buf = kmap_atomic(page) + offset; + + cmd->nleft -= nr_bytes; +@@ -270,9 +265,6 @@ void ide_pio_bytes(ide_drive_t *drive, s + + kunmap_atomic(buf); + +- if (page_is_high) +- local_irq_restore(flags); +- + len -= nr_bytes; + } + } diff --git a/debian/patches/features/all/rt/ide-don-t-disable-interrupts-if-they-are-already-dis.patch b/debian/patches/features/all/rt/ide-don-t-disable-interrupts-if-they-are-already-dis.patch new file mode 100644 index 000000000..e1d8ff308 --- /dev/null +++ b/debian/patches/features/all/rt/ide-don-t-disable-interrupts-if-they-are-already-dis.patch @@ -0,0 +1,43 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 4 Apr 2018 21:42:28 +0200 +Subject: [PATCH] ide: Handle irq disabling consistently +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +ide_timer_expiry() disables interrupt at function entry when acquiring +hwif->lock. Before disabling the device interrupt it unlocks hwif->lock, +but interrupts stay disabled. After the call to disable_irq() interrupts +are disabled again, which is a pointless exercise. + +After the device irq handler has been invoked with interrupts disabled, +hwif->lock is acquired again with spin_lock_irq() because the device irq +handler might have reenabled interrupts. This is not documented and +confusing for the casual reader. + +Remove the redundant local_irq_disable() and add a comment which explains +why hwif->lock has to be reacquired with spin_lock_irq(). + +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/ide/ide-io.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/ide/ide-io.c ++++ b/drivers/ide/ide-io.c +@@ -659,8 +659,7 @@ void ide_timer_expiry (struct timer_list + spin_unlock(&hwif->lock); + /* disable_irq_nosync ?? 
*/ + disable_irq(hwif->irq); +- /* local CPU only, as if we were handling an interrupt */ +- local_irq_disable(); ++ + if (hwif->polling) { + startstop = handler(drive); + } else if (drive_is_ready(drive)) { +@@ -679,6 +678,7 @@ void ide_timer_expiry (struct timer_list + startstop = ide_error(drive, "irq timeout", + hwif->tp_ops->read_status(hwif)); + } ++ /* Disable interrupts again, `handler' might have enabled it */ + spin_lock_irq(&hwif->lock); + enable_irq(hwif->irq); + if (startstop == ide_stopped && hwif->polling == 0) { diff --git a/debian/patches/features/all/rt/ide-don-t-enable-disable-interrupts-in-force-threade.patch b/debian/patches/features/all/rt/ide-don-t-enable-disable-interrupts-in-force-threade.patch new file mode 100644 index 000000000..304ca5ee4 --- /dev/null +++ b/debian/patches/features/all/rt/ide-don-t-enable-disable-interrupts-in-force-threade.patch @@ -0,0 +1,80 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 19 Apr 2018 11:22:55 +0200 +Subject: [PATCH] ide: don't enable/disable interrupts in force threaded-IRQ + mode +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The interrupts are enabled/disabled so the interrupt handler can run +with enabled interrupts while serving the interrupt and not lose other +interrupts especially the timer tick. +If the system runs with force-threaded interrupts then there is no need +to enable the interrupts. + +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/ide/ide-iops.c | 13 +++++++++---- + drivers/ide/ide-taskfile.c | 2 +- + kernel/irq/manage.c | 1 + + 3 files changed, 11 insertions(+), 5 deletions(-) + +--- a/drivers/ide/ide-iops.c ++++ b/drivers/ide/ide-iops.c +@@ -108,6 +108,7 @@ int __ide_wait_stat(ide_drive_t *drive, + ide_hwif_t *hwif = drive->hwif; + const struct ide_tp_ops *tp_ops = hwif->tp_ops; + unsigned long flags; ++ bool irqs_threaded = force_irqthreads; + int i; + u8 stat; + +@@ -115,8 +116,10 @@ int __ide_wait_stat(ide_drive_t *drive, + stat = tp_ops->read_status(hwif); + + if (stat & ATA_BUSY) { +- local_save_flags(flags); +- local_irq_enable_in_hardirq(); ++ if (!irqs_threaded) { ++ local_save_flags(flags); ++ local_irq_enable_in_hardirq(); ++ } + timeout += jiffies; + while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) { + if (time_after(jiffies, timeout)) { +@@ -129,12 +132,14 @@ int __ide_wait_stat(ide_drive_t *drive, + if ((stat & ATA_BUSY) == 0) + break; + +- local_irq_restore(flags); ++ if (!irqs_threaded) ++ local_irq_restore(flags); + *rstat = stat; + return -EBUSY; + } + } +- local_irq_restore(flags); ++ if (!irqs_threaded) ++ local_irq_restore(flags); + } + /* + * Allow status to settle, then read it again. 
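
The shape of the __ide_wait_stat() change is easier to follow in one piece. This is a condensed sketch of the pattern, not the literal hunk; the busy-poll body and error handling are elided:

  #include <linux/interrupt.h>    /* force_irqthreads */
  #include <linux/irqflags.h>

  static void wait_while_busy(void)
  {
          unsigned long flags;
          bool irqs_threaded = force_irqthreads;

          /* In forced-threaded mode the handler already runs in task
           * context with interrupts on, so skip the hardirq dance. */
          if (!irqs_threaded) {
                  local_save_flags(flags);
                  local_irq_enable_in_hardirq();
          }

          /* ... poll the ATA_BUSY bit with a timeout here ... */

          if (!irqs_threaded)
                  local_irq_restore(flags);
  }
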
+--- a/drivers/ide/ide-taskfile.c ++++ b/drivers/ide/ide-taskfile.c +@@ -405,7 +405,7 @@ static ide_startstop_t pre_task_out_intr + return startstop; + } + +- if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) ++ if (!force_irqthreads && (drive->dev_flags & IDE_DFLAG_UNMASK) == 0) + local_irq_disable(); + + ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -25,6 +25,7 @@ + + #ifdef CONFIG_IRQ_FORCED_THREADING + __read_mostly bool force_irqthreads; ++EXPORT_SYMBOL_GPL(force_irqthreads); + + static int __init setup_forced_irqthreads(char *arg) + { diff --git a/debian/patches/features/all/rt/include-linux-u64_stats_sync.h-Remove-functions-with.patch b/debian/patches/features/all/rt/include-linux-u64_stats_sync.h-Remove-functions-with.patch new file mode 100644 index 000000000..c9e15025b --- /dev/null +++ b/debian/patches/features/all/rt/include-linux-u64_stats_sync.h-Remove-functions-with.patch @@ -0,0 +1,40 @@ +From: Anna-Maria Gleixner +Date: Wed, 4 Apr 2018 11:44:00 +0200 +Subject: [PATCH] net: u64_stats_sync: Remove functions without user +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Commit 67db3e4bfbc9 ("tcp: no longer hold ehash lock while calling +tcp_get_info()") removes the only users of u64_stats_update_end/begin_raw() +without removing the function in header file. + +Remove no longer used functions. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/u64_stats_sync.h | 14 -------------- + 1 file changed, 14 deletions(-) + +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -112,20 +112,6 @@ u64_stats_update_end_irqrestore(struct u + #endif + } + +-static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp) +-{ +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +- raw_write_seqcount_begin(&syncp->seq); +-#endif +-} +- +-static inline void u64_stats_update_end_raw(struct u64_stats_sync *syncp) +-{ +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +- raw_write_seqcount_end(&syncp->seq); +-#endif +-} +- + static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) + { + #if BITS_PER_LONG==32 && defined(CONFIG_SMP) diff --git a/debian/patches/features/all/rt/iommu-amd-Cleanup-locking-in-__attach-detach_device.patch b/debian/patches/features/all/rt/iommu-amd-Cleanup-locking-in-__attach-detach_device.patch new file mode 100644 index 000000000..9c5397727 --- /dev/null +++ b/debian/patches/features/all/rt/iommu-amd-Cleanup-locking-in-__attach-detach_device.patch @@ -0,0 +1,169 @@ +From: Anna-Maria Gleixner +Date: Mon, 16 Apr 2018 16:15:24 +0200 +Subject: [PATCH] iommu/amd: Cleanup locking in __attach/detach_device() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Since introduction of the pd_bitmap_lock in commit 2bc001808904 +("iommu/amd: Split domain id out of amd_iommu_devtable_lock") +amd_iommu_devtable_lock is only taken around __detach_device() and +__attach_device() calls. + +The lock is not protecting anything as all operations are domain specific +and protected by domain->lock in __detach_device() and __attach_device(), +so amd_iommu_devtable_lock has no real purpose anymore. + +Lock domain->lock before calling into __detach_device() and +__attach_device() and simplify the implementation of those functions. Add +lockdep checks where appropriate. 
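
The resulting locking discipline is simple enough to show stand-alone. This is a pared-down sketch with hypothetical type and function names (the real code operates on struct protection_domain and iommu_dev_data): the caller takes domain->lock, the helpers merely assert it.

  #include <linux/spinlock.h>
  #include <linux/lockdep.h>

  struct dom_sketch {
          spinlock_t lock;
          /* ... domain state ... */
  };

  static void __do_attach(struct dom_sketch *dom)
  {
          lockdep_assert_held(&dom->lock);        /* caller owns the lock */
          /* ... manipulate domain state ... */
  }

  static void do_attach(struct dom_sketch *dom)
  {
          unsigned long flags;

          spin_lock_irqsave(&dom->lock, flags);
          __do_attach(dom);
          spin_unlock_irqrestore(&dom->lock, flags);
  }
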
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/iommu/amd_iommu.c | 70 +++++++++------------------------------------- + 1 file changed, 15 insertions(+), 55 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -80,7 +80,6 @@ + */ + #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + +-static DEFINE_SPINLOCK(amd_iommu_devtable_lock); + static DEFINE_SPINLOCK(pd_bitmap_lock); + + /* List of all available dev_data structures */ +@@ -1884,6 +1883,8 @@ static void do_attach(struct iommu_dev_d + u16 alias; + bool ats; + ++ lockdep_assert_held(&domain->lock); ++ + iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = dev_data->alias; + ats = dev_data->ats.enabled; +@@ -1904,11 +1905,13 @@ static void do_attach(struct iommu_dev_d + device_flush_dte(dev_data); + } + +-static void do_detach(struct iommu_dev_data *dev_data) ++static void __detach_device(struct iommu_dev_data *dev_data) + { + struct amd_iommu *iommu; + u16 alias; + ++ lockdep_assert_held(&dev_data->domain->lock); ++ + iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = dev_data->alias; + +@@ -1934,32 +1937,13 @@ static void do_detach(struct iommu_dev_d + static int __attach_device(struct iommu_dev_data *dev_data, + struct protection_domain *domain) + { +- int ret; +- +- /* +- * Must be called with IRQs disabled. Warn here to detect early +- * when its not. +- */ +- WARN_ON(!irqs_disabled()); +- +- /* lock domain */ +- spin_lock(&domain->lock); +- +- ret = -EBUSY; + if (dev_data->domain != NULL) +- goto out_unlock; ++ return -EBUSY; + + /* Attach alias group root */ + do_attach(dev_data, domain); + +- ret = 0; +- +-out_unlock: +- +- /* ready */ +- spin_unlock(&domain->lock); +- +- return ret; ++ return 0; + } + + +@@ -2086,9 +2070,10 @@ static int attach_device(struct device * + } + + skip_ats_check: +- spin_lock_irqsave(&amd_iommu_devtable_lock, flags); ++ ++ spin_lock_irqsave(&domain->lock, flags); + ret = __attach_device(dev_data, domain); +- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); ++ spin_unlock_irqrestore(&domain->lock, flags); + + /* + * We might boot into a crash-kernel here. The crashed kernel +@@ -2101,29 +2086,7 @@ static int attach_device(struct device * + } + + /* +- * Removes a device from a protection domain (unlocked) +- */ +-static void __detach_device(struct iommu_dev_data *dev_data) +-{ +- struct protection_domain *domain; +- +- /* +- * Must be called with IRQs disabled. Warn here to detect early +- * when its not. 
+- */ +- WARN_ON(!irqs_disabled()); +- +- domain = dev_data->domain; +- +- spin_lock(&domain->lock); +- +- do_detach(dev_data); +- +- spin_unlock(&domain->lock); +-} +- +-/* +- * Removes a device from a protection domain (with devtable_lock held) ++ * Removes a device from a protection domain + */ + static void detach_device(struct device *dev) + { +@@ -2143,10 +2106,9 @@ static void detach_device(struct device + if (WARN_ON(!dev_data->domain)) + return; + +- /* lock device table */ +- spin_lock_irqsave(&amd_iommu_devtable_lock, flags); ++ spin_lock_irqsave(&domain->lock, flags); + __detach_device(dev_data); +- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); ++ spin_unlock_irqrestore(&domain->lock, flags); + + if (!dev_is_pci(dev)) + return; +@@ -2809,16 +2771,14 @@ static void cleanup_domain(struct protec + struct iommu_dev_data *entry; + unsigned long flags; + +- spin_lock_irqsave(&amd_iommu_devtable_lock, flags); +- ++ spin_lock_irqsave(&domain->lock, flags); + while (!list_empty(&domain->dev_list)) { + entry = list_first_entry(&domain->dev_list, + struct iommu_dev_data, list); + BUG_ON(!entry->domain); + __detach_device(entry); + } +- +- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); ++ spin_unlock_irqrestore(&domain->lock, flags); + } + + static void protection_domain_free(struct protection_domain *domain) diff --git a/debian/patches/features/all/rt/iommu-amd-Do-not-flush-when-device-is-busy.patch b/debian/patches/features/all/rt/iommu-amd-Do-not-flush-when-device-is-busy.patch new file mode 100644 index 000000000..8576a0831 --- /dev/null +++ b/debian/patches/features/all/rt/iommu-amd-Do-not-flush-when-device-is-busy.patch @@ -0,0 +1,85 @@ +From: Anna-Maria Gleixner +Date: Mon, 16 Apr 2018 16:15:25 +0200 +Subject: [PATCH] iommu/amd: Do not flush when device is busy +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +When device is already attached to a domain, there is no need to execute +the domain_flush_tlb_pde(). Therefore move the check if the domain is set +into attach_device(). 
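
Condensed, the attach path after this change reads as below. This is a sketch with stand-in types, not the driver's literal code; the assignment stands in for __attach_device() and the comment for domain_flush_tlb_pde(). The point is that the -EBUSY bail-out now happens before the lock is taken, so the flush only runs for an attach that actually happened.

  #include <linux/errno.h>
  #include <linux/spinlock.h>

  struct dom_sketch { spinlock_t lock; };
  struct dev_sketch { struct dom_sketch *domain; };

  static int attach_sketch(struct dev_sketch *dd, struct dom_sketch *dom)
  {
          unsigned long flags;

          if (dd->domain != NULL)
                  return -EBUSY;          /* already attached: no flush */

          spin_lock_irqsave(&dom->lock, flags);
          dd->domain = dom;               /* stands in for __attach_device() */
          spin_unlock_irqrestore(&dom->lock, flags);

          /* domain_flush_tlb_pde() would run here, and only here */
          return 0;
  }
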
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/iommu/amd_iommu.c | 32 ++++++++++---------------------- + 1 file changed, 10 insertions(+), 22 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -1876,8 +1876,11 @@ static void clear_dte_entry(u16 devid) + amd_iommu_apply_erratum_63(devid); + } + +-static void do_attach(struct iommu_dev_data *dev_data, +- struct protection_domain *domain) ++/* ++ * This function does assigns the device visible for the hardware ++ */ ++static void __attach_device(struct iommu_dev_data *dev_data, ++ struct protection_domain *domain) + { + struct amd_iommu *iommu; + u16 alias; +@@ -1930,23 +1933,6 @@ static void __detach_device(struct iommu + device_flush_dte(dev_data); + } + +-/* +- * If a device is not yet associated with a domain, this function does +- * assigns it visible for the hardware +- */ +-static int __attach_device(struct iommu_dev_data *dev_data, +- struct protection_domain *domain) +-{ +- if (dev_data->domain != NULL) +- return -EBUSY; +- +- /* Attach alias group root */ +- do_attach(dev_data, domain); +- +- return 0; +-} +- +- + static void pdev_iommuv2_disable(struct pci_dev *pdev) + { + pci_disable_ats(pdev); +@@ -2043,7 +2029,6 @@ static int attach_device(struct device * + struct pci_dev *pdev; + struct iommu_dev_data *dev_data; + unsigned long flags; +- int ret; + + dev_data = get_dev_data(dev); + +@@ -2071,8 +2056,11 @@ static int attach_device(struct device * + + skip_ats_check: + ++ if (dev_data->domain != NULL) ++ return -EBUSY; ++ + spin_lock_irqsave(&domain->lock, flags); +- ret = __attach_device(dev_data, domain); ++ __attach_device(dev_data, domain); + spin_unlock_irqrestore(&domain->lock, flags); + + /* +@@ -2082,7 +2070,7 @@ static int attach_device(struct device * + */ + domain_flush_tlb_pde(domain); + +- return ret; ++ return 0; + } + + /* diff --git a/debian/patches/features/all/rt/iommu-amd-Prevent-possible-null-pointer-dereference-.patch b/debian/patches/features/all/rt/iommu-amd-Prevent-possible-null-pointer-dereference-.patch new file mode 100644 index 000000000..b3c894961 --- /dev/null +++ b/debian/patches/features/all/rt/iommu-amd-Prevent-possible-null-pointer-dereference-.patch @@ -0,0 +1,79 @@ +From: Anna-Maria Gleixner +Date: Mon, 16 Apr 2018 16:15:23 +0200 +Subject: [PATCH] iommu/amd: Prevent possible null pointer dereference and + infinite loop +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The check for !dev_data->domain in __detach_device() emits a warning and +returns. The calling code in detach_device() dereferences dev_data->domain +afterwards unconditionally, so in case that dev_data->domain is NULL the +warning will be immediately followed by a NULL pointer dereference. + +The calling code in cleanup_domain() loops infinite when !dev_data->domain +and the check in __detach_device() returns immediately because dev_list is +not changed. + +do_detach() duplicates this check without throwing a warning. + +Move the check with the explanation of the do_detach() code into the caller +detach_device() and return immediately. Throw an error, when hitting the +condition in cleanup_domain(). 
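
The infinite-loop case is easiest to see in a reduced model of cleanup_domain() (stand-in types; list_del() stands in for the unlink done by __detach_device()): the loop only terminates because each pass removes the head entry, so an early return on !domain would leave the list unchanged forever. The BUG_ON turns that silent hang into a loud failure.

  #include <linux/list.h>
  #include <linux/bug.h>

  struct entry_sketch {
          struct list_head list;
          void *domain;
  };

  static void cleanup_sketch(struct list_head *dev_list)
  {
          struct entry_sketch *e;

          while (!list_empty(dev_list)) {
                  e = list_first_entry(dev_list, struct entry_sketch, list);
                  BUG_ON(!e->domain);     /* old code: silent return here */
                  list_del(&e->list);     /* must unlink, or loop forever */
          }
  }
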
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/iommu/amd_iommu.c | 22 ++++++++++------------ + 1 file changed, 10 insertions(+), 12 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -1909,15 +1909,6 @@ static void do_detach(struct iommu_dev_d + struct amd_iommu *iommu; + u16 alias; + +- /* +- * First check if the device is still attached. It might already +- * be detached from its domain because the generic +- * iommu_detach_group code detached it and we try again here in +- * our alias handling. +- */ +- if (!dev_data->domain) +- return; +- + iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = dev_data->alias; + +@@ -2122,9 +2113,6 @@ static void __detach_device(struct iommu + */ + WARN_ON(!irqs_disabled()); + +- if (WARN_ON(!dev_data->domain)) +- return; +- + domain = dev_data->domain; + + spin_lock(&domain->lock); +@@ -2146,6 +2134,15 @@ static void detach_device(struct device + dev_data = get_dev_data(dev); + domain = dev_data->domain; + ++ /* ++ * First check if the device is still attached. It might already ++ * be detached from its domain because the generic ++ * iommu_detach_group code detached it and we try again here in ++ * our alias handling. ++ */ ++ if (WARN_ON(!dev_data->domain)) ++ return; ++ + /* lock device table */ + spin_lock_irqsave(&amd_iommu_devtable_lock, flags); + __detach_device(dev_data); +@@ -2817,6 +2814,7 @@ static void cleanup_domain(struct protec + while (!list_empty(&domain->dev_list)) { + entry = list_first_entry(&domain->dev_list, + struct iommu_dev_data, list); ++ BUG_ON(!entry->domain); + __detach_device(entry); + } + diff --git a/debian/patches/features/all/rt/iommu-amd-hide-unused-iommu_table_lock.patch b/debian/patches/features/all/rt/iommu-amd-hide-unused-iommu_table_lock.patch new file mode 100644 index 000000000..0e6971f4d --- /dev/null +++ b/debian/patches/features/all/rt/iommu-amd-hide-unused-iommu_table_lock.patch @@ -0,0 +1,38 @@ +From: Arnd Bergmann +Date: Wed, 4 Apr 2018 12:56:59 +0200 +Subject: [PATCH] iommu: amd: hide unused iommu_table_lock +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The newly introduced lock is only used when CONFIG_IRQ_REMAP is enabled: + +drivers/iommu/amd_iommu.c:86:24: error: 'iommu_table_lock' defined but not used [-Werror=unused-variable] + static DEFINE_SPINLOCK(iommu_table_lock); + +This moves the definition next to the user, within the #ifdef protected +section of the file. 
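
The general recipe recurs whenever a lock is split out for a feature-gated subsystem, so it is worth spelling out as a sketch (CONFIG_IRQ_REMAP and iommu_table_lock are the names from this driver; the function is hypothetical): define the static lock inside the same #ifdef block as its only users, and -Werror=unused-variable can no longer fire in the other configuration.

  #include <linux/spinlock.h>

  #ifdef CONFIG_IRQ_REMAP
  static DEFINE_SPINLOCK(iommu_table_lock);

  static void irq_table_op(void)          /* hypothetical user */
  {
          unsigned long flags;

          spin_lock_irqsave(&iommu_table_lock, flags);
          /* ... modify the irq lookup table ... */
          spin_unlock_irqrestore(&iommu_table_lock, flags);
  }
  #endif /* CONFIG_IRQ_REMAP */
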
+ +Fixes: ea6166f4b83e ("iommu/amd: Split irq_lookup_table out of the amd_iommu_devtable_lock") +Signed-off-by: Arnd Bergmann +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/iommu/amd_iommu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -82,7 +82,6 @@ + + static DEFINE_SPINLOCK(amd_iommu_devtable_lock); + static DEFINE_SPINLOCK(pd_bitmap_lock); +-static DEFINE_SPINLOCK(iommu_table_lock); + + /* List of all available dev_data structures */ + static LLIST_HEAD(dev_data_list); +@@ -3573,6 +3572,7 @@ EXPORT_SYMBOL(amd_iommu_device_info); + *****************************************************************************/ + + static struct irq_chip amd_ir_chip; ++static DEFINE_SPINLOCK(iommu_table_lock); + + static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) + { diff --git a/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch b/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch index f8ec01d5b..5854f7b97 100644 --- a/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch +++ b/debian/patches/features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch @@ -1,7 +1,7 @@ Subject: genirq: Allow disabling of softirq processing in irq thread context From: Thomas Gleixner Date: Tue, 31 Jan 2012 13:01:27 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The processing of softirqs in irq thread context is a performance gain for the non-rt workloads of a system, but it's counterproductive for @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -922,7 +922,15 @@ irq_forced_thread_fn(struct irq_desc *de +@@ -924,7 +924,15 @@ irq_forced_thread_fn(struct irq_desc *de local_bh_disable(); ret = action->thread_fn(action->irq, action->dev_id); irq_finalize_oneshot(desc, action); @@ -82,7 +82,7 @@ Signed-off-by: Thomas Gleixner return ret; } -@@ -1410,6 +1418,9 @@ static int +@@ -1427,6 +1435,9 @@ static int irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } @@ -129,7 +129,7 @@ Signed-off-by: Thomas Gleixner return desc->status_use_accessors & _IRQ_PER_CPU; --- a/kernel/softirq.c +++ b/kernel/softirq.c -@@ -589,6 +589,15 @@ void __local_bh_enable(void) +@@ -590,6 +590,15 @@ void __local_bh_enable(void) } EXPORT_SYMBOL(__local_bh_enable); diff --git a/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch b/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch index 16e85a63f..30bc2a677 100644 --- a/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch +++ b/debian/patches/features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch @@ -1,7 +1,7 @@ Subject: irqwork: Move irq safe work to irq context From: Thomas Gleixner Date: Sun, 15 Nov 2015 18:40:17 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On architectures where arch_irq_work_has_interrupt() returns false, we end up running the irq safe work from the softirq context. 
That @@ -23,7 +23,7 @@ Cc: stable-rt@vger.kernel.org --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h -@@ -53,4 +53,10 @@ static inline bool irq_work_needs_cpu(vo +@@ -54,4 +54,10 @@ static inline bool irq_work_needs_cpu(vo static inline void irq_work_run(void) { } #endif @@ -36,7 +36,7 @@ Cc: stable-rt@vger.kernel.org #endif /* _LINUX_IRQ_WORK_H */ --- a/kernel/irq_work.c +++ b/kernel/irq_work.c -@@ -200,8 +200,17 @@ void irq_work_tick(void) +@@ -203,8 +203,17 @@ void irq_work_tick(void) if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) irq_work_run_list(raised); @@ -56,7 +56,7 @@ Cc: stable-rt@vger.kernel.org * Synchronize against the irq_work @entry, ensures the entry is not --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1638,7 +1638,7 @@ void update_process_times(int user_tick) +@@ -1676,7 +1676,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -65,7 +65,7 @@ Cc: stable-rt@vger.kernel.org if (in_irq()) irq_work_tick(); #endif -@@ -1679,9 +1679,7 @@ static __latent_entropy void run_timer_s +@@ -1717,9 +1717,7 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch b/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch index 1ee2358fd..aee88228d 100644 --- a/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch +++ b/debian/patches/features/all/rt/irqwork-push_most_work_into_softirq_context.patch @@ -1,7 +1,7 @@ Subject: irqwork: push most work into softirq context From: Sebastian Andrzej Siewior Date: Tue, 23 Jun 2015 15:32:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Initially we defered all irqwork into softirq because we didn't want the latency spikes if perf or another user was busy and delayed the RT task. 
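
The routing rule this patch introduces boils down to a few lines. The sketch below uses the patch's own names (IRQ_WORK_HARD_IRQ, raised_list, lazy_list) but drops the IRQ_WORK_LAZY and nohz handling for brevity, and declares arch_irq_work_raise() by hand, so treat it as a condensed model rather than the queued code:

  #include <linux/irq_work.h>
  #include <linux/llist.h>
  #include <linux/percpu.h>

  extern void arch_irq_work_raise(void);

  static DEFINE_PER_CPU(struct llist_head, raised_list);
  static DEFINE_PER_CPU(struct llist_head, lazy_list);

  static void queue_sketch(struct irq_work *work)
  {
          struct llist_head *list;

          /* On RT, only work flagged IRQ_WORK_HARD_IRQ may run from
           * hard interrupt context; everything else goes to softirq. */
          if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) &&
              !(work->flags & IRQ_WORK_HARD_IRQ))
                  list = this_cpu_ptr(&lazy_list);
          else
                  list = this_cpu_ptr(&raised_list);

          if (llist_add(&work->llnode, list))
                  arch_irq_work_raise();
  }
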
@@ -20,23 +20,24 @@ Mike Galbraith, Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/irq_work.h | 1 + - kernel/irq_work.c | 47 ++++++++++++++++++++++++++++++++++------------- - kernel/sched/topology.c | 1 + - kernel/time/tick-sched.c | 1 + - kernel/time/timer.c | 5 ++++- - 5 files changed, 41 insertions(+), 14 deletions(-) + include/linux/irq_work.h | 2 + + kernel/irq_work.c | 51 +++++++++++++++++++++++++++++++++++------------ + kernel/sched/topology.c | 1 + kernel/time/tick-sched.c | 1 + kernel/time/timer.c | 5 +++- + 5 files changed, 46 insertions(+), 14 deletions(-) --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h -@@ -17,6 +17,7 @@ - #define IRQ_WORK_BUSY 2UL - #define IRQ_WORK_FLAGS 3UL - #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ -+#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ +@@ -18,6 +18,8 @@ + + /* Doesn't want IPI, wait for tick: */ + #define IRQ_WORK_LAZY BIT(2) ++/* Run hard IRQ context, even on RT */ ++#define IRQ_WORK_HARD_IRQ BIT(3) + + #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) - struct irq_work { - unsigned long flags; --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -17,6 +17,7 @@ @@ -47,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include -@@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void) +@@ -64,6 +65,8 @@ void __weak arch_irq_work_raise(void) */ bool irq_work_queue_on(struct irq_work *work, int cpu) { @@ -56,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(cpu)); -@@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work * +@@ -76,7 +79,12 @@ bool irq_work_queue_on(struct irq_work * if (!irq_work_claim(work)) return false; @@ -69,8 +70,8 @@ Signed-off-by: Sebastian Andrzej Siewior + if (llist_add(&work->llnode, list)) arch_send_call_function_single_ipi(cpu); - return true; -@@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on); + #else /* #ifdef CONFIG_SMP */ +@@ -89,6 +97,9 @@ bool irq_work_queue_on(struct irq_work * /* Enqueue the irq work @work on the current CPU */ bool irq_work_queue(struct irq_work *work) { @@ -80,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Only queue if not already pending */ if (!irq_work_claim(work)) return false; -@@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *wor +@@ -96,13 +107,15 @@ bool irq_work_queue(struct irq_work *wor /* Queue the entry and raise the IPI if needed. */ preempt_disable(); @@ -103,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior arch_irq_work_raise(); } -@@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void) +@@ -119,9 +132,8 @@ bool irq_work_needs_cpu(void) raised = this_cpu_ptr(&raised_list); lazy = this_cpu_ptr(&lazy_list); @@ -115,16 +116,21 @@ Signed-off-by: Sebastian Andrzej Siewior /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); -@@ -132,7 +144,7 @@ static void irq_work_run_list(struct lli - struct irq_work *work; +@@ -135,8 +147,12 @@ static void irq_work_run_list(struct lli struct llist_node *llnode; + unsigned long flags; -- BUG_ON(!irqs_disabled()); -+ BUG_ON_NONRT(!irqs_disabled()); - ++#ifndef CONFIG_PREEMPT_RT_FULL ++ /* ++ * nort: On RT IRQ-work may run in SOFTIRQ context. 
++ */ + BUG_ON(!irqs_disabled()); +- ++#endif if (llist_empty(list)) return; -@@ -169,7 +181,16 @@ static void irq_work_run_list(struct lli + +@@ -168,7 +184,16 @@ static void irq_work_run_list(struct lli void irq_work_run(void) { irq_work_run_list(this_cpu_ptr(&raised_list)); @@ -144,7 +150,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c -@@ -273,6 +273,7 @@ static int init_rootdomain(struct root_d +@@ -287,6 +287,7 @@ static int init_rootdomain(struct root_d rd->rto_cpu = -1; raw_spin_lock_init(&rd->rto_lock); init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); @@ -164,7 +170,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1638,7 +1638,7 @@ void update_process_times(int user_tick) +@@ -1676,7 +1676,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -173,7 +179,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (in_irq()) irq_work_tick(); #endif -@@ -1679,6 +1679,9 @@ static __latent_entropy void run_timer_s +@@ -1717,6 +1717,9 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/debian/patches/features/all/rt/jump-label-rt.patch b/debian/patches/features/all/rt/jump-label-rt.patch index 4cf014b1c..4867300c7 100644 --- a/debian/patches/features/all/rt/jump-label-rt.patch +++ b/debian/patches/features/all/rt/jump-label-rt.patch @@ -1,7 +1,7 @@ Subject: jump-label: disable if stop_machine() is used From: Thomas Gleixner Date: Wed, 08 Jul 2015 17:14:48 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Some architectures are using stop_machine() while switching the opcode which leads to latency spikes. 
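For context, the facility being gated by the Kconfig change below is the static-key (jump label) API, whose slow path rewrites opcodes at runtime. A hedged sketch of typical usage, with a hypothetical key and helper, shows where the stop_machine() latency comes from:

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(my_feature);	/* hypothetical key */

static void do_extra_accounting(void) { /* ... */ }

static void hot_path(void)
{
	/* Compiles to a patchable NOP/jump rather than a load and test. */
	if (static_branch_unlikely(&my_feature))
		do_extra_accounting();
}

static void enable_feature(void)
{
	/* Rewrites all branch sites. On architectures that patch code
	 * via stop_machine() this stalls every CPU, which is the latency
	 * spike the patch avoids on -RT by dropping HAVE_ARCH_JUMP_LABEL. */
	static_branch_enable(&my_feature);
}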
@@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -45,7 +45,7 @@ config ARM +@@ -47,7 +47,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 diff --git a/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch b/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch index 8e9419e40..e75f813d3 100644 --- a/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch +++ b/debian/patches/features/all/rt/kconfig-disable-a-few-options-rt.patch @@ -1,7 +1,7 @@ Subject: kconfig: Disable config options which are not RT compatible From: Thomas Gleixner Date: Sun, 24 Jul 2011 12:11:43 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Disable stuff which is known to have issues on RT diff --git a/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch b/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch index a2687ba51..4eec2bd77 100644 --- a/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch +++ b/debian/patches/features/all/rt/kconfig-preempt-rt-full.patch @@ -1,7 +1,7 @@ Subject: kconfig: Add PREEMPT_RT_FULL From: Thomas Gleixner Date: Wed, 29 Jun 2011 14:58:57 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Introduce the final symbol for PREEMPT_RT_FULL. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/init/Makefile +++ b/init/Makefile -@@ -36,4 +36,4 @@ mounts-$(CONFIG_BLK_DEV_MD) += do_mounts +@@ -34,4 +34,4 @@ mounts-$(CONFIG_BLK_DEV_MD) += do_mounts include/generated/compile.h: FORCE @$($(quiet)chk_compile.h) $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ @@ -49,7 +49,7 @@ Signed-off-by: Thomas Gleixner vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } -@@ -58,6 +59,7 @@ UTS_VERSION="#$VERSION" +@@ -53,6 +54,7 @@ UTS_VERSION="#$VERSION" CONFIG_FLAGS="" if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi diff --git a/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch b/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch index 2131b2b73..bf150e3e1 100644 --- a/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch +++ b/debian/patches/features/all/rt/kernel-SRCU-provide-a-static-initializer.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Tue, 19 Mar 2013 14:44:30 +0100 Subject: kernel/SRCU: provide a static initializer -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz There are macros for static initializer for the three out of four possible notifier types, that are: @@ -142,7 +142,7 @@ Signed-off-by: Sebastian Andrzej Siewior { \ - .sda = &name##_srcu_data, \ + .sda = &pcpu_name, \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = 0 - 1, \ __SRCU_DEP_MAP_INIT(name) \ @@ -133,7 +133,7 @@ struct srcu_struct { diff --git 
a/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch b/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch index 6f021b898..bf48d2e0f 100644 --- a/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch +++ b/debian/patches/features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 19 May 2016 17:45:27 +0200 Subject: [PATCH] kernel/printk: Don't try to print from IRQ/NMI region -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On -RT we try to acquire sleeping locks which might lead to warnings from lockdep or a warn_on() from spin_try_lock() (which is a rtmutex on @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1617,6 +1617,11 @@ static void call_console_drivers(const c +@@ -1771,6 +1771,11 @@ static void call_console_drivers(const c if (!console_drivers) return; @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior migrate_disable(); for_each_console(con) { if (exclusive_console && con != exclusive_console) -@@ -2349,6 +2354,11 @@ void console_unblank(void) +@@ -2532,6 +2537,11 @@ void console_unblank(void) { struct console *c; diff --git a/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch b/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch index 224b6129a..7eb9b3ef8 100644 --- a/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch +++ b/debian/patches/features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch @@ -4,7 +4,7 @@ Subject: [PATCH] kernel: sched: Provide a pointer to the valid CPU mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz In commit 4b53a3412d66 ("sched/core: Remove the tsk_nr_cpus_allowed() wrapper") the tsk_nr_cpus_allowed() wrapper was removed. 
There was not @@ -68,20 +68,20 @@ Signed-off-by: Sebastian Andrzej Siewior drivers/infiniband/hw/hfi1/sdma.c | 3 -- drivers/infiniband/hw/qib/qib_file_ops.c | 7 ++-- fs/proc/array.c | 4 +- - include/linux/init_task.h | 3 +- include/linux/sched.h | 5 ++- + init/init_task.c | 3 +- kernel/cgroup/cpuset.c | 2 - kernel/fork.c | 3 +- kernel/sched/core.c | 42 ++++++++++++++--------------- kernel/sched/cpudeadline.c | 4 +- kernel/sched/cpupri.c | 4 +- kernel/sched/deadline.c | 6 ++-- - kernel/sched/fair.c | 28 +++++++++---------- + kernel/sched/fair.c | 32 +++++++++++----------- kernel/sched/rt.c | 4 +- kernel/trace/trace_hwlat.c | 2 - lib/smp_processor_id.c | 2 - samples/trace_events/trace-events-sample.c | 2 - - 24 files changed, 78 insertions(+), 77 deletions(-) + 24 files changed, 80 insertions(+), 79 deletions(-) --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c -@@ -1192,12 +1192,12 @@ static void mt_ase_fp_affinity(void) +@@ -1199,12 +1199,12 @@ static void mt_ase_fp_affinity(void) * restricted the allowed set to exclude any CPUs with FPUs, * we'll skip the procedure. */ @@ -234,7 +234,7 @@ Signed-off-by: Sebastian Andrzej Siewior cpumask_pr_args(proc_mask)); --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c -@@ -856,14 +856,13 @@ struct sdma_engine *sdma_select_user_eng +@@ -855,14 +855,13 @@ struct sdma_engine *sdma_select_user_eng { struct sdma_rht_node *rht_node; struct sdma_engine *sde = NULL; @@ -252,7 +252,7 @@ Signed-off-by: Sebastian Andrzej Siewior cpu_id = smp_processor_id(); --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c -@@ -1163,7 +1163,7 @@ static unsigned int qib_poll(struct file +@@ -1138,7 +1138,7 @@ static __poll_t qib_poll(struct file *fp static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) { struct qib_filedata *fd = fp->private_data; @@ -261,7 +261,7 @@ Signed-off-by: Sebastian Andrzej Siewior const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); int local_cpu; -@@ -1644,9 +1644,8 @@ static int qib_assign_ctxt(struct file * +@@ -1619,9 +1619,8 @@ static int qib_assign_ctxt(struct file * ret = find_free_ctxt(i_minor - 1, fp, uinfo); else { int unit; @@ -286,22 +286,10 @@ Signed-off-by: Sebastian Andrzej Siewior + cpumask_pr_args(task->cpus_ptr)); } - int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -234,7 +234,8 @@ extern struct cred init_cred; - .static_prio = MAX_PRIO-20, \ - .normal_prio = MAX_PRIO-20, \ - .policy = SCHED_NORMAL, \ -- .cpus_allowed = CPU_MASK_ALL, \ -+ .cpus_ptr = &tsk.cpus_mask, \ -+ .cpus_mask = CPU_MASK_ALL, \ - .nr_cpus_allowed= NR_CPUS, \ - .mm = NULL, \ - .active_mm = &init_mm, \ + static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -578,7 +578,8 @@ struct task_struct { +@@ -590,7 +590,8 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; @@ -311,7 +299,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -@@ -1315,7 +1316,7 @@ extern struct pid *cad_pid; +@@ -1316,7 +1317,7 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 
/* Allowed to write to swap */ @@ -320,9 +308,21 @@ Signed-off-by: Sebastian Andrzej Siewior #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -63,7 +63,8 @@ struct task_struct init_task + .static_prio = MAX_PRIO - 20, + .normal_prio = MAX_PRIO - 20, + .policy = SCHED_NORMAL, +- .cpus_allowed = CPU_MASK_ALL, ++ .cpus_ptr = &init_task.cpus_mask, ++ .cpus_mask = CPU_MASK_ALL, + .nr_cpus_allowed= NR_CPUS, + .mm = NULL, + .active_mm = &init_mm, --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c -@@ -2094,7 +2094,7 @@ static void cpuset_fork(struct task_stru +@@ -2089,7 +2089,7 @@ static void cpuset_fork(struct task_stru if (task_css_is_root(task, cpuset_cgrp_id)) return; @@ -333,7 +333,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -564,7 +564,8 @@ static struct task_struct *dup_task_stru +@@ -813,7 +813,8 @@ static struct task_struct *dup_task_stru #ifdef CONFIG_CC_STACKPROTECTOR tsk->stack_canary = get_random_canary(); #endif @@ -345,7 +345,7 @@ Signed-off-by: Sebastian Andrzej Siewior * parent) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -959,7 +959,7 @@ static struct rq *__migrate_task(struct +@@ -970,7 +970,7 @@ static struct rq *__migrate_task(struct } /* Affinity changed (again). */ @@ -354,7 +354,7 @@ Signed-off-by: Sebastian Andrzej Siewior return rq; update_rq_clock(rq); -@@ -987,7 +987,7 @@ static int migration_cpu_stop(void *data +@@ -998,7 +998,7 @@ static int migration_cpu_stop(void *data local_irq_disable(); /* * We need to explicitly wake pending tasks before running @@ -363,7 +363,7 @@ Signed-off-by: Sebastian Andrzej Siewior * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 
*/ sched_ttwu_pending(); -@@ -1018,7 +1018,7 @@ static int migration_cpu_stop(void *data +@@ -1029,7 +1029,7 @@ static int migration_cpu_stop(void *data */ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) { @@ -372,7 +372,7 @@ Signed-off-by: Sebastian Andrzej Siewior p->nr_cpus_allowed = cpumask_weight(new_mask); } -@@ -1088,7 +1088,7 @@ static int __set_cpus_allowed_ptr(struct +@@ -1099,7 +1099,7 @@ static int __set_cpus_allowed_ptr(struct goto out; } @@ -381,7 +381,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; if (!cpumask_intersects(new_mask, cpu_valid_mask)) { -@@ -1249,10 +1249,10 @@ static int migrate_swap_stop(void *data) +@@ -1260,10 +1260,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; @@ -394,7 +394,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); -@@ -1293,10 +1293,10 @@ int migrate_swap(struct task_struct *cur +@@ -1304,10 +1304,10 @@ int migrate_swap(struct task_struct *cur if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; @@ -407,7 +407,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); -@@ -1440,7 +1440,7 @@ void kick_process(struct task_struct *p) +@@ -1451,7 +1451,7 @@ void kick_process(struct task_struct *p) EXPORT_SYMBOL_GPL(kick_process); /* @@ -416,7 +416,7 @@ Signed-off-by: Sebastian Andrzej Siewior * * A few notes on cpu_active vs cpu_online: * -@@ -1480,14 +1480,14 @@ static int select_fallback_rq(int cpu, s +@@ -1491,14 +1491,14 @@ static int select_fallback_rq(int cpu, s for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; @@ -433,7 +433,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) continue; if (!cpu_online(dest_cpu)) -@@ -1532,7 +1532,7 @@ static int select_fallback_rq(int cpu, s +@@ -1543,7 +1543,7 @@ static int select_fallback_rq(int cpu, s } /* @@ -442,7 +442,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ static inline int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) -@@ -1542,11 +1542,11 @@ int select_task_rq(struct task_struct *p +@@ -1553,11 +1553,11 @@ int select_task_rq(struct task_struct *p if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else @@ -456,7 +456,7 @@ Signed-off-by: Sebastian Andrzej Siewior * CPU. * * Since this is common to all placement strategies, this lives here. -@@ -1554,7 +1554,7 @@ int select_task_rq(struct task_struct *p +@@ -1565,7 +1565,7 @@ int select_task_rq(struct task_struct *p * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ @@ -465,7 +465,7 @@ Signed-off-by: Sebastian Andrzej Siewior !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); -@@ -2444,7 +2444,7 @@ void wake_up_new_task(struct task_struct +@@ -2455,7 +2455,7 @@ void wake_up_new_task(struct task_struct #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: @@ -474,7 +474,7 @@ Signed-off-by: Sebastian Andrzej Siewior * - any previously selected CPU might disappear through hotplug * * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -@@ -4161,7 +4161,7 @@ static int __sched_setscheduler(struct t +@@ -4239,7 +4239,7 @@ static int __sched_setscheduler(struct t * the entire root_domain to become SCHED_DEADLINE. We * will also fail if there's no bandwidth available. 
*/ @@ -483,7 +483,7 @@ Signed-off-by: Sebastian Andrzej Siewior rq->rd->dl_bw.bw == 0) { task_rq_unlock(rq, p, &rf); return -EPERM; -@@ -4755,7 +4755,7 @@ long sched_getaffinity(pid_t pid, struct +@@ -4838,7 +4838,7 @@ long sched_getaffinity(pid_t pid, struct goto out_unlock; raw_spin_lock_irqsave(&p->pi_lock, flags); @@ -492,7 +492,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irqrestore(&p->pi_lock, flags); out_unlock: -@@ -5320,7 +5320,7 @@ int task_can_attach(struct task_struct * +@@ -5428,7 +5428,7 @@ int task_can_attach(struct task_struct * * allowed nodes is unnecessary. Thus, cpusets are not * applicable for such threads. This prevents checking for * success of set_cpus_allowed_ptr() on all attached tasks @@ -501,7 +501,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ if (p->flags & PF_NO_SETAFFINITY) { ret = -EINVAL; -@@ -5347,7 +5347,7 @@ int migrate_task_to(struct task_struct * +@@ -5455,7 +5455,7 @@ int migrate_task_to(struct task_struct * if (curr_cpu == target_cpu) return 0; @@ -510,7 +510,7 @@ Signed-off-by: Sebastian Andrzej Siewior return -EINVAL; /* TODO: This is not properly updating schedstats */ -@@ -5484,7 +5484,7 @@ static void migrate_tasks(struct rq *dea +@@ -5592,7 +5592,7 @@ static void migrate_tasks(struct rq *dea put_prev_task(rq, next); /* @@ -555,7 +555,7 @@ Signed-off-by: Sebastian Andrzej Siewior * We have to ensure that we have at least one bit --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -504,7 +504,7 @@ static struct rq *dl_task_offline_migrat +@@ -541,7 +541,7 @@ static struct rq *dl_task_offline_migrat * If we cannot preempt any rq, fall back to pick any * online cpu. */ @@ -564,7 +564,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (cpu >= nr_cpu_ids) { /* * Fail to find any suitable cpu. -@@ -1749,7 +1749,7 @@ static void set_curr_task_dl(struct rq * +@@ -1816,7 +1816,7 @@ static void set_curr_task_dl(struct rq * static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && @@ -573,7 +573,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 1; return 0; } -@@ -1899,7 +1899,7 @@ static struct rq *find_lock_later_rq(str +@@ -1966,7 +1966,7 @@ static struct rq *find_lock_later_rq(str /* Retry if something changed. 
*/ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || @@ -584,7 +584,7 @@ Signed-off-by: Sebastian Andrzej Siewior !task_on_rq_queued(task))) { --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -1596,7 +1596,7 @@ static void task_numa_compare(struct tas +@@ -1588,7 +1588,7 @@ static void task_numa_compare(struct tas */ if (cur) { /* Skip this swap candidate if cannot move to the source cpu */ @@ -593,7 +593,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto unlock; /* -@@ -1706,7 +1706,7 @@ static void task_numa_find_cpu(struct ta +@@ -1698,7 +1698,7 @@ static void task_numa_find_cpu(struct ta for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ @@ -602,7 +602,7 @@ Signed-off-by: Sebastian Andrzej Siewior continue; env->dst_cpu = cpu; -@@ -5475,7 +5475,7 @@ find_idlest_group(struct sched_domain *s +@@ -5811,7 +5811,7 @@ find_idlest_group(struct sched_domain *s /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_span(group), @@ -611,7 +611,7 @@ Signed-off-by: Sebastian Andrzej Siewior continue; local_group = cpumask_test_cpu(this_cpu, -@@ -5595,7 +5595,7 @@ find_idlest_cpu(struct sched_group *grou +@@ -5931,7 +5931,7 @@ find_idlest_group_cpu(struct sched_group return cpumask_first(sched_group_span(group)); /* Traverse only the allowed CPUs */ @@ -620,7 +620,16 @@ Signed-off-by: Sebastian Andrzej Siewior if (idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); -@@ -5697,7 +5697,7 @@ static int select_idle_core(struct task_ +@@ -5971,7 +5971,7 @@ static inline int find_idlest_cpu(struct + { + int new_cpu = cpu; + +- if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) ++ if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr)) + return prev_cpu; + + while (sd) { +@@ -6080,7 +6080,7 @@ static int select_idle_core(struct task_ if (!test_idle_cores(target, false)) return -1; @@ -629,7 +638,7 @@ Signed-off-by: Sebastian Andrzej Siewior for_each_cpu_wrap(core, cpus, target) { bool idle = true; -@@ -5731,7 +5731,7 @@ static int select_idle_smt(struct task_s +@@ -6114,7 +6114,7 @@ static int select_idle_smt(struct task_s return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { @@ -638,7 +647,7 @@ Signed-off-by: Sebastian Andrzej Siewior continue; if (idle_cpu(cpu)) return cpu; -@@ -5794,7 +5794,7 @@ static int select_idle_cpu(struct task_s +@@ -6177,7 +6177,7 @@ static int select_idle_cpu(struct task_s for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { if (!--nr) return -1; @@ -647,7 +656,16 @@ Signed-off-by: Sebastian Andrzej Siewior continue; if (idle_cpu(cpu)) break; -@@ -5949,7 +5949,7 @@ select_task_rq_fair(struct task_struct * +@@ -6214,7 +6214,7 @@ static int select_idle_sibling(struct ta + recent_used_cpu != target && + cpus_share_cache(recent_used_cpu, target) && + idle_cpu(recent_used_cpu) && +- cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { ++ cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { + /* + * Replace recent_used_cpu with prev as it is a potential + * candidate for the next wake. 
+@@ -6347,7 +6347,7 @@ select_task_rq_fair(struct task_struct * if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) @@ -656,7 +674,7 @@ Signed-off-by: Sebastian Andrzej Siewior } rcu_read_lock(); -@@ -6698,14 +6698,14 @@ int can_migrate_task(struct task_struct +@@ -7095,14 +7095,14 @@ int can_migrate_task(struct task_struct /* * We do not migrate tasks that are: * 1) throttled_lb_pair, or @@ -673,7 +691,7 @@ Signed-off-by: Sebastian Andrzej Siewior int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); -@@ -6725,7 +6725,7 @@ int can_migrate_task(struct task_struct +@@ -7122,7 +7122,7 @@ int can_migrate_task(struct task_struct /* Prevent to re-select dst_cpu via env's cpus */ for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { @@ -682,7 +700,7 @@ Signed-off-by: Sebastian Andrzej Siewior env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; -@@ -7294,7 +7294,7 @@ check_cpu_capacity(struct rq *rq, struct +@@ -7692,7 +7692,7 @@ check_cpu_capacity(struct rq *rq, struct /* * Group imbalance indicates (and tries to solve) the problem where balancing @@ -691,7 +709,7 @@ Signed-off-by: Sebastian Andrzej Siewior * * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a * cpumask covering 1 cpu of the first group and 3 cpus of the second group. -@@ -7870,7 +7870,7 @@ static struct sched_group *find_busiest_ +@@ -8268,7 +8268,7 @@ static struct sched_group *find_busiest_ /* * If the busiest group is imbalanced the below checks don't * work because they assume all things are equal, which typically @@ -700,7 +718,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ if (busiest->group_type == group_imbalanced) goto force_balance; -@@ -8262,7 +8262,7 @@ static int load_balance(int this_cpu, st +@@ -8663,7 +8663,7 @@ static int load_balance(int this_cpu, st * if the curr task on busiest cpu can't be * moved to this_cpu */ @@ -711,7 +729,7 @@ Signed-off-by: Sebastian Andrzej Siewior env.flags |= LBF_ALL_PINNED; --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c -@@ -1592,7 +1592,7 @@ static void put_prev_task_rt(struct rq * +@@ -1594,7 +1594,7 @@ static void put_prev_task_rt(struct rq * static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && @@ -720,7 +738,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 1; return 0; } -@@ -1727,7 +1727,7 @@ static struct rq *find_lock_lowest_rq(st +@@ -1729,7 +1729,7 @@ static struct rq *find_lock_lowest_rq(st * Also make sure that it wasn't scheduled on its rq. 
*/ if (unlikely(task_rq(task) != rq || @@ -742,7 +760,7 @@ get_online_cpus(); --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c -@@ -23,7 +23,7 @@ notrace static unsigned int check_preemp +@@ -22,7 +22,7 @@ notrace static unsigned int check_preemp * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ diff --git a/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch b/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch index 9febbc7bc..897a4b694 100644 --- a/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch +++ b/debian/patches/features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 21 Nov 2016 19:31:08 +0100 Subject: [PATCH] kernel/sched: move stack + kprobe clean up to __put_task_struct() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz There is no need to free the stack before the task struct. This also comes in handy on -RT because we can't free memory in preempt disabled @@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include #include -@@ -417,6 +418,15 @@ void __put_task_struct(struct task_struc +@@ -661,6 +662,15 @@ void __put_task_struct(struct task_struc WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior security_task_free(tsk); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2705,15 +2705,6 @@ static struct rq *finish_task_switch(str +@@ -2778,15 +2778,6 @@ static struct rq *finish_task_switch(str if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/debian/patches/features/all/rt/kernel-signal-Remove-no-longer-required-irqsave-rest.patch b/debian/patches/features/all/rt/kernel-signal-Remove-no-longer-required-irqsave-rest.patch new file mode 100644 index 000000000..4dd1a00f8 --- /dev/null +++ b/debian/patches/features/all/rt/kernel-signal-Remove-no-longer-required-irqsave-rest.patch @@ -0,0 +1,69 @@ +From: Anna-Maria Gleixner +Date: Wed, 4 Apr 2018 11:44:01 +0200 +Subject: [PATCH] kernel/signal: Remove no longer required irqsave/restore +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Commit a841796f11c9 ("signal: align __lock_task_sighand() irq disabling and +RCU") introduced a rcu read side critical section with interrupts +disabled. The changelog suggested that a better long-term fix would be "to +make rt_mutex_unlock() disable irqs when acquiring the rt_mutex structure's +->wait_lock". + +This long-term fix has been made in commit 4abf91047cf ("rtmutex: Make +wait_lock irq safe") for a different reason. + +Therefore revert commit a841796f11c9 ("signal: align +__lock_task_sighand() irq disabling and RCU") as the interrupt disable +dance is no longer required. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/signal.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -1244,19 +1244,12 @@ struct sighand_struct *__lock_task_sigha + { + struct sighand_struct *sighand; + ++ rcu_read_lock(); + for (;;) { +- /* +- * Disable interrupts early to avoid deadlocks.
+- * See rcu_read_unlock() comment header for details. +- */ +- local_irq_save(*flags); +- rcu_read_lock(); + sighand = rcu_dereference(tsk->sighand); +- if (unlikely(sighand == NULL)) { +- rcu_read_unlock(); +- local_irq_restore(*flags); ++ if (unlikely(sighand == NULL)) + break; +- } ++ + /* + * This sighand can be already freed and even reused, but + * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which +@@ -1268,15 +1261,12 @@ struct sighand_struct *__lock_task_sigha + * __exit_signal(). In the latter case the next iteration + * must see ->sighand == NULL. + */ +- spin_lock(&sighand->siglock); +- if (likely(sighand == tsk->sighand)) { +- rcu_read_unlock(); ++ spin_lock_irqsave(&sighand->siglock, *flags); ++ if (likely(sighand == tsk->sighand)) + break; +- } +- spin_unlock(&sighand->siglock); +- rcu_read_unlock(); +- local_irq_restore(*flags); ++ spin_unlock_irqrestore(&sighand->siglock, *flags); + } ++ rcu_read_unlock(); + + return sighand; + } diff --git a/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch b/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch index bcb5347e5..7170a62b1 100644 --- a/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch +++ b/debian/patches/features/all/rt/kernel-softirq-unlock-with-irqs-on.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Tue, 9 Feb 2016 18:17:18 +0100 Subject: kernel: softirq: unlock with irqs on -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz We unlock the lock while the interrupts are off. This isn't a problem now but will get because the migrate_disable() + enable are not @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/softirq.c +++ b/kernel/softirq.c -@@ -560,8 +560,10 @@ static void do_current_softirqs(void) +@@ -561,8 +561,10 @@ static void do_current_softirqs(void) do_single_softirq(i); } softirq_clr_runner(i); diff --git a/debian/patches/features/all/rt/kernel-user-Use-irqsave-variant-of-atomic_dec_and_lo.patch b/debian/patches/features/all/rt/kernel-user-Use-irqsave-variant-of-atomic_dec_and_lo.patch new file mode 100644 index 000000000..56eec54de --- /dev/null +++ b/debian/patches/features/all/rt/kernel-user-Use-irqsave-variant-of-atomic_dec_and_lo.patch @@ -0,0 +1,30 @@ +From: Anna-Maria Gleixner +Date: Wed, 4 Apr 2018 11:43:57 +0200 +Subject: [PATCH] kernel/user: Use irqsave variant of atomic_dec_and_lock() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The irqsave variant of atomic_dec_and_lock handles irqsave/restore when +taking/releasing the spin lock. With this variant the call of +local_irq_save/restore is no longer required. 
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/user.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/kernel/user.c ++++ b/kernel/user.c +@@ -169,11 +169,8 @@ void free_uid(struct user_struct *up) + if (!up) + return; + +- local_irq_save(flags); +- if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) ++ if (atomic_dec_and_lock_irqsave(&up->__count, &uidhash_lock, flags)) + free_user(up, flags); +- else +- local_irq_restore(flags); + } + + struct user_struct *alloc_uid(kuid_t uid) diff --git a/debian/patches/features/all/rt/kgb-serial-hackaround.patch b/debian/patches/features/all/rt/kgb-serial-hackaround.patch index 0afe96406..106c8dcc1 100644 --- a/debian/patches/features/all/rt/kgb-serial-hackaround.patch +++ b/debian/patches/features/all/rt/kgb-serial-hackaround.patch @@ -1,7 +1,7 @@ From: Jason Wessel Date: Thu, 28 Jul 2011 12:42:23 -0500 Subject: kgdb/serial: Short term workaround -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On 07/27/2011 04:37 PM, Thomas Gleixner wrote: > - KGDB (not yet disabled) is reportedly unusable on -rt right now due @@ -26,7 +26,7 @@ Jason. --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c -@@ -35,6 +35,7 @@ +@@ -31,6 +31,7 @@ #include #include #include diff --git a/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch b/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch index e9d95d13e..4bc08910f 100644 --- a/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch +++ b/debian/patches/features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 23 Jan 2014 14:45:59 +0100 Subject: leds: trigger: disable CPU trigger on -RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz as it triggers: |CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.8-rt10 #141 diff --git a/debian/patches/features/all/rt/libata-remove-ata_sff_data_xfer_noirq.patch b/debian/patches/features/all/rt/libata-remove-ata_sff_data_xfer_noirq.patch new file mode 100644 index 000000000..8e05cc284 --- /dev/null +++ b/debian/patches/features/all/rt/libata-remove-ata_sff_data_xfer_noirq.patch @@ -0,0 +1,197 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 19 Apr 2018 12:55:14 +0200 +Subject: [PATCH] libata: remove ata_sff_data_xfer_noirq() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +ata_sff_data_xfer_noirq() is invoked via the ->sff_data_xfer hook. The +latter is invoked by ata_pio_sector(), atapi_send_cdb() and +__atapi_pio_bytes() which in turn is invoked by ata_sff_hsm_move(). +The latter function requires that the "ap->lock" lock is held which +needs to be taken with disabled interrupts. + +There is no need to have ata_sff_data_xfer_noirq() which invokes +ata_sff_data_xfer32() with disabled interrupts because at this point the +interrupts are already disabled. +Remove the function and all references to it and replace all callers +with ata_sff_data_xfer32().
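To make the redundancy concrete, here is a hedged sketch of the locking context in which the ->sff_data_xfer hook runs; the function name and call site are hypothetical, simplified from the real libata call chain described above:

#include <linux/libata.h>
#include <linux/spinlock.h>

/* Callers reach the hook with ap->lock held and interrupts off, so the
 * extra local_irq_save() that ata_sff_data_xfer_noirq() performed was
 * pure overhead. */
static void pio_transfer(struct ata_port *ap, struct ata_queued_cmd *qc,
			 unsigned char *buf, unsigned int len)
{
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	/* interrupts are already disabled when the hook is invoked: */
	ap->ops->sff_data_xfer(qc, buf, len, 0 /* read */);
	spin_unlock_irqrestore(ap->lock, flags);
}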
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/driver-api/libata.rst | 3 +-- + drivers/ata/libata-sff.c | 30 ------------------------------ + drivers/ata/pata_cmd640.c | 2 +- + drivers/ata/pata_icside.c | 2 +- + drivers/ata/pata_imx.c | 2 +- + drivers/ata/pata_legacy.c | 6 +++--- + drivers/ata/pata_palmld.c | 2 +- + drivers/ata/pata_pcmcia.c | 2 +- + drivers/ata/pata_platform.c | 2 +- + drivers/ata/pata_via.c | 2 +- + include/linux/libata.h | 2 -- + 11 files changed, 11 insertions(+), 44 deletions(-) + +--- a/Documentation/driver-api/libata.rst ++++ b/Documentation/driver-api/libata.rst +@@ -118,8 +118,7 @@ PIO data read/write + All bmdma-style drivers must implement this hook. This is the low-level + operation that actually copies the data bytes during a PIO data + transfer. Typically the driver will choose one of +-:c:func:`ata_sff_data_xfer_noirq`, :c:func:`ata_sff_data_xfer`, or +-:c:func:`ata_sff_data_xfer32`. ++:c:func:`ata_sff_data_xfer`, or :c:func:`ata_sff_data_xfer32`. + + ATA command execute + ~~~~~~~~~~~~~~~~~~~ +--- a/drivers/ata/libata-sff.c ++++ b/drivers/ata/libata-sff.c +@@ -658,36 +658,6 @@ unsigned int ata_sff_data_xfer32(struct + EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); + + /** +- * ata_sff_data_xfer_noirq - Transfer data by PIO +- * @qc: queued command +- * @buf: data buffer +- * @buflen: buffer length +- * @rw: read/write +- * +- * Transfer data from/to the device data register by PIO. Do the +- * transfer with interrupts disabled. +- * +- * LOCKING: +- * Inherited from caller. +- * +- * RETURNS: +- * Bytes consumed. +- */ +-unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc, unsigned char *buf, +- unsigned int buflen, int rw) +-{ +- unsigned long flags; +- unsigned int consumed; +- +- local_irq_save(flags); +- consumed = ata_sff_data_xfer32(qc, buf, buflen, rw); +- local_irq_restore(flags); +- +- return consumed; +-} +-EXPORT_SYMBOL_GPL(ata_sff_data_xfer_noirq); +- +-/** + * ata_pio_sector - Transfer a sector of data. 
+ * @qc: Command on going + * +--- a/drivers/ata/pata_cmd640.c ++++ b/drivers/ata/pata_cmd640.c +@@ -178,7 +178,7 @@ static struct scsi_host_template cmd640_ + static struct ata_port_operations cmd640_port_ops = { + .inherits = &ata_sff_port_ops, + /* In theory xfer_noirq is not needed once we kill the prefetcher */ +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + .sff_irq_check = cmd640_sff_irq_check, + .qc_issue = cmd640_qc_issue, + .cable_detect = ata_cable_40wire, +--- a/drivers/ata/pata_icside.c ++++ b/drivers/ata/pata_icside.c +@@ -324,7 +324,7 @@ static struct ata_port_operations pata_i + .inherits = &ata_bmdma_port_ops, + /* no need to build any PRD tables for DMA */ + .qc_prep = ata_noop_qc_prep, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + .bmdma_setup = pata_icside_bmdma_setup, + .bmdma_start = pata_icside_bmdma_start, + .bmdma_stop = pata_icside_bmdma_stop, +--- a/drivers/ata/pata_imx.c ++++ b/drivers/ata/pata_imx.c +@@ -102,7 +102,7 @@ static struct scsi_host_template pata_im + + static struct ata_port_operations pata_imx_port_ops = { + .inherits = &ata_sff_port_ops, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + .cable_detect = ata_cable_unknown, + .set_piomode = pata_imx_set_piomode, + }; +--- a/drivers/ata/pata_legacy.c ++++ b/drivers/ata/pata_legacy.c +@@ -246,12 +246,12 @@ static const struct ata_port_operations + + static struct ata_port_operations simple_port_ops = { + .inherits = &legacy_base_port_ops, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + }; + + static struct ata_port_operations legacy_port_ops = { + .inherits = &legacy_base_port_ops, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + .set_mode = legacy_set_mode, + }; + +@@ -341,7 +341,7 @@ static unsigned int pdc_data_xfer_vlb(st + } + local_irq_restore(flags); + } else +- buflen = ata_sff_data_xfer_noirq(qc, buf, buflen, rw); ++ buflen = ata_sff_data_xfer32(qc, buf, buflen, rw); + + return buflen; + } +--- a/drivers/ata/pata_palmld.c ++++ b/drivers/ata/pata_palmld.c +@@ -44,7 +44,7 @@ static struct scsi_host_template palmld_ + + static struct ata_port_operations palmld_port_ops = { + .inherits = &ata_sff_port_ops, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + .cable_detect = ata_cable_40wire, + }; + +--- a/drivers/ata/pata_pcmcia.c ++++ b/drivers/ata/pata_pcmcia.c +@@ -151,7 +151,7 @@ static struct scsi_host_template pcmcia_ + + static struct ata_port_operations pcmcia_port_ops = { + .inherits = &ata_sff_port_ops, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + .cable_detect = ata_cable_40wire, + .set_mode = pcmcia_set_mode, + }; +--- a/drivers/ata/pata_platform.c ++++ b/drivers/ata/pata_platform.c +@@ -49,7 +49,7 @@ static struct scsi_host_template pata_pl + + static struct ata_port_operations pata_platform_port_ops = { + .inherits = &ata_sff_port_ops, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + .cable_detect = ata_cable_unknown, + .set_mode = pata_platform_set_mode, + }; +--- a/drivers/ata/pata_via.c ++++ b/drivers/ata/pata_via.c +@@ -471,7 +471,7 @@ static struct ata_port_operations via_po + + static struct ata_port_operations via_port_ops_noirq = { + .inherits = &via_port_ops, +- .sff_data_xfer = ata_sff_data_xfer_noirq, ++ .sff_data_xfer = ata_sff_data_xfer32, + }; + + /** +--- 
a/include/linux/libata.h ++++ b/include/linux/libata.h +@@ -1831,8 +1831,6 @@ extern unsigned int ata_sff_data_xfer(st + unsigned char *buf, unsigned int buflen, int rw); + extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, + unsigned char *buf, unsigned int buflen, int rw); +-extern unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc, +- unsigned char *buf, unsigned int buflen, int rw); + extern void ata_sff_irq_on(struct ata_port *ap); + extern void ata_sff_irq_clear(struct ata_port *ap); + extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, diff --git a/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch b/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch index bbdb4e5f9..e6e8187d6 100644 --- a/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch +++ b/debian/patches/features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Thu, 31 Mar 2016 00:04:25 -0500 Subject: [PATCH] list_bl: fixup bogus lockdep warning -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz At first glance, the use of 'static inline' seems appropriate for INIT_HLIST_BL_HEAD(). diff --git a/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch b/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch index 351bdd705..a02a2909a 100644 --- a/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch +++ b/debian/patches/features/all/rt/list_bl.h-make-list-head-locking-RT-safe.patch @@ -1,7 +1,7 @@ From: Paul Gortmaker Date: Fri, 21 Jun 2013 15:07:25 -0400 Subject: list_bl: Make list head locking RT safe -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz As per changes in include/linux/jbd_common.h for avoiding the bit_spin_locks on RT ("fs: jbd/jbd2: Make state lock and journal diff --git a/debian/patches/features/all/rt/localversion.patch b/debian/patches/features/all/rt/localversion.patch index 2142d5e1d..9574abb31 100644 --- a/debian/patches/features/all/rt/localversion.patch +++ b/debian/patches/features/all/rt/localversion.patch @@ -1,7 +1,7 @@ Subject: Add localversion for -RT release From: Thomas Gleixner Date: Fri, 08 Jul 2011 20:25:16 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Signed-off-by: Thomas Gleixner --- @@ -11,4 +11,4 @@ Signed-off-by: Thomas Gleixner --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt13 ++-rt1 diff --git a/debian/patches/features/all/rt/lockdep-Add-a-assert_in_softirq.patch b/debian/patches/features/all/rt/lockdep-Add-a-assert_in_softirq.patch new file mode 100644 index 000000000..053a610c6 --- /dev/null +++ b/debian/patches/features/all/rt/lockdep-Add-a-assert_in_softirq.patch @@ -0,0 +1,34 @@ +From: Anna-Maria Gleixner +Date: Tue, 10 Apr 2018 11:37:11 +0200 +Subject: [PATCH] lockdep: Add a assert_in_softirq() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Instead of warning directly on a wrong context, check whether softirq context is +set. This check could be a nop on RT.
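A hedged usage sketch of the new assertion, with a hypothetical caller (the macro itself is added by the hunk that follows): it documents and, with lockdep enabled, verifies that a helper only runs in softirq context, and compiles to nothing otherwise:

#include <linux/lockdep.h>

static void flush_pcpu_queue(void)	/* hypothetical helper */
{
	lockdep_assert_in_softirq();
	/* ... touch state that only softirq handlers may touch ... */
}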
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/lockdep.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/include/linux/lockdep.h ++++ b/include/linux/lockdep.h +@@ -608,11 +608,17 @@ do { \ + "IRQs not disabled as expected\n"); \ + } while (0) + ++#define lockdep_assert_in_softirq() do { \ ++ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ ++ !current->softirq_context, \ ++ "Not in softirq context as expected\n"); \ ++ } while (0) + #else + # define might_lock(lock) do { } while (0) + # define might_lock_read(lock) do { } while (0) + # define lockdep_assert_irqs_enabled() do { } while (0) + # define lockdep_assert_irqs_disabled() do { } while (0) ++# define lockdep_assert_in_softirq() do { } while (0) + #endif + + #ifdef CONFIG_LOCKDEP diff --git a/debian/patches/features/all/rt/lockdep-disable-self-test.patch b/debian/patches/features/all/rt/lockdep-disable-self-test.patch index 10dd3693a..5fa3f355b 100644 --- a/debian/patches/features/all/rt/lockdep-disable-self-test.patch +++ b/debian/patches/features/all/rt/lockdep-disable-self-test.patch @@ -4,7 +4,7 @@ Subject: [PATCH] lockdep: disable self-test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The self-test wasn't always 100% accurate for RT. We disabled a few tests which failed because they had a different semantic for RT. Some @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -1199,7 +1199,7 @@ config DEBUG_ATOMIC_SLEEP +@@ -1187,7 +1187,7 @@ config DEBUG_ATOMIC_SLEEP config DEBUG_LOCKING_API_SELFTESTS bool "Locking API boot-time self-tests" diff --git a/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch b/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch index d7500a71d..3d17a2feb 100644 --- a/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch +++ b/debian/patches/features/all/rt/lockdep-no-softirq-accounting-on-rt.patch @@ -1,63 +1,79 @@ Subject: lockdep: Make it RT aware From: Thomas Gleixner Date: Sun, 17 Jul 2011 18:51:23 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz teach lockdep that we don't really do softirqs on -RT. 
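For background on what that accounting feeds (a generic illustration with a hypothetical lock and callbacks; none of this is from the patch): lockdep records that a lock class was taken in softirq context and warns if the same class is taken elsewhere with softirqs enabled. On -RT softirqs run in preemptible thread context, so that report class is bogus and the bookkeeping is compiled out by the hunks below:

#include <linux/spinlock.h>
#include <linux/timer.h>

static DEFINE_SPINLOCK(stats_lock);	/* hypothetical lock */

static void stats_timer_fn(struct timer_list *t)
{
	/* Timer callbacks run in softirq context; lockdep marks the
	 * lock class as used IN-SOFTIRQ here. */
	spin_lock(&stats_lock);
	/* ... */
	spin_unlock(&stats_lock);
}

static void stats_read(void)
{
	/* Process context must therefore keep softirqs out, or lockdep
	 * reports a potential deadlock (on mainline, not on -RT). */
	spin_lock_bh(&stats_lock);
	/* ... */
	spin_unlock_bh(&stats_lock);
}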
Signed-off-by: Thomas Gleixner --- - include/linux/irqflags.h | 26 +++++++++++++++----------- + include/linux/irqflags.h | 23 +++++++++++++++-------- + include/linux/lockdep.h | 7 ++++++- kernel/locking/lockdep.c | 2 ++ - 2 files changed, 17 insertions(+), 11 deletions(-) + 3 files changed, 23 insertions(+), 9 deletions(-) --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h -@@ -34,16 +34,6 @@ do { \ +@@ -32,14 +32,6 @@ do { \ + do { \ current->hardirq_context--; \ - crossrelease_hist_end(XHLOCK_HARD); \ } while (0) -# define lockdep_softirq_enter() \ -do { \ - current->softirq_context++; \ -- crossrelease_hist_start(XHLOCK_SOFT); \ -} while (0) -# define lockdep_softirq_exit() \ -do { \ - current->softirq_context--; \ -- crossrelease_hist_end(XHLOCK_SOFT); \ -} while (0) - # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) -@@ -56,9 +46,23 @@ do { \ - # define trace_softirqs_enabled(p) 0 - # define trace_hardirq_enter() do { } while (0) - # define trace_hardirq_exit() do { } while (0) -+# define INIT_TRACE_IRQFLAGS -+#endif + # define trace_hardirqs_off() do { } while (0) +@@ -54,6 +46,21 @@ do { \ + # define lockdep_softirq_enter() do { } while (0) + # define lockdep_softirq_exit() do { } while (0) + #endif + +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) +# define lockdep_softirq_enter() \ +do { \ + current->softirq_context++; \ -+ crossrelease_hist_start(XHLOCK_SOFT); \ +} while (0) +# define lockdep_softirq_exit() \ +do { \ + current->softirq_context--; \ -+ crossrelease_hist_end(XHLOCK_SOFT); \ +} while (0) ++ +#else - # define lockdep_softirq_enter() do { } while (0) - # define lockdep_softirq_exit() do { } while (0) --# define INIT_TRACE_IRQFLAGS - #endif ++# define lockdep_softirq_enter() do { } while (0) ++# define lockdep_softirq_exit() do { } while (0) ++#endif #if defined(CONFIG_IRQSOFF_TRACER) || \ + defined(CONFIG_PREEMPT_TRACER) +--- a/include/linux/lockdep.h ++++ b/include/linux/lockdep.h +@@ -608,11 +608,16 @@ do { \ + "IRQs not disabled as expected\n"); \ + } while (0) + +-#define lockdep_assert_in_softirq() do { \ ++#ifdef CONFIG_PREEMPT_RT_FULL ++# define lockdep_assert_in_softirq() do { } while (0) ++#else ++# define lockdep_assert_in_softirq() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + !current->softirq_context, \ + "Not in softirq context as expected\n"); \ + } while (0) ++#endif ++ + #else + # define might_lock(lock) do { } while (0) + # define might_lock_read(lock) do { } while (0) --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -3917,6 +3917,7 @@ static void check_flags(unsigned long fl +@@ -3843,6 +3843,7 @@ static void check_flags(unsigned long fl } } @@ -65,7 +81,7 @@ Signed-off-by: Thomas Gleixner /* * We dont accurately track softirq state in e.g. 
* hardirq contexts (such as on 4KSTACKS), so only -@@ -3931,6 +3932,7 @@ static void check_flags(unsigned long fl +@@ -3857,6 +3858,7 @@ static void check_flags(unsigned long fl DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } diff --git a/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch b/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch index a200505f1..0fccab567 100644 --- a/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch +++ b/debian/patches/features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch @@ -1,7 +1,7 @@ From: Josh Cartwright Date: Wed, 28 Jan 2015 13:08:45 -0600 Subject: lockdep: selftest: fix warnings due to missing PREEMPT_RT conditionals -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz "lockdep: Selftest: Only do hardirq context test for raw spinlock" disabled the execution of certain tests with PREEMPT_RT_FULL, but did diff --git a/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch b/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch index 096fcf9f6..d3daa64f3 100644 --- a/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch +++ b/debian/patches/features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch @@ -1,7 +1,7 @@ Subject: lockdep: selftest: Only do hardirq context test for raw spinlock From: Yong Zhang Date: Mon, 16 Apr 2012 15:01:56 +0800 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz From: Yong Zhang diff --git a/debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch b/debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch index 41f32229d..bdf1cde1a 100644 --- a/debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch +++ b/debian/patches/features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 4 Aug 2017 17:40:42 +0200 Subject: [PATCH 1/2] locking: don't check for __LINUX_SPINLOCK_TYPES_H on -RT archs -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Upstream uses arch_spinlock_t within spinlock_t and requests that spinlock_types.h header file is included first. diff --git a/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch b/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch index b3ecfaaed..135de7fa2 100644 --- a/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch +++ b/debian/patches/features/all/rt/locking-locktorture-Do-NOT-include-rwlock.h-directly.patch @@ -1,7 +1,7 @@ From: "Wolfgang M. 
Reimer" Date: Tue, 21 Jul 2015 16:20:07 +0200 Subject: locking: locktorture: Do NOT include rwlock.h directly -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Including rwlock.h directly will cause kernel builds to fail if CONFIG_PREEMPT_RT_FULL is defined. The correct header file diff --git a/debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch b/debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch index 4b18f8a0a..c143cfc5a 100644 --- a/debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch +++ b/debian/patches/features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch @@ -1,7 +1,7 @@ From: Mikulas Patocka Date: Mon, 13 Nov 2017 12:56:53 -0500 Subject: [PATCH] locking/rt-mutex: fix deadlock in device mapper / block-IO -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When some block device driver creates a bio and submits it to another block device driver, the bio is added to current->bio_list (in order to @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "rtmutex_common.h" -@@ -1933,6 +1934,15 @@ rt_mutex_fastlock(struct rt_mutex *lock, +@@ -1919,6 +1920,15 @@ rt_mutex_fastlock(struct rt_mutex *lock, if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) return 0; @@ -62,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); } -@@ -1950,6 +1960,9 @@ rt_mutex_timed_fastlock(struct rt_mutex +@@ -1936,6 +1946,9 @@ rt_mutex_timed_fastlock(struct rt_mutex likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) return 0; diff --git a/debian/patches/features/all/rt/locking-rtmutex-Handle-non-enqueued-waiters-graceful.patch b/debian/patches/features/all/rt/locking-rtmutex-Handle-non-enqueued-waiters-graceful.patch new file mode 100644 index 000000000..023e0e918 --- /dev/null +++ b/debian/patches/features/all/rt/locking-rtmutex-Handle-non-enqueued-waiters-graceful.patch @@ -0,0 +1,65 @@ +From: Peter Zijlstra +Date: Tue, 27 Mar 2018 14:14:38 +0200 +Subject: [PATCH] locking/rtmutex: Handle non enqueued waiters gracefully in + remove_waiter() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit c28d62cf52d791ba5f6db7ce525ed06b86291c82 + +In -RT task_blocks_on_rt_mutex() may return with -EAGAIN due to +(->pi_blocked_on == PI_WAKEUP_INPROGRESS) before it added itself as a +waiter. In such a case remove_waiter() must not be called because without a +waiter it will trigger the BUG_ON() statement. + +This was initially reported by Yimin Deng. Thomas Gleixner fixed it then +with an explicit check for waiters before calling remove_waiter(). + +Instead of an explicit NULL check before calling rt_mutex_top_waiter() make +the function return NULL if there are no waiters. With that fixed the now +pointless NULL check is removed from rt_mutex_slowlock(). 
+ +Reported-and-debugged-by: Yimin Deng +Suggested-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Link: https://lkml.kernel.org/r/CAAh1qt=DCL9aUXNxanP5BKtiPp3m+qj4yB+gDohhXPVFCxWwzg@mail.gmail.com +Link: https://lkml.kernel.org/r/20180327121438.sss7hxg3crqy4ecd@linutronix.de +--- + kernel/locking/rtmutex.c | 3 +-- + kernel/locking/rtmutex_common.h | 11 ++++++----- + 2 files changed, 7 insertions(+), 7 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1268,8 +1268,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, + + if (unlikely(ret)) { + __set_current_state(TASK_RUNNING); +- if (rt_mutex_has_waiters(lock)) +- remove_waiter(lock, &waiter); ++ remove_waiter(lock, &waiter); + rt_mutex_handle_deadlock(ret, chwalk, &waiter); + } + +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -52,12 +52,13 @@ static inline int rt_mutex_has_waiters(s + static inline struct rt_mutex_waiter * + rt_mutex_top_waiter(struct rt_mutex *lock) + { +- struct rt_mutex_waiter *w; +- +- w = rb_entry(lock->waiters.rb_leftmost, +- struct rt_mutex_waiter, tree_entry); +- BUG_ON(w->lock != lock); ++ struct rb_node *leftmost = rb_first_cached(&lock->waiters); ++ struct rt_mutex_waiter *w = NULL; + ++ if (leftmost) { ++ w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry); ++ BUG_ON(w->lock != lock); ++ } + return w; + } + diff --git a/debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch b/debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch index f4e0f839e..7d7f07801 100644 --- a/debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch +++ b/debian/patches/features/all/rt/locking-rtmutex-don-t-drop-the-wait_lock-twice.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 7 Sep 2017 12:38:47 +0200 Subject: locking/rtmutex: don't drop the wait_lock twice -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Since the futex rework, __rt_mutex_start_proxy_lock() does no longer acquire the wait_lock so it must not drop it. 
Otherwise the lock is not diff --git a/debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch b/debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch index bb7a340e2..821491d40 100644 --- a/debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch +++ b/debian/patches/features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 16 Nov 2017 16:48:48 +0100 Subject: [PATCH] locking/rtmutex: re-init the wait_lock in rt_mutex_init_proxy_locked() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz We could provide a key-class for the lockdep (and fixup all callers) or move the init to all callers (like it was) in order to avoid lockdep @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -2272,6 +2272,14 @@ void rt_mutex_init_proxy_locked(struct r +@@ -2259,6 +2259,14 @@ void rt_mutex_init_proxy_locked(struct r struct task_struct *proxy_owner) { __rt_mutex_init(lock, NULL, NULL); diff --git a/debian/patches/features/all/rt/md-disable-bcache.patch b/debian/patches/features/all/rt/md-disable-bcache.patch index 2688b75dd..ddbc690c7 100644 --- a/debian/patches/features/all/rt/md-disable-bcache.patch +++ b/debian/patches/features/all/rt/md-disable-bcache.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Aug 2013 11:48:57 +0200 Subject: md: disable bcache -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz It uses anon semaphores |drivers/md/bcache/request.c: In function ‘cached_dev_write_complete’: diff --git a/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch b/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch index 17a73f78e..29c10957d 100644 --- a/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch +++ b/debian/patches/features/all/rt/md-raid5-percpu-handling-rt-aware.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 6 Apr 2010 16:51:31 +0200 Subject: md: raid5: Make raid5_percpu handling RT aware -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz __raid_run_ops() disables preemption with get_cpu() around the access to the raid5_percpu variables. 
That causes scheduling while atomic @@ -21,7 +21,7 @@ Tested-by: Udo van den Heuvel --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -2067,8 +2067,9 @@ static void raid_run_ops(struct stripe_h +@@ -2064,8 +2064,9 @@ static void raid_run_ops(struct stripe_h struct raid5_percpu *percpu; unsigned long cpu; @@ -32,7 +32,7 @@ Tested-by: Udo van den Heuvel if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; -@@ -2127,7 +2128,8 @@ static void raid_run_ops(struct stripe_h +@@ -2124,7 +2125,8 @@ static void raid_run_ops(struct stripe_h if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } @@ -42,7 +42,7 @@ Tested-by: Udo van den Heuvel } static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) -@@ -6796,6 +6798,7 @@ static int raid456_cpu_up_prepare(unsign +@@ -6788,6 +6790,7 @@ static int raid456_cpu_up_prepare(unsign __func__, cpu); return -ENOMEM; } @@ -50,7 +50,7 @@ Tested-by: Udo van den Heuvel return 0; } -@@ -6806,7 +6809,6 @@ static int raid5_alloc_percpu(struct r5c +@@ -6798,7 +6801,6 @@ static int raid5_alloc_percpu(struct r5c conf->percpu = alloc_percpu(struct raid5_percpu); if (!conf->percpu) return -ENOMEM; @@ -60,7 +60,7 @@ Tested-by: Udo van den Heuvel conf->scribble_disks = max(conf->raid_disks, --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -624,6 +624,7 @@ struct r5conf { +@@ -636,6 +636,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { diff --git a/debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch b/debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch index 7b04e4c4d..12d9c3e70 100644 --- a/debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch +++ b/debian/patches/features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 09:55:58 +0200 Subject: [PATCH] mfd: syscon: atmel-smc: include string.h -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The string.h header file is needed for the memset() definition. The RT build fails because it is not pulled in via other header files. diff --git a/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch b/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch index f670741ac..51c5f2e9c 100644 --- a/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch +++ b/debian/patches/features/all/rt/mips-disable-highmem-on-rt.patch @@ -1,7 +1,7 @@ Subject: mips: Disable highmem on RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 17:10:12 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The current highmem handling on -RT is not compatible and needs fixups. 
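The incompatibility is worth spelling out: a kmap_atomic() mapping lives in a per-CPU fixmap slot and implicitly disables preemption, so any sleeping lock taken inside the section becomes a bug once spinlocks sleep on -RT. A hedged illustration (demo-only code, not taken from the patch; kmap_atomic()/kunmap_atomic() are the real APIs):

    #include <linux/highmem.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);      /* a sleeping lock on PREEMPT_RT_FULL */

    static void demo_touch_page(struct page *page)
    {
            char *va = kmap_atomic(page);   /* implies preempt_disable() */

            spin_lock(&demo_lock);          /* on -RT: may sleep inside an atomic section */
            va[0] = 0;
            spin_unlock(&demo_lock);

            kunmap_atomic(va);              /* preemption enabled again */
    }

Until the generic highmem code is fixed up, the Kconfig hunk below therefore makes HIGHMEM depend on !PREEMPT_RT_FULL.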
@@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig -@@ -2519,7 +2519,7 @@ config MIPS_ASID_BITS_VARIABLE +@@ -2516,7 +2516,7 @@ config MIPS_ASID_BITS_VARIABLE # config HIGHMEM bool "High Memory Support" diff --git a/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch b/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch index 56eb30e16..a6207a05f 100644 --- a/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch +++ b/debian/patches/features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch @@ -1,7 +1,7 @@ Subject: mm: rt: Fix generic kmap_atomic for RT From: Thomas Gleixner Date: Sat, 19 Sep 2015 10:15:00 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The update to 4.1 brought in the mainline variant of the pagefault disable disentangling from preempt count. That introduced a diff --git a/debian/patches/features/all/rt/mm-backing-dev-Use-irqsave-variant-of-atomic_dec_and.patch b/debian/patches/features/all/rt/mm-backing-dev-Use-irqsave-variant-of-atomic_dec_and.patch new file mode 100644 index 000000000..6f43542ff --- /dev/null +++ b/debian/patches/features/all/rt/mm-backing-dev-Use-irqsave-variant-of-atomic_dec_and.patch @@ -0,0 +1,31 @@ +From: Anna-Maria Gleixner +Date: Wed, 4 Apr 2018 11:43:56 +0200 +Subject: [PATCH] mm/backing-dev: Use irqsave variant of + atomic_dec_and_lock() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The irqsave variant of atomic_dec_and_lock handles irqsave/restore when +taking/releasing the spin lock. With this variant the call of +local_irq_save/restore is no longer required. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/backing-dev.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -495,11 +495,8 @@ void wb_congested_put(struct bdi_writeba + { + unsigned long flags; + +- local_irq_save(flags); +- if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { +- local_irq_restore(flags); ++ if (!atomic_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, flags)) + return; +- } + + /* bdi might already have been destroyed leaving @congested unlinked */ + if (congested->__bdi) { diff --git a/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch b/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch index 369b4c120..b5e704ee3 100644 --- a/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch +++ b/debian/patches/features/all/rt/mm-convert-swap-to-percpu-locked.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:51 -0500 Subject: mm/swap: Convert to percpu locked -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Replace global locks (get_cpu + local_irq_save) with "local_locks()". Currently there is one for "rotate" and one for "swap".
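The conversion pattern used here, and by several patches further down, is always the same: a named per-CPU local lock replaces an anonymous get_cpu()/local_irq_save() section. On non-RT kernels the macros compile down to the old preempt/IRQ-off code; on -RT they become a per-CPU sleeping lock, keeping the section preemptible and visible to lockdep. A small sketch, assuming the -rt tree's <linux/locallock.h> is present (the demo_* names are invented):

    #include <linux/locallock.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(int, demo_counter);
    static DEFINE_LOCAL_IRQ_LOCK(demo_lock);        /* named, per-CPU */

    static void demo_inc(void)
    {
            unsigned long flags;

            /* was: local_irq_save(flags); */
            local_lock_irqsave(demo_lock, flags);
            this_cpu_inc(demo_counter);
            local_unlock_irqrestore(demo_lock, flags);
    }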
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/swap.h +++ b/include/linux/swap.h -@@ -312,6 +312,7 @@ extern unsigned long nr_free_pagecache_p +@@ -324,6 +324,7 @@ extern unsigned long nr_free_pagecache_p /* linux/mm/swap.c */ @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner extern void lru_cache_add_file(struct page *page); --- a/mm/compaction.c +++ b/mm/compaction.c -@@ -1634,10 +1634,12 @@ static enum compact_result compact_zone( +@@ -1657,10 +1657,12 @@ static enum compact_result compact_zone( block_start_pfn(cc->migrate_pfn, cc->order); if (cc->last_migrated_pfn < current_block_start) { @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner } --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -6862,8 +6862,9 @@ void __init free_area_init(unsigned long +@@ -6965,8 +6965,9 @@ void __init free_area_init(unsigned long static int page_alloc_cpu_dead(unsigned int cpu) { @@ -138,7 +138,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -613,9 +617,9 @@ void lru_add_drain_cpu(int cpu) +@@ -585,9 +589,9 @@ void lru_add_drain_cpu(int cpu) unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -150,7 +150,7 @@ Signed-off-by: Thomas Gleixner } pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); -@@ -647,11 +651,12 @@ void deactivate_file_page(struct page *p +@@ -619,11 +623,12 @@ void deactivate_file_page(struct page *p return; if (likely(get_page_unless_zero(page))) { @@ -165,7 +165,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -666,19 +671,20 @@ void mark_page_lazyfree(struct page *pag +@@ -638,19 +643,20 @@ void mark_page_lazyfree(struct page *pag { if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page) && !PageUnevictable(page)) { diff --git a/debian/patches/features/all/rt/mm-disable-sloub-rt.patch b/debian/patches/features/all/rt/mm-disable-sloub-rt.patch index c066b5e36..de9173052 100644 --- a/debian/patches/features/all/rt/mm-disable-sloub-rt.patch +++ b/debian/patches/features/all/rt/mm-disable-sloub-rt.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:44:03 -0500 Subject: mm: Allow only slub on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Disable SLAB and SLOB on -RT. Only SLUB is adopted to -RT needs. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/init/Kconfig +++ b/init/Kconfig -@@ -1526,6 +1526,7 @@ choice +@@ -1544,6 +1544,7 @@ choice config SLAB bool "SLAB" @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1546,6 +1547,7 @@ config SLUB +@@ -1564,6 +1565,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" diff --git a/debian/patches/features/all/rt/mm-enable-slub.patch b/debian/patches/features/all/rt/mm-enable-slub.patch index ddf3e390c..5f7399f40 100644 --- a/debian/patches/features/all/rt/mm-enable-slub.patch +++ b/debian/patches/features/all/rt/mm-enable-slub.patch @@ -1,7 +1,7 @@ Subject: mm: Enable SLUB for RT From: Thomas Gleixner Date: Thu, 25 Oct 2012 10:32:35 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Make SLUB RT aware by converting locks to raw and using free lists to move the freeing out of the lock held region. 
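The "free lists" half of that sentence is a deferral trick: objects are unlinked while the (now raw) lock is held, parked on a local list, and only handed back after the lock is dropped, because freeing can itself take sleeping locks on -RT. A self-contained sketch of the idea (demo_* names invented; the patch's own variant is the free_delayed()/to_free machinery visible in the hunks below):

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct demo_obj {
            struct list_head lru;
    };

    static DEFINE_RAW_SPINLOCK(demo_list_lock);
    static LIST_HEAD(demo_list);

    static void demo_flush(void)
    {
            struct demo_obj *obj, *tmp;
            LIST_HEAD(to_free);
            unsigned long flags;

            /* detach everything while the raw lock is held... */
            raw_spin_lock_irqsave(&demo_list_lock, flags);
            list_splice_init(&demo_list, &to_free);
            raw_spin_unlock_irqrestore(&demo_list_lock, flags);

            /* ...then free with preemption possible again */
            list_for_each_entry_safe(obj, tmp, &to_free, lru) {
                    list_del(&obj->lru);
                    kfree(obj);
            }
    }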
@@ -9,12 +9,12 @@ move the freeing out of the lock held region. Signed-off-by: Thomas Gleixner --- mm/slab.h | 4 + - mm/slub.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- - 2 files changed, 110 insertions(+), 30 deletions(-) + mm/slub.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- + 2 files changed, 111 insertions(+), 30 deletions(-) --- a/mm/slab.h +++ b/mm/slab.h -@@ -454,7 +454,11 @@ static inline void slab_post_alloc_hook( +@@ -452,7 +452,11 @@ static inline void slab_post_alloc_hook( * The slab lists for all objects. */ struct kmem_cache_node { @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner struct list_head slabs_partial; /* partial list first, better asm code */ --- a/mm/slub.c +++ b/mm/slub.c -@@ -1180,7 +1180,7 @@ static noinline int free_debug_processin +@@ -1183,7 +1183,7 @@ static noinline int free_debug_processin unsigned long uninitialized_var(flags); int ret = 0; @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner slab_lock(page); if (s->flags & SLAB_CONSISTENCY_CHECKS) { -@@ -1215,7 +1215,7 @@ static noinline int free_debug_processin +@@ -1218,7 +1218,7 @@ static noinline int free_debug_processin bulk_cnt, cnt); slab_unlock(page); @@ -46,7 +46,7 @@ Signed-off-by: Thomas Gleixner if (!ret) slab_fix(s, "Object at 0x%p not freed", object); return ret; -@@ -1343,6 +1343,12 @@ static inline void dec_slabs_node(struct +@@ -1346,6 +1346,12 @@ static inline void dec_slabs_node(struct #endif /* CONFIG_SLUB_DEBUG */ @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner /* * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. -@@ -1569,7 +1575,11 @@ static struct page *allocate_slab(struct +@@ -1568,7 +1574,11 @@ static struct page *allocate_slab(struct flags &= gfp_allowed_mask; @@ -71,7 +71,7 @@ Signed-off-by: Thomas Gleixner local_irq_enable(); flags |= s->allocflags; -@@ -1644,7 +1654,11 @@ static struct page *allocate_slab(struct +@@ -1627,7 +1637,11 @@ static struct page *allocate_slab(struct page->frozen = 1; out: @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner local_irq_disable(); if (!page) return NULL; -@@ -1704,6 +1718,16 @@ static void __free_slab(struct kmem_cach +@@ -1685,6 +1699,16 @@ static void __free_slab(struct kmem_cach __free_pages(page, order); } @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner #define need_reserve_slab_rcu \ (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) -@@ -1735,6 +1759,12 @@ static void free_slab(struct kmem_cache +@@ -1716,6 +1740,12 @@ static void free_slab(struct kmem_cache } call_rcu(head, rcu_free_slab); @@ -113,7 +113,7 @@ Signed-off-by: Thomas Gleixner } else __free_slab(s, page); } -@@ -1842,7 +1872,7 @@ static void *get_partial_node(struct kme +@@ -1823,7 +1853,7 @@ static void *get_partial_node(struct kme if (!n || !n->nr_partial) return NULL; @@ -122,7 +122,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_safe(page, page2, &n->partial, lru) { void *t; -@@ -1867,7 +1897,7 @@ static void *get_partial_node(struct kme +@@ -1848,7 +1878,7 @@ static void *get_partial_node(struct kme break; } @@ -131,7 +131,7 @@ Signed-off-by: Thomas Gleixner return object; } -@@ -2113,7 +2143,7 @@ static void deactivate_slab(struct kmem_ +@@ -2094,7 +2124,7 @@ static void deactivate_slab(struct kmem_ * that acquire_slab() will see a slab page that * is frozen */ @@ -140,7 +140,7 @@ Signed-off-by: Thomas Gleixner } } else { m = M_FULL; -@@ -2124,7 +2154,7 @@ static void deactivate_slab(struct 
kmem_ +@@ -2105,7 +2135,7 @@ static void deactivate_slab(struct kmem_ * slabs from diagnostic functions will not see * any frozen slabs. */ @@ -149,7 +149,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -2159,7 +2189,7 @@ static void deactivate_slab(struct kmem_ +@@ -2140,7 +2170,7 @@ static void deactivate_slab(struct kmem_ goto redo; if (lock) @@ -158,7 +158,7 @@ Signed-off-by: Thomas Gleixner if (m == M_FREE) { stat(s, DEACTIVATE_EMPTY); -@@ -2194,10 +2224,10 @@ static void unfreeze_partials(struct kme +@@ -2175,10 +2205,10 @@ static void unfreeze_partials(struct kme n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) @@ -171,7 +171,7 @@ Signed-off-by: Thomas Gleixner } do { -@@ -2226,7 +2256,7 @@ static void unfreeze_partials(struct kme +@@ -2207,7 +2237,7 @@ static void unfreeze_partials(struct kme } if (n) @@ -180,7 +180,7 @@ Signed-off-by: Thomas Gleixner while (discard_page) { page = discard_page; -@@ -2265,14 +2295,21 @@ static void put_cpu_partial(struct kmem_ +@@ -2244,14 +2274,21 @@ static void put_cpu_partial(struct kmem_ pobjects = oldpage->pobjects; pages = oldpage->pages; if (drain && pobjects > s->cpu_partial) { @@ -202,7 +202,7 @@ Signed-off-by: Thomas Gleixner oldpage = NULL; pobjects = 0; pages = 0; -@@ -2342,7 +2379,22 @@ static bool has_cpu_slab(int cpu, void * +@@ -2321,7 +2358,22 @@ static bool has_cpu_slab(int cpu, void * static void flush_all(struct kmem_cache *s) { @@ -225,7 +225,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -2397,10 +2449,10 @@ static unsigned long count_partial(struc +@@ -2376,10 +2428,10 @@ static unsigned long count_partial(struc unsigned long x = 0; struct page *page; @@ -238,7 +238,7 @@ Signed-off-by: Thomas Gleixner return x; } #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2538,8 +2590,10 @@ static inline void *get_freelist(struct +@@ -2517,8 +2569,10 @@ static inline void *get_freelist(struct * already disabled (which is the case for bulk allocation). */ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, @@ -250,7 +250,7 @@ Signed-off-by: Thomas Gleixner void *freelist; struct page *page; -@@ -2595,6 +2649,13 @@ static void *___slab_alloc(struct kmem_c +@@ -2574,6 +2628,13 @@ static void *___slab_alloc(struct kmem_c VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); @@ -264,7 +264,7 @@ Signed-off-by: Thomas Gleixner return freelist; new_slab: -@@ -2610,7 +2671,7 @@ static void *___slab_alloc(struct kmem_c +@@ -2589,7 +2650,7 @@ static void *___slab_alloc(struct kmem_c if (unlikely(!freelist)) { slab_out_of_memory(s, gfpflags, node); @@ -273,7 +273,7 @@ Signed-off-by: Thomas Gleixner } page = c->page; -@@ -2623,7 +2684,7 @@ static void *___slab_alloc(struct kmem_c +@@ -2602,7 +2663,7 @@ static void *___slab_alloc(struct kmem_c goto new_slab; /* Slab failed checks. 
Next slab needed */ deactivate_slab(s, page, get_freepointer(s, freelist), c); @@ -282,7 +282,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -2635,6 +2696,7 @@ static void *__slab_alloc(struct kmem_ca +@@ -2614,6 +2675,7 @@ static void *__slab_alloc(struct kmem_ca { void *p; unsigned long flags; @@ -290,7 +290,7 @@ Signed-off-by: Thomas Gleixner local_irq_save(flags); #ifdef CONFIG_PREEMPT -@@ -2646,8 +2708,9 @@ static void *__slab_alloc(struct kmem_ca +@@ -2625,8 +2687,9 @@ static void *__slab_alloc(struct kmem_ca c = this_cpu_ptr(s->cpu_slab); #endif @@ -301,7 +301,7 @@ Signed-off-by: Thomas Gleixner return p; } -@@ -2833,7 +2896,7 @@ static void __slab_free(struct kmem_cach +@@ -2812,7 +2875,7 @@ static void __slab_free(struct kmem_cach do { if (unlikely(n)) { @@ -310,7 +310,7 @@ Signed-off-by: Thomas Gleixner n = NULL; } prior = page->freelist; -@@ -2865,7 +2928,7 @@ static void __slab_free(struct kmem_cach +@@ -2844,7 +2907,7 @@ static void __slab_free(struct kmem_cach * Otherwise the list_lock will synchronize with * other processors updating the list of slabs. */ @@ -319,7 +319,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -2907,7 +2970,7 @@ static void __slab_free(struct kmem_cach +@@ -2886,7 +2949,7 @@ static void __slab_free(struct kmem_cach add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } @@ -328,7 +328,7 @@ Signed-off-by: Thomas Gleixner return; slab_empty: -@@ -2922,7 +2985,7 @@ static void __slab_free(struct kmem_cach +@@ -2901,7 +2964,7 @@ static void __slab_free(struct kmem_cach remove_full(s, n, page); } @@ -337,7 +337,7 @@ Signed-off-by: Thomas Gleixner stat(s, FREE_SLAB); discard_slab(s, page); } -@@ -3127,6 +3190,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3106,6 +3169,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca void **p) { struct kmem_cache_cpu *c; @@ -345,7 +345,7 @@ Signed-off-by: Thomas Gleixner int i; /* memcg and kmem_cache debug support */ -@@ -3150,7 +3214,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3129,7 +3193,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca * of re-populating per CPU c->freelist */ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, @@ -354,7 +354,7 @@ Signed-off-by: Thomas Gleixner if (unlikely(!p[i])) goto error; -@@ -3162,6 +3226,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3141,6 +3205,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca } c->tid = next_tid(c->tid); local_irq_enable(); @@ -362,7 +362,15 @@ Signed-off-by: Thomas Gleixner /* Clear memory outside IRQ disabled fastpath loop */ if (unlikely(flags & __GFP_ZERO)) { -@@ -3309,7 +3374,7 @@ static void +@@ -3155,6 +3220,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + return i; + error: + local_irq_enable(); ++ free_delayed(&to_free); + slab_post_alloc_hook(s, flags, i, p); + __kmem_cache_free_bulk(s, i, p); + return 0; +@@ -3288,7 +3354,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -371,7 +379,7 @@ Signed-off-by: Thomas Gleixner INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG atomic_long_set(&n->nr_slabs, 0); -@@ -3663,6 +3728,10 @@ static void list_slab_objects(struct kme +@@ -3642,6 +3708,10 @@ static void list_slab_objects(struct kme const char *text) { #ifdef CONFIG_SLUB_DEBUG @@ -382,7 +390,7 @@ Signed-off-by: Thomas Gleixner void *addr = page_address(page); void *p; unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * -@@ -3683,6 +3752,7 @@ static void list_slab_objects(struct kme +@@ -3662,6 +3732,7 @@ static void list_slab_objects(struct kme slab_unlock(page); kfree(map); #endif @@ -390,7 +398,7 
@@ Signed-off-by: Thomas Gleixner } /* -@@ -3696,7 +3766,7 @@ static void free_partial(struct kmem_cac +@@ -3675,7 +3746,7 @@ static void free_partial(struct kmem_cac struct page *page, *h; BUG_ON(irqs_disabled()); @@ -399,7 +407,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_safe(page, h, &n->partial, lru) { if (!page->inuse) { remove_partial(n, page); -@@ -3706,7 +3776,7 @@ static void free_partial(struct kmem_cac +@@ -3685,7 +3756,7 @@ static void free_partial(struct kmem_cac "Objects remaining in %s on __kmem_cache_shutdown()"); } } @@ -408,7 +416,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_safe(page, h, &discard, lru) discard_slab(s, page); -@@ -3950,7 +4020,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -3947,7 +4018,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = 0; i < SHRINK_PROMOTE_MAX; i++) INIT_LIST_HEAD(promote + i); @@ -417,7 +425,7 @@ Signed-off-by: Thomas Gleixner /* * Build lists of slabs to discard or promote. -@@ -3981,7 +4051,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -3978,7 +4049,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) list_splice(promote + i, &n->partial); @@ -426,7 +434,7 @@ Signed-off-by: Thomas Gleixner /* Release empty slabs */ list_for_each_entry_safe(page, t, &discard, lru) -@@ -4194,6 +4264,12 @@ void __init kmem_cache_init(void) +@@ -4191,6 +4262,12 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; @@ -439,7 +447,7 @@ Signed-off-by: Thomas Gleixner if (debug_guardpage_minorder()) slub_max_order = 0; -@@ -4402,7 +4478,7 @@ static int validate_slab_node(struct kme +@@ -4399,7 +4476,7 @@ static int validate_slab_node(struct kme struct page *page; unsigned long flags; @@ -448,7 +456,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry(page, &n->partial, lru) { validate_slab_slab(s, page, map); -@@ -4424,7 +4500,7 @@ static int validate_slab_node(struct kme +@@ -4421,7 +4498,7 @@ static int validate_slab_node(struct kme s->name, count, atomic_long_read(&n->nr_slabs)); out: @@ -457,7 +465,7 @@ Signed-off-by: Thomas Gleixner return count; } -@@ -4612,12 +4688,12 @@ static int list_locations(struct kmem_ca +@@ -4609,12 +4686,12 @@ static int list_locations(struct kmem_ca if (!atomic_long_read(&n->nr_slabs)) continue; diff --git a/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch b/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch index 689e0fce9..29a386c6f 100644 --- a/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch +++ b/debian/patches/features/all/rt/mm-make-vmstat-rt-aware.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:30:13 -0500 Subject: mm/vmstat: Protect per cpu variables with preempt disable on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Disable preemption on -RT for the vmstat code. On vanilla the code runs in IRQ-off regions while on -RT it is not.
"preempt_disable" ensures that the @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h -@@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, v +@@ -43,7 +43,9 @@ DECLARE_PER_CPU(struct vm_event_state, v */ static inline void __count_vm_event(enum vm_event_item item) { @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner } static inline void count_vm_event(enum vm_event_item item) -@@ -43,7 +45,9 @@ static inline void count_vm_event(enum v +@@ -53,7 +55,9 @@ static inline void count_vm_event(enum v static inline void __count_vm_events(enum vm_event_item item, long delta) { @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner static inline void count_vm_events(enum vm_event_item item, long delta) --- a/mm/vmstat.c +++ b/mm/vmstat.c -@@ -249,6 +249,7 @@ void __mod_zone_page_state(struct zone * +@@ -320,6 +320,7 @@ void __mod_zone_page_state(struct zone * long x; long t; @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); -@@ -258,6 +259,7 @@ void __mod_zone_page_state(struct zone * +@@ -329,6 +330,7 @@ void __mod_zone_page_state(struct zone * x = 0; } __this_cpu_write(*p, x); @@ -55,7 +55,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__mod_zone_page_state); -@@ -269,6 +271,7 @@ void __mod_node_page_state(struct pglist +@@ -340,6 +342,7 @@ void __mod_node_page_state(struct pglist long x; long t; @@ -63,7 +63,7 @@ Signed-off-by: Thomas Gleixner x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); -@@ -278,6 +281,7 @@ void __mod_node_page_state(struct pglist +@@ -349,6 +352,7 @@ void __mod_node_page_state(struct pglist x = 0; } __this_cpu_write(*p, x); @@ -71,7 +71,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__mod_node_page_state); -@@ -310,6 +314,7 @@ void __inc_zone_state(struct zone *zone, +@@ -381,6 +385,7 @@ void __inc_zone_state(struct zone *zone, s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; @@ -79,7 +79,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -318,6 +323,7 @@ void __inc_zone_state(struct zone *zone, +@@ -389,6 +394,7 @@ void __inc_zone_state(struct zone *zone, zone_page_state_add(v + overstep, zone, item); __this_cpu_write(*p, -overstep); } @@ -87,7 +87,7 @@ Signed-off-by: Thomas Gleixner } void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -326,6 +332,7 @@ void __inc_node_state(struct pglist_data +@@ -397,6 +403,7 @@ void __inc_node_state(struct pglist_data s8 __percpu *p = pcp->vm_node_stat_diff + item; s8 v, t; @@ -95,7 +95,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -334,6 +341,7 @@ void __inc_node_state(struct pglist_data +@@ -405,6 +412,7 @@ void __inc_node_state(struct pglist_data node_page_state_add(v + overstep, pgdat, item); __this_cpu_write(*p, -overstep); } @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) -@@ -354,6 +362,7 @@ void __dec_zone_state(struct zone *zone, +@@ -425,6 +433,7 @@ void __dec_zone_state(struct zone *zone, s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; @@ -111,7 +111,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -362,6 +371,7 @@ void __dec_zone_state(struct zone *zone, +@@ -433,6 +442,7 @@ void 
__dec_zone_state(struct zone *zone, zone_page_state_add(v - overstep, zone, item); __this_cpu_write(*p, overstep); } @@ -119,7 +119,7 @@ Signed-off-by: Thomas Gleixner } void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -370,6 +380,7 @@ void __dec_node_state(struct pglist_data +@@ -441,6 +451,7 @@ void __dec_node_state(struct pglist_data s8 __percpu *p = pcp->vm_node_stat_diff + item; s8 v, t; @@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -378,6 +389,7 @@ void __dec_node_state(struct pglist_data +@@ -449,6 +460,7 @@ void __dec_node_state(struct pglist_data node_page_state_add(v - overstep, pgdat, item); __this_cpu_write(*p, overstep); } diff --git a/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch b/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch index df25a0e8d..9b5b86320 100644 --- a/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch +++ b/debian/patches/features/all/rt/mm-memcontrol-do_not_disable_irq.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Subject: mm/memcontrol: Replace local_irq_disable with local locks Date: Wed, 28 Jan 2015 17:14:16 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz There are a few local_irq_disable() which then take sleeping locks. This patch converts them local locks. @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { -@@ -4621,12 +4624,12 @@ static int mem_cgroup_move_account(struc +@@ -4540,12 +4543,12 @@ static int mem_cgroup_move_account(struc ret = 0; @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: unlock_page(page); out: -@@ -5569,10 +5572,10 @@ void mem_cgroup_commit_charge(struct pag +@@ -5488,10 +5491,10 @@ void mem_cgroup_commit_charge(struct pag commit_charge(page, memcg, lrucare); @@ -58,25 +58,25 @@ Signed-off-by: Sebastian Andrzej Siewior if (do_memsw_account() && PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; -@@ -5641,7 +5644,7 @@ static void uncharge_batch(const struct +@@ -5560,7 +5563,7 @@ static void uncharge_batch(const struct memcg_oom_recover(ug->memcg); } - local_irq_save(flags); + local_lock_irqsave(event_lock, flags); - __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon); - __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file); - __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge); -@@ -5649,7 +5652,7 @@ static void uncharge_batch(const struct - __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout); - __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages); + __mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon); + __mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file); + __mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge); +@@ -5568,7 +5571,7 @@ static void uncharge_batch(const struct + __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); + __this_cpu_add(ug->memcg->stat_cpu->nr_page_events, nr_pages); memcg_check_events(ug->memcg, ug->dummy_page); - local_irq_restore(flags); + local_unlock_irqrestore(event_lock, flags); if (!mem_cgroup_is_root(ug->memcg)) css_put_many(&ug->memcg->css, nr_pages); -@@ -5812,10 +5815,10 @@ void mem_cgroup_migrate(struct page *old +@@ -5731,10 
+5734,10 @@ void mem_cgroup_migrate(struct page *old commit_charge(newpage, memcg, false); @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior } DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); -@@ -5993,6 +5996,7 @@ void mem_cgroup_swapout(struct page *pag +@@ -5926,6 +5929,7 @@ void mem_cgroup_swapout(struct page *pag struct mem_cgroup *memcg, *swap_memcg; unsigned int nr_entries; unsigned short oldid; @@ -97,7 +97,7 @@ Signed-off-by: Sebastian Andrzej Siewior VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); -@@ -6038,13 +6042,17 @@ void mem_cgroup_swapout(struct page *pag +@@ -5971,13 +5975,17 @@ void mem_cgroup_swapout(struct page *pag * important here to have the interrupts disabled because it is the * only synchronisation we have for udpating the per-CPU variables. */ diff --git a/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch b/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch index 49c859967..a8f036ae5 100644 --- a/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch +++ b/debian/patches/features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch @@ -1,7 +1,7 @@ Subject: mm: page_alloc: Use local_lock_on() instead of plain spinlock From: Thomas Gleixner Date: Thu, 27 Sep 2012 11:11:46 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The plain spinlock while sufficient does not update the local_lock internals. Use a proper local_lock function instead to ease debugging. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -292,9 +292,9 @@ static DEFINE_LOCAL_IRQ_LOCK(pa_lock); +@@ -294,9 +294,9 @@ static DEFINE_LOCAL_IRQ_LOCK(pa_lock); #ifdef CONFIG_PREEMPT_RT_BASE # define cpu_lock_irqsave(cpu, flags) \ diff --git a/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch b/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch index e0a718962..8b3e0f21d 100644 --- a/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch +++ b/debian/patches/features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch @@ -1,7 +1,7 @@ From: Peter Zijlstra Date: Fri Jul 3 08:44:37 2009 -0500 Subject: mm: page_alloc: Reduce lock sections further -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Split out the pages which are to be freed into a separate list and call free_pages_bulk() outside of the percpu page allocator locks. @@ -9,12 +9,12 @@ call free_pages_bulk() outside of the percpu page allocator locks. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner --- - mm/page_alloc.c | 93 +++++++++++++++++++++++++++++++++++++++----------------- - 1 file changed, 65 insertions(+), 28 deletions(-) + mm/page_alloc.c | 146 +++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 104 insertions(+), 42 deletions(-) --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -1109,7 +1109,7 @@ static bool bulkfree_pcp_prepare(struct +@@ -1113,7 +1113,7 @@ static bool bulkfree_pcp_prepare(struct #endif /* CONFIG_DEBUG_VM */ /* @@ -23,12 +23,14 @@ Signed-off-by: Thomas Gleixner * Assumes all pages on list are in same zone, and of same order. 
* count is the number of pages to free. * -@@ -1120,15 +1120,53 @@ static bool bulkfree_pcp_prepare(struct +@@ -1123,16 +1123,64 @@ static bool bulkfree_pcp_prepare(struct + * And clear the zone's pages_scanned counter, to hold off the "all pages are * pinned" detection logic. */ - static void free_pcppages_bulk(struct zone *zone, int count, +-static void free_pcppages_bulk(struct zone *zone, int count, - struct per_cpu_pages *pcp) -+ struct list_head *list) ++static void free_pcppages_bulk(struct zone *zone, struct list_head *list, ++ bool zone_retry) { - int migratetype = 0; - int batch_free = 0; @@ -44,6 +46,18 @@ Signed-off-by: Thomas Gleixner + int mt; /* migratetype of the to-be-freed page */ + + page = list_first_entry(list, struct page, lru); ++ ++ /* ++ * free_unref_page_list() sorts pages by zone. If we end up if ++ * pages from different NUMA nodes belonging to the same ZONE ++ * index then we need to redo with the correcte ZONE pointer. ++ */ ++ if (page_zone(page) != zone) { ++ WARN_ON_ONCE(zone_retry == false); ++ if (zone_retry) ++ break; ++ } ++ + /* must delete as __free_one_page list manipulates */ + list_del(&page->lru); + @@ -59,9 +73,7 @@ Signed-off-by: Thomas Gleixner + + __free_one_page(page, page_to_pfn(page), zone, 0, mt); + trace_mm_page_pcpu_drain(page, 0, mt); -+ count--; + } -+ WARN_ON(count != 0); + spin_unlock_irqrestore(&zone->lock, flags); +} + @@ -81,7 +93,7 @@ Signed-off-by: Thomas Gleixner while (count) { struct page *page; struct list_head *list; -@@ -1144,7 +1182,7 @@ static void free_pcppages_bulk(struct zo +@@ -1148,7 +1196,7 @@ static void free_pcppages_bulk(struct zo batch_free++; if (++migratetype == MIGRATE_PCPTYPES) migratetype = 0; @@ -90,7 +102,7 @@ Signed-off-by: Thomas Gleixner } while (list_empty(list)); /* This is the only non-empty list. Free them all. 
*/ -@@ -1152,27 +1190,12 @@ static void free_pcppages_bulk(struct zo +@@ -1156,27 +1204,12 @@ static void free_pcppages_bulk(struct zo batch_free = count; do { @@ -119,7 +131,7 @@ Signed-off-by: Thomas Gleixner } static void free_one_page(struct zone *zone, -@@ -1180,13 +1203,15 @@ static void free_one_page(struct zone *z +@@ -1184,13 +1217,15 @@ static void free_one_page(struct zone *z unsigned int order, int migratetype) { @@ -137,7 +149,7 @@ Signed-off-by: Thomas Gleixner } static void __meminit __init_single_page(struct page *page, unsigned long pfn, -@@ -2393,16 +2418,18 @@ static int rmqueue_bulk(struct zone *zon +@@ -2426,16 +2461,18 @@ static int rmqueue_bulk(struct zone *zon void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { unsigned long flags; @@ -153,11 +165,11 @@ Signed-off-by: Thomas Gleixner pcp->count -= to_drain; } local_unlock_irqrestore(pa_lock, flags); -+ free_pcppages_bulk(zone, to_drain, &dst); ++ free_pcppages_bulk(zone, &dst, false); } #endif -@@ -2418,16 +2445,21 @@ static void drain_pages_zone(unsigned in +@@ -2451,16 +2488,21 @@ static void drain_pages_zone(unsigned in unsigned long flags; struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; @@ -177,22 +189,102 @@ Signed-off-by: Thomas Gleixner } cpu_unlock_irqrestore(cpu, flags); + if (count) -+ free_pcppages_bulk(zone, count, &dst); ++ free_pcppages_bulk(zone, &dst, false); } /* -@@ -2661,8 +2693,13 @@ void free_hot_cold_page(struct page *pag +@@ -2663,7 +2705,8 @@ static bool free_unref_page_prepare(stru + return true; + } + +-static void free_unref_page_commit(struct page *page, unsigned long pfn) ++static void free_unref_page_commit(struct page *page, unsigned long pfn, ++ struct list_head *dst) + { + struct zone *zone = page_zone(page); + struct per_cpu_pages *pcp; +@@ -2692,7 +2735,8 @@ static void free_unref_page_commit(struc pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); - free_pcppages_bulk(zone, batch, pcp); -+ LIST_HEAD(dst); + -+ isolate_pcp_pages(batch, pcp, &dst); ++ isolate_pcp_pages(batch, pcp, dst); pcp->count -= batch; -+ local_unlock_irqrestore(pa_lock, flags); -+ free_pcppages_bulk(zone, batch, &dst); -+ return; } + } +@@ -2704,13 +2748,17 @@ void free_unref_page(struct page *page) + { + unsigned long flags; + unsigned long pfn = page_to_pfn(page); ++ struct zone *zone = page_zone(page); ++ LIST_HEAD(dst); - out: + if (!free_unref_page_prepare(page, pfn)) + return; + + local_lock_irqsave(pa_lock, flags); +- free_unref_page_commit(page, pfn); ++ free_unref_page_commit(page, pfn, &dst); ++ + local_unlock_irqrestore(pa_lock, flags); ++ free_pcppages_bulk(zone, &dst, false); + } + + /* +@@ -2720,7 +2768,11 @@ void free_unref_page_list(struct list_he + { + struct page *page, *next; + unsigned long flags, pfn; +- int batch_count = 0; ++ struct list_head dsts[__MAX_NR_ZONES]; ++ int i; ++ ++ for (i = 0; i < __MAX_NR_ZONES; i++) ++ INIT_LIST_HEAD(&dsts[i]); + + /* Prepare pages for freeing */ + list_for_each_entry_safe(page, next, list, lru) { +@@ -2733,22 +2785,32 @@ void free_unref_page_list(struct list_he + local_lock_irqsave(pa_lock, flags); + list_for_each_entry_safe(page, next, list, lru) { + unsigned long pfn = page_private(page); ++ enum zone_type type; + + set_page_private(page, 0); + trace_mm_page_free_batched(page); +- free_unref_page_commit(page, pfn); ++ type = page_zonenum(page); ++ free_unref_page_commit(page, pfn, &dsts[type]); + +- /* +- * Guard against excessive IRQ disabled times when we get +- * a large list of 
pages to free. +- */ +- if (++batch_count == SWAP_CLUSTER_MAX) { +- local_unlock_irqrestore(pa_lock, flags); +- batch_count = 0; +- local_lock_irqsave(pa_lock, flags); +- } + } + local_unlock_irqrestore(pa_lock, flags); ++ ++ i = 0; ++ do { ++ struct page *page; ++ struct zone *zone; ++ ++ if (i >= __MAX_NR_ZONES) ++ break; ++ if (list_empty(&dsts[i])) { ++ i++; ++ continue; ++ } ++ page = list_first_entry(&dsts[i], struct page, lru); ++ zone = page_zone(page); ++ ++ free_pcppages_bulk(zone, &dsts[i], true); ++ } while (1); + } + + /* diff --git a/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch index 8fcee15a6..d1ee2c3a3 100644 --- a/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch +++ b/debian/patches/features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:37 -0500 Subject: mm: page_alloc: rt-friendly per-cpu pages -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz rt-friendly per-cpu pages: convert the irqs-off per-cpu locking method into a preemptible, explicit-per-cpu-locks method. @@ -13,8 +13,8 @@ Contains fixes from: Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- - mm/page_alloc.c | 55 +++++++++++++++++++++++++++++++++++++++---------------- - 1 file changed, 39 insertions(+), 16 deletions(-) + mm/page_alloc.c | 63 ++++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 43 insertions(+), 20 deletions(-) --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -287,6 +288,18 @@ EXPORT_SYMBOL(nr_node_ids); +@@ -289,6 +290,18 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -@@ -1259,10 +1272,10 @@ static void __free_pages_ok(struct page +@@ -1265,10 +1278,10 @@ static void __free_pages_ok(struct page return; migratetype = get_pfnblock_migratetype(page, pfn); @@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner } static void __init __free_pages_boot_core(struct page *page, unsigned int order) -@@ -2382,14 +2395,14 @@ void drain_zone_pages(struct zone *zone, +@@ -2415,14 +2428,14 @@ void drain_zone_pages(struct zone *zone, unsigned long flags; int to_drain, batch; @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner } #endif -@@ -2406,7 +2419,7 @@ static void drain_pages_zone(unsigned in +@@ -2439,7 +2452,7 @@ static void drain_pages_zone(unsigned in struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; @@ -84,7 +84,7 @@ Signed-off-by: Thomas Gleixner pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; -@@ -2414,7 +2427,7 @@ static void drain_pages_zone(unsigned in +@@ -2447,7 +2460,7 @@ static void drain_pages_zone(unsigned in free_pcppages_bulk(zone, pcp->count, pcp); pcp->count = 0; } @@ -93,7 +93,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -2449,6 +2462,7 @@ void drain_local_pages(struct zone *zone +@@ -2482,6 +2495,7 @@ void drain_local_pages(struct zone *zone drain_pages(cpu); } @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner static void drain_local_pages_wq(struct work_struct *work) { /* -@@ -2462,6 +2476,7 @@ static void drain_local_pages_wq(struct +@@ -2495,6 +2509,7 @@ 
static void drain_local_pages_wq(struct drain_local_pages(NULL); preempt_enable(); } @@ -109,7 +109,7 @@ Signed-off-by: Thomas Gleixner /* * Spill all the per-cpu pages from all CPUs back into the buddy allocator. -@@ -2528,7 +2543,14 @@ void drain_all_pages(struct zone *zone) +@@ -2561,7 +2576,14 @@ void drain_all_pages(struct zone *zone) else cpumask_clear_cpu(cpu, &cpus_with_pcps); } @@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner for_each_cpu(cpu, &cpus_with_pcps) { struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu); INIT_WORK(work, drain_local_pages_wq); -@@ -2536,6 +2558,7 @@ void drain_all_pages(struct zone *zone) +@@ -2569,6 +2591,7 @@ void drain_all_pages(struct zone *zone) } for_each_cpu(cpu, &cpus_with_pcps) flush_work(per_cpu_ptr(&pcpu_drain, cpu)); @@ -133,25 +133,44 @@ Signed-off-by: Thomas Gleixner mutex_unlock(&pcpu_drain_mutex); } -@@ -2612,7 +2635,7 @@ void free_hot_cold_page(struct page *pag +@@ -2685,9 +2708,9 @@ void free_unref_page(struct page *page) + if (!free_unref_page_prepare(page, pfn)) + return; - migratetype = get_pfnblock_migratetype(page, pfn); - set_pcppage_migratetype(page, migratetype); - local_irq_save(flags); + local_lock_irqsave(pa_lock, flags); - __count_vm_event(PGFREE); - - /* -@@ -2643,7 +2666,7 @@ void free_hot_cold_page(struct page *pag - } - - out: + free_unref_page_commit(page, pfn); - local_irq_restore(flags); + local_unlock_irqrestore(pa_lock, flags); } /* -@@ -2800,7 +2823,7 @@ static struct page *rmqueue_pcplist(stru +@@ -2707,7 +2730,7 @@ void free_unref_page_list(struct list_he + set_page_private(page, pfn); + } + +- local_irq_save(flags); ++ local_lock_irqsave(pa_lock, flags); + list_for_each_entry_safe(page, next, list, lru) { + unsigned long pfn = page_private(page); + +@@ -2720,12 +2743,12 @@ void free_unref_page_list(struct list_he + * a large list of pages to free. + */ + if (++batch_count == SWAP_CLUSTER_MAX) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(pa_lock, flags); + batch_count = 0; +- local_irq_save(flags); ++ local_lock_irqsave(pa_lock, flags); + } + } +- local_irq_restore(flags); ++ local_unlock_irqrestore(pa_lock, flags); + } + + /* +@@ -2859,7 +2882,7 @@ static struct page *rmqueue_pcplist(stru struct page *page; unsigned long flags; @@ -159,8 +178,8 @@ Signed-off-by: Thomas Gleixner + local_lock_irqsave(pa_lock, flags); pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; - page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); -@@ -2808,7 +2831,7 @@ static struct page *rmqueue_pcplist(stru + page = __rmqueue_pcplist(zone, migratetype, pcp, list); +@@ -2867,7 +2890,7 @@ static struct page *rmqueue_pcplist(stru __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); } @@ -169,7 +188,7 @@ Signed-off-by: Thomas Gleixner return page; } -@@ -2835,7 +2858,7 @@ struct page *rmqueue(struct zone *prefer +@@ -2894,7 +2917,7 @@ struct page *rmqueue(struct zone *prefer * allocate greater than order-1 page units with __GFP_NOFAIL. 
*/ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); @@ -178,7 +197,7 @@ Signed-off-by: Thomas Gleixner do { page = NULL; -@@ -2855,14 +2878,14 @@ struct page *rmqueue(struct zone *prefer +@@ -2914,14 +2937,14 @@ struct page *rmqueue(struct zone *prefer __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); @@ -195,7 +214,7 @@ Signed-off-by: Thomas Gleixner return NULL; } -@@ -7707,7 +7730,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7794,7 +7817,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -204,7 +223,7 @@ Signed-off-by: Thomas Gleixner if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -7716,7 +7739,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7803,7 +7826,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } diff --git a/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch b/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch index 95202d477..2b5fb5bde 100644 --- a/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch +++ b/debian/patches/features/all/rt/mm-perform-lru_add_drain_all-remotely.patch @@ -1,7 +1,7 @@ From: Luiz Capitulino Date: Fri, 27 May 2016 15:03:28 +0200 Subject: [PATCH] mm: perform lru_add_drain_all() remotely -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz lru_add_drain_all() works by scheduling lru_add_drain_cpu() to run on all CPUs that have non-empty LRU pagevecs and then waiting for @@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/swap.c +++ b/mm/swap.c -@@ -617,9 +617,15 @@ void lru_add_drain_cpu(int cpu) +@@ -589,9 +589,15 @@ void lru_add_drain_cpu(int cpu) unsigned long flags; /* No harm done if a racing interrupt already did this */ @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior } pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); -@@ -687,6 +693,16 @@ void lru_add_drain(void) +@@ -659,6 +665,16 @@ void lru_add_drain(void) local_unlock_cpu(swapvec_lock); } @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_drain(); -@@ -694,6 +710,16 @@ static void lru_add_drain_per_cpu(struct +@@ -666,6 +682,16 @@ static void lru_add_drain_per_cpu(struct static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); @@ -72,10 +72,10 @@ Signed-off-by: Sebastian Andrzej Siewior +} +#endif + - void lru_add_drain_all_cpuslocked(void) - { - static DEFINE_MUTEX(lock); -@@ -711,21 +737,19 @@ void lru_add_drain_all_cpuslocked(void) + /* + * Doesn't need any cpu hotplug locking because we do rely on per-cpu + * kworkers being shut down before our page_alloc_cpu_dead callback is +@@ -690,21 +716,19 @@ void lru_add_drain_all(void) cpumask_clear(&has_work); for_each_online_cpu(cpu) { diff --git a/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch b/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch index f6d3d3eeb..2c799e102 100644 --- a/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch +++ b/debian/patches/features/all/rt/mm-protect-activate-switch-mm.patch @@ -1,7 +1,7 @@ From: Yong Zhang Date: Tue, 15 May 2012 13:53:56 +0800 Subject: mm: Protect activate_mm() by 
preempt_[disable&enable]_rt() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz User preempt_*_rt instead of local_irq_*_rt or otherwise there will be warning on ARM like below: diff --git a/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch b/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch index 22e845376..9315f52c1 100644 --- a/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch +++ b/debian/patches/features/all/rt/mm-rt-kmap-atomic-scheduling.patch @@ -1,7 +1,7 @@ Subject: mm, rt: kmap_atomic scheduling From: Peter Zijlstra Date: Thu, 28 Jul 2011 10:43:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz In fact, with migrate_disable() existing one could play games with kmap_atomic. You could save/restore the kmap_atomic slots on context @@ -230,7 +230,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -@@ -1104,6 +1105,12 @@ struct task_struct { +@@ -1113,6 +1114,12 @@ struct task_struct { int softirq_nestcnt; unsigned int softirqs_raised; #endif diff --git a/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch b/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch index 68e0e1e84..c8cecbd73 100644 --- a/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch +++ b/debian/patches/features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:44:34 -0500 Subject: mm/scatterlist: Do not disable irqs on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz For -RT it is enough to keep pagefault disabled (which is currently handled by kmap_atomic()). @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/lib/scatterlist.c +++ b/lib/scatterlist.c -@@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_ite +@@ -788,7 +788,7 @@ void sg_miter_stop(struct sg_mapping_ite flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { diff --git a/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch b/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch index c9d30f356..801a219a6 100644 --- a/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch +++ b/debian/patches/features/all/rt/mm-vmalloc-use-get-cpu-light.patch @@ -1,7 +1,7 @@ Subject: mm/vmalloc: Another preempt disable region which sucks From: Thomas Gleixner Date: Tue, 12 Jul 2011 11:39:36 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Avoid the preempt disable version of get_cpu_var(). The inner-lock should provide enough serialisation. 
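get_cpu_light() is the -rt queue's lighter sibling of get_cpu(): it only disables migration, so the task keeps using its CPU's data but stays preemptible, and correctness is carried entirely by the per-CPU lock. A sketch assuming the -rt definitions of get_cpu_light()/put_cpu_light() (demo_* names invented):

    #include <linux/percpu.h>
    #include <linux/spinlock.h>

    struct demo_block {
            spinlock_t lock;        /* the "inner lock" doing the serialisation */
            int free;
    };
    static DEFINE_PER_CPU(struct demo_block, demo_blocks);

    static int demo_take(void)
    {
            struct demo_block *b;
            int n;

            b = &per_cpu(demo_blocks, get_cpu_light()); /* migrate_disable() on -RT */
            spin_lock(&b->lock);    /* sleeping lock is fine: still preemptible */
            n = b->free--;
            spin_unlock(&b->lock);
            put_cpu_light();
            return n;
    }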
diff --git a/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch b/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch index 566e044bf..ea710f3da 100644 --- a/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch +++ b/debian/patches/features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Jan 2015 17:19:44 +0100 Subject: mm/workingset: Do not protect workingset_shadow_nodes with irq off -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz workingset_shadow_nodes is protected by local_irq_disable(). Some users use spin_lock_irq(). @@ -10,11 +10,11 @@ so I catch users of it which will be introduced later. Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/swap.h | 4 +++- - mm/filemap.c | 9 +++++++-- + include/linux/swap.h | 8 +++++--- + mm/filemap.c | 13 ++++++++++--- mm/truncate.c | 4 +++- mm/workingset.c | 31 ++++++++++++++++--------------- - 4 files changed, 29 insertions(+), 19 deletions(-) + 4 files changed, 34 insertions(+), 22 deletions(-) --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -26,16 +26,23 @@ Signed-off-by: Sebastian Andrzej Siewior #include struct notifier_block; -@@ -297,7 +298,8 @@ struct vma_swap_readahead { - void *workingset_eviction(struct address_space *mapping, struct page *page); - bool workingset_refault(void *shadow); +@@ -300,12 +301,13 @@ bool workingset_refault(void *shadow); void workingset_activation(struct page *page); --void workingset_update_node(struct radix_tree_node *node, void *private); -+void __workingset_update_node(struct radix_tree_node *node, void *private); + + /* Do not use directly, use workingset_lookup_update */ +-void workingset_update_node(struct radix_tree_node *node); ++void __workingset_update_node(struct radix_tree_node *node); +DECLARE_LOCAL_IRQ_LOCK(shadow_nodes_lock); - /* linux/mm/page_alloc.c */ - extern unsigned long totalram_pages; + /* Returns workingset_update_node() if the mapping has shadow entries. 
*/ +-#define workingset_lookup_update(mapping) \ ++#define __workingset_lookup_update(mapping) \ + ({ \ +- radix_tree_update_node_t __helper = workingset_update_node; \ ++ radix_tree_update_node_t __helper = __workingset_update_node; \ + if (dax_mapping(mapping) || shmem_mapping(mapping)) \ + __helper = NULL; \ + __helper; \ --- a/mm/filemap.c +++ b/mm/filemap.c @@ -110,6 +110,7 @@ @@ -52,8 +59,8 @@ Signed-off-by: Sebastian Andrzej Siewior } + local_lock(shadow_nodes_lock); __radix_tree_replace(&mapping->page_tree, node, slot, page, -- workingset_update_node, mapping); -+ __workingset_update_node, mapping); +- workingset_lookup_update(mapping)); ++ __workingset_lookup_update(mapping)); + local_unlock(shadow_nodes_lock); mapping->nrpages++; return 0; @@ -70,27 +77,48 @@ Signed-off-by: Sebastian Andrzej Siewior radix_tree_clear_tags(&mapping->page_tree, node, slot); __radix_tree_replace(&mapping->page_tree, node, slot, shadow, -- workingset_update_node, mapping); -+ __workingset_update_node, mapping); +- workingset_lookup_update(mapping)); ++ __workingset_lookup_update(mapping)); } + local_unlock(shadow_nodes_lock); - if (shadow) { - mapping->nrexceptional += nr; + page->mapping = NULL; + /* Leave page->index set: truncation lookup relies upon it */ +@@ -329,6 +334,7 @@ page_cache_tree_delete_batch(struct addr + struct page *page; + pgoff_t start; + ++ local_lock(shadow_nodes_lock); + start = pvec->pages[0]->index; + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { + if (i >= pagevec_count(pvec) && !tail_pages) +@@ -359,10 +365,11 @@ page_cache_tree_delete_batch(struct addr + } + radix_tree_clear_tags(&mapping->page_tree, iter.node, slot); + __radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL, +- workingset_lookup_update(mapping)); ++ __workingset_lookup_update(mapping)); + total_pages++; + } + mapping->nrpages -= total_pages; ++ local_unlock(shadow_nodes_lock); + } + + void delete_from_page_cache_batch(struct address_space *mapping, --- a/mm/truncate.c +++ b/mm/truncate.c -@@ -41,8 +41,10 @@ static void clear_shadow_entry(struct ad - goto unlock; +@@ -40,8 +40,10 @@ static inline void __clear_shadow_entry( + return; if (*slot != entry) - goto unlock; + return; + local_lock(shadow_nodes_lock); __radix_tree_replace(&mapping->page_tree, node, slot, NULL, -- workingset_update_node, mapping); -+ __workingset_update_node, mapping); +- workingset_update_node); ++ __workingset_update_node); + local_unlock(shadow_nodes_lock); mapping->nrexceptional--; - unlock: - spin_unlock_irq(&mapping->tree_lock); + } + --- a/mm/workingset.c +++ b/mm/workingset.c @@ -338,9 +338,10 @@ void workingset_activation(struct page * @@ -101,12 +129,12 @@ Signed-off-by: Sebastian Andrzej Siewior +static struct list_lru __shadow_nodes; +DEFINE_LOCAL_IRQ_LOCK(shadow_nodes_lock); --void workingset_update_node(struct radix_tree_node *node, void *private) -+void __workingset_update_node(struct radix_tree_node *node, void *private) +-void workingset_update_node(struct radix_tree_node *node) ++void __workingset_update_node(struct radix_tree_node *node) { - struct address_space *mapping = private; - -@@ -358,10 +359,10 @@ void workingset_update_node(struct radix + /* + * Track non-empty nodes that contain only shadow entries; +@@ -352,10 +353,10 @@ void workingset_update_node(struct radix */ if (node->count && node->count == node->exceptional) { if (list_empty(&node->private_list)) @@ -119,7 +147,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -373,9 +374,9 @@ static unsigned long 
count_shadow_nodes( +@@ -367,9 +368,9 @@ static unsigned long count_shadow_nodes( unsigned long cache; /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ @@ -132,12 +160,12 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Approximate a reasonable limit for the radix tree nodes -@@ -475,15 +476,15 @@ static enum lru_status shadow_lru_isolat +@@ -469,15 +470,15 @@ static enum lru_status shadow_lru_isolat goto out_invalid; inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->page_tree, node, -- workingset_update_node, mapping); -+ __workingset_update_node, mapping); +- workingset_lookup_update(mapping)); ++ __workingset_lookup_update(mapping)); out_invalid: spin_unlock(&mapping->tree_lock); @@ -151,7 +179,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_lock(lru_lock); return ret; } -@@ -494,9 +495,9 @@ static unsigned long scan_shadow_nodes(s +@@ -488,9 +489,9 @@ static unsigned long scan_shadow_nodes(s unsigned long ret; /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ @@ -164,7 +192,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -534,7 +535,7 @@ static int __init workingset_init(void) +@@ -528,7 +529,7 @@ static int __init workingset_init(void) pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); @@ -173,7 +201,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (ret) goto err; ret = register_shrinker(&workingset_shadow_shrinker); -@@ -542,7 +543,7 @@ static int __init workingset_init(void) +@@ -536,7 +537,7 @@ static int __init workingset_init(void) goto err_list_lru; return 0; err_list_lru: diff --git a/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch b/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch index 5fc8f4d82..84d2bfb2c 100644 --- a/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch +++ b/debian/patches/features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Tue, 22 Mar 2016 11:16:09 +0100 Subject: [PATCH] mm/zsmalloc: copy with get_cpu_var() and locking -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz get_cpu_var() disables preemption and triggers a might_sleep() splat later. This is replaced with get_locked_var(). @@ -18,15 +18,15 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c -@@ -53,6 +53,7 @@ - #include +@@ -55,6 +55,7 @@ #include #include + #include +#include #define ZSPAGE_MAGIC 0x58 -@@ -70,9 +71,22 @@ +@@ -72,9 +73,22 @@ */ #define ZS_MAX_ZSPAGE_ORDER 2 #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Object location (, ) is encoded as * as single (unsigned long) handle value. 
-@@ -320,7 +334,7 @@ static void SetZsPageMovable(struct zs_p +@@ -318,7 +332,7 @@ static void SetZsPageMovable(struct zs_p static int create_cache(struct zs_pool *pool) { @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior 0, 0, NULL); if (!pool->handle_cachep) return 1; -@@ -344,10 +358,27 @@ static void destroy_cache(struct zs_pool +@@ -342,10 +356,27 @@ static void destroy_cache(struct zs_pool static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) { @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void cache_free_handle(struct zs_pool *pool, unsigned long handle) { kmem_cache_free(pool->handle_cachep, (void *)handle); -@@ -366,12 +397,18 @@ static void cache_free_zspage(struct zs_ +@@ -364,12 +395,18 @@ static void cache_free_zspage(struct zs_ static void record_obj(unsigned long handle, unsigned long obj) { @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* zpool driver */ -@@ -460,6 +497,7 @@ MODULE_ALIAS("zpool-zsmalloc"); +@@ -451,6 +488,7 @@ MODULE_ALIAS("zpool-zsmalloc"); /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior static bool is_zspage_isolated(struct zspage *zspage) { -@@ -898,7 +936,13 @@ static unsigned long location_to_obj(str +@@ -889,7 +927,13 @@ static unsigned long location_to_obj(str static unsigned long handle_to_obj(unsigned long handle) { @@ -130,7 +130,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static unsigned long obj_to_head(struct page *page, void *obj) -@@ -912,22 +956,46 @@ static unsigned long obj_to_head(struct +@@ -903,22 +947,46 @@ static unsigned long obj_to_head(struct static inline int testpin_tag(unsigned long handle) { @@ -177,7 +177,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void reset_page(struct page *page) -@@ -1365,7 +1433,7 @@ void *zs_map_object(struct zs_pool *pool +@@ -1356,7 +1424,7 @@ void *zs_map_object(struct zs_pool *pool class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; @@ -186,7 +186,7 @@ Signed-off-by: Sebastian Andrzej Siewior area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ -@@ -1419,7 +1487,7 @@ void zs_unmap_object(struct zs_pool *poo +@@ -1410,7 +1478,7 @@ void zs_unmap_object(struct zs_pool *poo __zs_unmap_object(area, pages, off, class->size); } diff --git a/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch b/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch index b5bb43299..87446c83d 100644 --- a/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch +++ b/debian/patches/features/all/rt/mmci-remove-bogus-irq-save.patch @@ -1,7 +1,9 @@ Subject: mmci: Remove bogus local_irq_save() From: Thomas Gleixner Date: Wed, 09 Jan 2013 12:11:12 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +upstream commit 99d02d6cd5610711d91f286bb67a57142028e9e6 On !RT interrupt runs with interrupts disabled. On RT it's in a thread, so no need to disable interrupts at all. 
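A hedged sketch of the handler shape this allows ("foo" is a
hypothetical driver, not mmci): primary handlers run with interrupts
disabled on !RT, and as forced-threaded handlers in task context on RT,
so a local_irq_save() inside the handler provides no exclusion in
either configuration:

	static irqreturn_t foo_irq(int irq, void *dev_id)
	{
		struct foo_host *host = dev_id;

		/*
		 * !RT: hard irq context, interrupts are already off.
		 * RT:  runs in a dedicated irq thread, so disabling
		 *      interrupts adds latency but protects nothing.
		 *      A plain lock suffices where exclusion is needed.
		 */
		spin_lock(&host->lock);
		/* ... drain the controller FIFO ... */
		spin_unlock(&host->lock);
		return IRQ_HANDLED;
	}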
@@ -13,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c -@@ -1200,15 +1200,12 @@ static irqreturn_t mmci_pio_irq(int irq, +@@ -1253,15 +1253,12 @@ static irqreturn_t mmci_pio_irq(int irq, struct sg_mapping_iter *sg_miter = &host->sg_miter; struct variant_data *variant = host->variant; void __iomem *base = host->base; @@ -29,7 +31,7 @@ Signed-off-by: Thomas Gleixner do { unsigned int remain, len; char *buffer; -@@ -1248,8 +1245,6 @@ static irqreturn_t mmci_pio_irq(int irq, +@@ -1301,8 +1298,6 @@ static irqreturn_t mmci_pio_irq(int irq, sg_miter_stop(sg_miter); diff --git a/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch b/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch index 35029f99a..04bcaf695 100644 --- a/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch +++ b/debian/patches/features/all/rt/mutex-no-spin-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:51:45 +0200 Subject: locking: Disable spin on owner for RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Drop spin on owner for mutex / rwsem. We are most likely not using it but… diff --git a/debian/patches/features/all/rt/net-3com-3c59x-Move-boomerang-vortex-conditional-int.patch b/debian/patches/features/all/rt/net-3com-3c59x-Move-boomerang-vortex-conditional-int.patch new file mode 100644 index 000000000..de3974af5 --- /dev/null +++ b/debian/patches/features/all/rt/net-3com-3c59x-Move-boomerang-vortex-conditional-int.patch @@ -0,0 +1,109 @@ +From: Anna-Maria Gleixner +Date: Thu, 12 Apr 2018 18:36:14 +0200 +Subject: [PATCH] net: 3com: 3c59x: Move boomerang/vortex conditional into + function +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +If vp->full_bus_master_tx is set, vp->full_bus_master_rx is set as well +(see vortex_probe1()). Therefore the conditionals for the decision if +boomerang or vortex ISR is executed have the same result. Instead of +repeating the explicit conditional execution of the boomerang/vortex ISR, +move it into an own function. + +No functional change. 
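+A hedged sketch of the invariant the single dispatch point relies on
+(per vortex_probe1() as described above; the WARN_ON is illustrative
+and not part of this patch):
+
+	/* full_bus_master_tx set implies full_bus_master_rx set,
+	 * so one flag is enough to pick the ISR flavour */
+	WARN_ON(vp->full_bus_master_tx && !vp->full_bus_master_rx);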
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/ethernet/3com/3c59x.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) + +--- a/drivers/net/ethernet/3com/3c59x.c ++++ b/drivers/net/ethernet/3com/3c59x.c +@@ -765,8 +765,9 @@ static netdev_tx_t boomerang_start_xmit( + struct net_device *dev); + static int vortex_rx(struct net_device *dev); + static int boomerang_rx(struct net_device *dev); +-static irqreturn_t vortex_interrupt(int irq, void *dev_id); +-static irqreturn_t boomerang_interrupt(int irq, void *dev_id); ++static irqreturn_t vortex_boomerang_interrupt(int irq, void *dev_id); ++static irqreturn_t _vortex_interrupt(int irq, struct net_device *dev); ++static irqreturn_t _boomerang_interrupt(int irq, struct net_device *dev); + static int vortex_close(struct net_device *dev); + static void dump_tx_ring(struct net_device *dev); + static void update_stats(void __iomem *ioaddr, struct net_device *dev); +@@ -838,10 +839,9 @@ MODULE_PARM_DESC(use_mmio, "3c59x: use m + #ifdef CONFIG_NET_POLL_CONTROLLER + static void poll_vortex(struct net_device *dev) + { +- struct vortex_private *vp = netdev_priv(dev); + unsigned long flags; + local_irq_save(flags); +- (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); ++ vortex_boomerang_interrupt(dev->irq, dev); + local_irq_restore(flags); + } + #endif +@@ -1729,8 +1729,7 @@ vortex_open(struct net_device *dev) + dma_addr_t dma; + + /* Use the now-standard shared IRQ implementation. */ +- if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ? +- boomerang_interrupt : vortex_interrupt, IRQF_SHARED, dev->name, dev))) { ++ if ((retval = request_irq(dev->irq, vortex_boomerang_interrupt, IRQF_SHARED, dev->name, dev))) { + pr_err("%s: Could not reserve IRQ %d\n", dev->name, dev->irq); + goto err; + } +@@ -1911,10 +1910,7 @@ static void vortex_tx_timeout(struct net + */ + unsigned long flags; + local_irq_save(flags); +- if (vp->full_bus_master_tx) +- boomerang_interrupt(dev->irq, dev); +- else +- vortex_interrupt(dev->irq, dev); ++ vortex_boomerang_interrupt(dev->irq, dev); + local_irq_restore(flags); + } + } +@@ -2267,9 +2263,8 @@ boomerang_start_xmit(struct sk_buff *skb + */ + + static irqreturn_t +-vortex_interrupt(int irq, void *dev_id) ++_vortex_interrupt(int irq, struct net_device *dev) + { +- struct net_device *dev = dev_id; + struct vortex_private *vp = netdev_priv(dev); + void __iomem *ioaddr; + int status; +@@ -2386,9 +2381,8 @@ vortex_interrupt(int irq, void *dev_id) + */ + + static irqreturn_t +-boomerang_interrupt(int irq, void *dev_id) ++_boomerang_interrupt(int irq, struct net_device *dev) + { +- struct net_device *dev = dev_id; + struct vortex_private *vp = netdev_priv(dev); + void __iomem *ioaddr; + int status; +@@ -2526,6 +2520,18 @@ boomerang_interrupt(int irq, void *dev_i + return IRQ_RETVAL(handled); + } + ++static irqreturn_t ++vortex_boomerang_interrupt(int irq, void *dev_id) ++{ ++ struct net_device *dev = dev_id; ++ struct vortex_private *vp = netdev_priv(dev); ++ ++ if (vp->full_bus_master_rx) ++ return _boomerang_interrupt(dev->irq, dev); ++ else ++ return _vortex_interrupt(dev->irq, dev); ++} ++ + static int vortex_rx(struct net_device *dev) + { + struct vortex_private *vp = netdev_priv(dev); diff --git a/debian/patches/features/all/rt/net-3com-3c59x-Pull-locking-out-of-ISR.patch b/debian/patches/features/all/rt/net-3com-3c59x-Pull-locking-out-of-ISR.patch new file mode 100644 index 000000000..56772c1dc --- 
/dev/null +++ b/debian/patches/features/all/rt/net-3com-3c59x-Pull-locking-out-of-ISR.patch @@ -0,0 +1,77 @@ +From: Anna-Maria Gleixner +Date: Thu, 12 Apr 2018 18:36:15 +0200 +Subject: [PATCH] net: 3com: 3c59x: Pull locking out of ISR +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Locking is done in the same way in _vortex_interrupt() and +_boomerang_interrupt(). To prevent duplication, move the locking into the +calling vortex_boomerang_interrupt() function. + +No functional change. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/ethernet/3com/3c59x.c | 20 +++++++++----------- + 1 file changed, 9 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/3com/3c59x.c ++++ b/drivers/net/ethernet/3com/3c59x.c +@@ -2273,7 +2273,6 @@ static irqreturn_t + unsigned int bytes_compl = 0, pkts_compl = 0; + + ioaddr = vp->ioaddr; +- spin_lock(&vp->lock); + + status = ioread16(ioaddr + EL3_STATUS); + +@@ -2371,7 +2370,6 @@ static irqreturn_t + pr_debug("%s: exiting interrupt, status %4.4x.\n", + dev->name, status); + handler_exit: +- spin_unlock(&vp->lock); + return IRQ_RETVAL(handled); + } + +@@ -2392,12 +2390,6 @@ static irqreturn_t + + ioaddr = vp->ioaddr; + +- +- /* +- * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout +- * and boomerang_start_xmit +- */ +- spin_lock(&vp->lock); + vp->handling_irq = 1; + + status = ioread16(ioaddr + EL3_STATUS); +@@ -2516,7 +2508,6 @@ static irqreturn_t + dev->name, status); + handler_exit: + vp->handling_irq = 0; +- spin_unlock(&vp->lock); + return IRQ_RETVAL(handled); + } + +@@ -2525,11 +2516,18 @@ vortex_boomerang_interrupt(int irq, void + { + struct net_device *dev = dev_id; + struct vortex_private *vp = netdev_priv(dev); ++ irqreturn_t ret; ++ ++ spin_lock(&vp->lock); + + if (vp->full_bus_master_rx) +- return _boomerang_interrupt(dev->irq, dev); ++ ret = _boomerang_interrupt(dev->irq, dev); + else +- return _vortex_interrupt(dev->irq, dev); ++ ret = _vortex_interrupt(dev->irq, dev); ++ ++ spin_unlock(&vp->lock); ++ ++ return ret; + } + + static int vortex_rx(struct net_device *dev) diff --git a/debian/patches/features/all/rt/net-3com-3c59x-irq-save-variant-of-ISR.patch b/debian/patches/features/all/rt/net-3com-3c59x-irq-save-variant-of-ISR.patch new file mode 100644 index 000000000..13b425166 --- /dev/null +++ b/debian/patches/features/all/rt/net-3com-3c59x-irq-save-variant-of-ISR.patch @@ -0,0 +1,69 @@ +From: Anna-Maria Gleixner +Date: Thu, 12 Apr 2018 18:36:16 +0200 +Subject: [PATCH] net: 3com: 3c59x: irq save variant of ISR +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +When vortex_boomerang_interrupt() is invoked from vortex_tx_timeout() or +poll_vortex() interrupts must be disabled. This detaches the interrupt +disable logic from locking which requires patching for PREEMPT_RT. + +The advantage of avoiding spin_lock_irqsave() in the interrupt handler is +minimal, but converting it removes all the extra code for callers which +come not from interrupt context. 
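+A minimal sketch of the window the irqsave variant closes for task
+context callers (hypothetical lock, not the 3c59x code):
+
+	static DEFINE_SPINLOCK(foo_lock);
+
+	static void foo_from_task_context(void)
+	{
+		unsigned long flags;
+
+		/*
+		 * A bare spin_lock() here could deadlock on !RT: the
+		 * device interrupt may fire on this CPU while the lock
+		 * is held and then spin on it forever. Disabling
+		 * interrupts closes that window, and the save/restore
+		 * is nearly free when interrupts are already off.
+		 */
+		spin_lock_irqsave(&foo_lock, flags);
+		/* ... touch device state ... */
+		spin_unlock_irqrestore(&foo_lock, flags);
+	}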
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/ethernet/3com/3c59x.c | 18 ++++-------------- + 1 file changed, 4 insertions(+), 14 deletions(-) + +--- a/drivers/net/ethernet/3com/3c59x.c ++++ b/drivers/net/ethernet/3com/3c59x.c +@@ -839,10 +839,7 @@ MODULE_PARM_DESC(use_mmio, "3c59x: use m + #ifdef CONFIG_NET_POLL_CONTROLLER + static void poll_vortex(struct net_device *dev) + { +- unsigned long flags; +- local_irq_save(flags); + vortex_boomerang_interrupt(dev->irq, dev); +- local_irq_restore(flags); + } + #endif + +@@ -1904,15 +1901,7 @@ static void vortex_tx_timeout(struct net + pr_err("%s: Interrupt posted but not delivered --" + " IRQ blocked by another device?\n", dev->name); + /* Bad idea here.. but we might as well handle a few events. */ +- { +- /* +- * Block interrupts because vortex_interrupt does a bare spin_lock() +- */ +- unsigned long flags; +- local_irq_save(flags); +- vortex_boomerang_interrupt(dev->irq, dev); +- local_irq_restore(flags); +- } ++ vortex_boomerang_interrupt(dev->irq, dev); + } + + if (vortex_debug > 0) +@@ -2516,16 +2505,17 @@ vortex_boomerang_interrupt(int irq, void + { + struct net_device *dev = dev_id; + struct vortex_private *vp = netdev_priv(dev); ++ unsigned long flags; + irqreturn_t ret; + +- spin_lock(&vp->lock); ++ spin_lock_irqsave(&vp->lock, flags); + + if (vp->full_bus_master_rx) + ret = _boomerang_interrupt(dev->irq, dev); + else + ret = _vortex_interrupt(dev->irq, dev); + +- spin_unlock(&vp->lock); ++ spin_unlock_irqrestore(&vp->lock, flags); + + return ret; + } diff --git a/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch b/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch index a646b2ec4..86b149a54 100644 --- a/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch +++ b/debian/patches/features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch @@ -2,7 +2,7 @@ From: Steven Rostedt Date: Tue, 6 Dec 2016 17:50:30 -0500 Subject: [PATCH] net: Have __napi_schedule_irqoff() disable interrupts on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz A customer hit a crash where the napi sd->poll_list became corrupted. 
The customer had the bnx2x driver, which does a @@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -409,7 +409,19 @@ typedef enum rx_handler_result rx_handle +@@ -410,7 +410,19 @@ typedef enum rx_handler_result rx_handle typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); void __napi_schedule(struct napi_struct *n); @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior { --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -5238,6 +5238,7 @@ bool napi_schedule_prep(struct napi_stru +@@ -5407,6 +5407,7 @@ bool napi_schedule_prep(struct napi_stru } EXPORT_SYMBOL(napi_schedule_prep); @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule -@@ -5249,6 +5250,7 @@ void __napi_schedule_irqoff(struct napi_ +@@ -5418,6 +5419,7 @@ void __napi_schedule_irqoff(struct napi_ ____napi_schedule(this_cpu_ptr(&softnet_data), n); } EXPORT_SYMBOL(__napi_schedule_irqoff); diff --git a/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch b/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch index 7d2ffa0d1..69594a046 100644 --- a/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch +++ b/debian/patches/features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 14 Sep 2016 17:36:35 +0200 Subject: [PATCH] net/Qdisc: use a seqlock instead seqcount -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The seqcount disables preemption on -RT while it is held which can't remove. 
Also we don't want the reader to spin for ages if the writer is @@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h -@@ -482,6 +482,15 @@ static inline void write_seqlock(seqlock +@@ -481,6 +481,15 @@ static inline void write_seqlock(seqlock __raw_write_seqcount_begin(&sl->seqcount); } @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -@@ -57,13 +58,13 @@ int gen_new_estimator(struct gnet_stats_ +@@ -60,13 +61,13 @@ int gen_new_estimator(struct gnet_stats_ struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, @@ -107,8 +107,8 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include #include -@@ -90,7 +91,7 @@ struct Qdisc { - struct sk_buff *gso_skb ____cacheline_aligned_in_smp; +@@ -93,7 +94,7 @@ struct Qdisc { + struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; - seqcount_t running; @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; -@@ -109,13 +110,22 @@ static inline void qdisc_refcount_inc(st +@@ -111,13 +112,22 @@ static inline void qdisc_refcount_inc(st refcount_inc(&qdisc->refcnt); } @@ -140,7 +140,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (qdisc_is_running(qdisc)) return false; /* Variant of write_seqcount_begin() telling lockdep a trylock -@@ -124,11 +134,16 @@ static inline bool qdisc_run_begin(struc +@@ -126,11 +136,16 @@ static inline bool qdisc_run_begin(struc raw_write_seqcount_begin(&qdisc->running); seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; @@ -157,7 +157,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) -@@ -338,7 +353,7 @@ static inline spinlock_t *qdisc_root_sle +@@ -404,7 +419,7 @@ static inline spinlock_t *qdisc_root_sle return qdisc_lock(root); } @@ -177,7 +177,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_basic_cpu __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ -@@ -128,7 +128,7 @@ int gen_new_estimator(struct gnet_stats_ +@@ -129,7 +129,7 @@ int gen_new_estimator(struct gnet_stats_ struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, @@ -186,7 +186,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); -@@ -217,7 +217,7 @@ int gen_replace_estimator(struct gnet_st +@@ -222,7 +222,7 @@ int gen_replace_estimator(struct gnet_st struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, @@ -230,18 +230,18 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_basic_packed *b) --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c -@@ -1081,7 +1081,7 @@ static struct Qdisc *qdisc_create(struct - rcu_assign_pointer(sch->stab, stab); - } - if (tca[TCA_RATE]) { -- seqcount_t *running; -+ net_seqlock_t *running; +@@ -1154,7 +1154,7 @@ static struct Qdisc *qdisc_create(struct + rcu_assign_pointer(sch->stab, stab); + } + if (tca[TCA_RATE]) { +- seqcount_t *running; ++ net_seqlock_t *running; - err = -EOPNOTSUPP; - if (sch->flags & TCQ_F_MQROOT) + err = -EOPNOTSUPP; + if (sch->flags & TCQ_F_MQROOT) { --- a/net/sched/sch_generic.c 
+++ b/net/sched/sch_generic.c -@@ -429,7 +429,11 @@ struct Qdisc noop_qdisc = { +@@ -582,7 +582,11 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, @@ -253,7 +253,7 @@ Signed-off-by: Sebastian Andrzej Siewior .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), }; EXPORT_SYMBOL(noop_qdisc); -@@ -628,9 +632,17 @@ struct Qdisc *qdisc_alloc(struct netdev_ +@@ -867,9 +871,17 @@ struct Qdisc *qdisc_alloc(struct netdev_ lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); @@ -270,4 +270,4 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif sch->ops = ops; - sch->enqueue = ops->enqueue; + sch->flags = ops->static_flags; diff --git a/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch b/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch index ab0356900..47e29d617 100644 --- a/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch +++ b/debian/patches/features/all/rt/net-add-a-lock-around-icmp_sk.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 31 Aug 2016 17:54:09 +0200 Subject: [PATCH] net: add a lock around icmp_sk() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz It looks like the this_cpu_ptr() access in icmp_sk() is protected with local_bh_disable(). To avoid missing serialization in -RT I am adding diff --git a/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch b/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch index e79cf4d58..b8e516208 100644 --- a/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch +++ b/debian/patches/features/all/rt/net-add-back-the-missing-serialization-in-ip_send_un.patch @@ -5,7 +5,7 @@ Subject: [PATCH] net: add back the missing serialization in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Some time ago Sami Pietikäinen reported a crash on -RT in ip_send_unicast_reply() which was later fixed by Nicholas Mc Guire @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -580,6 +581,7 @@ void tcp_v4_send_check(struct sock *sk, +@@ -582,6 +583,7 @@ void tcp_v4_send_check(struct sock *sk, } EXPORT_SYMBOL(tcp_v4_send_check); @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * This routine will send an RST to the other tcp. * -@@ -709,6 +711,7 @@ static void tcp_v4_send_reset(const stru +@@ -714,6 +716,7 @@ static void tcp_v4_send_reset(const stru arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? 
sk : NULL); @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, -@@ -718,6 +721,7 @@ static void tcp_v4_send_reset(const stru +@@ -723,6 +726,7 @@ static void tcp_v4_send_reset(const stru __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); @@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_TCP_MD5SIG out: -@@ -795,6 +799,7 @@ static void tcp_v4_send_ack(const struct +@@ -800,6 +804,7 @@ static void tcp_v4_send_ack(const struct arg.bound_dev_if = oif; arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); @@ -83,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, -@@ -803,6 +808,7 @@ static void tcp_v4_send_ack(const struct +@@ -808,6 +813,7 @@ static void tcp_v4_send_ack(const struct __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); diff --git a/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch b/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch index a70471407..a8b5a2430 100644 --- a/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch +++ b/debian/patches/features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Wed, 26 Sep 2012 16:21:08 +0200 Subject: net: Another local_irq_disable/kmalloc headache -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Replace it by a local lock. 
Though that's pretty inefficient :( @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/net/core/skbuff.c +++ b/net/core/skbuff.c -@@ -64,6 +64,7 @@ +@@ -63,6 +63,7 @@ #include #include #include @@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner #include #include -@@ -334,6 +335,7 @@ struct napi_alloc_cache { +@@ -330,6 +331,7 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { -@@ -341,10 +343,10 @@ static void *__netdev_alloc_frag(unsigne +@@ -337,10 +339,10 @@ static void *__netdev_alloc_frag(unsigne unsigned long flags; void *data; @@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner return data; } -@@ -412,13 +414,13 @@ struct sk_buff *__netdev_alloc_skb(struc +@@ -408,13 +410,13 @@ struct sk_buff *__netdev_alloc_skb(struc if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; diff --git a/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch b/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch index ec0ce0776..f7915e4d2 100644 --- a/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch +++ b/debian/patches/features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch @@ -1,7 +1,7 @@ Subject: net/core/cpuhotplug: Drain input_pkt_queue lockless From: Grygorii Strashko Date: Fri, 9 Oct 2015 09:25:49 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz I can constantly see below error report with 4.1 RT-kernel on TI ARM dra7-evm if I'm trying to unplug cpu1: @@ -36,7 +36,7 @@ Cc: stable-rt@vger.kernel.org --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -8423,7 +8423,7 @@ static int dev_cpu_dead(unsigned int old +@@ -8708,7 +8708,7 @@ static int dev_cpu_dead(unsigned int old netif_rx_ni(skb); input_queue_head_incr(oldsd); } diff --git a/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch b/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch index 43cba43c8..d1d957eb7 100644 --- a/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch +++ b/debian/patches/features/all/rt/net-core-protect-users-of-napi_alloc_cache-against-r.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 15 Jan 2016 16:33:34 +0100 Subject: net/core: protect users of napi_alloc_cache against reentrance -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On -RT the code running in BH can not be moved to another CPU so CPU local variable remain local. 
However the code can be preempted @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/core/skbuff.c +++ b/net/core/skbuff.c -@@ -336,6 +336,7 @@ struct napi_alloc_cache { +@@ -332,6 +332,7 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { -@@ -365,9 +366,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); +@@ -361,9 +362,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { @@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior } void *napi_alloc_frag(unsigned int fragsz) -@@ -461,9 +466,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); +@@ -457,9 +462,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { @@ -54,7 +54,7 @@ Signed-off-by: Sebastian Andrzej Siewior len += NET_SKB_PAD + NET_IP_ALIGN; -@@ -481,7 +487,10 @@ struct sk_buff *__napi_alloc_skb(struct +@@ -477,7 +483,10 @@ struct sk_buff *__napi_alloc_skb(struct if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (unlikely(!data)) return NULL; -@@ -492,7 +501,7 @@ struct sk_buff *__napi_alloc_skb(struct +@@ -488,7 +497,7 @@ struct sk_buff *__napi_alloc_skb(struct } /* use OR instead of assignment to avoid clearing of bits in mask */ @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior skb->pfmemalloc = 1; skb->head_frag = 1; -@@ -724,23 +733,26 @@ void __consume_stateless_skb(struct sk_b +@@ -720,23 +729,26 @@ void __consume_stateless_skb(struct sk_b void __kfree_skb_flush(void) { @@ -103,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* record skb to CPU local list */ nc->skb_cache[nc->skb_count++] = skb; -@@ -755,6 +767,7 @@ static inline void _kfree_skb_defer(stru +@@ -751,6 +763,7 @@ static inline void _kfree_skb_defer(stru nc->skb_cache); nc->skb_count = 0; } diff --git a/debian/patches/features/all/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch b/debian/patches/features/all/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch new file mode 100644 index 000000000..0dbf1b592 --- /dev/null +++ b/debian/patches/features/all/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch @@ -0,0 +1,35 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 16 Jun 2017 19:03:16 +0200 +Subject: [PATCH] net/core: use local_bh_disable() in netif_rx_ni() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +In 2004 netif_rx_ni() gained a preempt_disable() section around +netif_rx() and its do_softirq() + testing for it. The do_softirq() part +is required because netif_rx() raises the softirq but does not invoke +it. The preempt_disable() is required to remain on the same CPU which added the +skb to the per-CPU list. +All this can be avoided be putting this into a local_bh_disable()ed +section. The local_bh_enable() part will invoke do_softirq() if +required. 
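+A condensed sketch of the two patterns (illustrative; the actual hunk
+follows below):
+
+	/* old: pin the CPU, then run the raised softirq by hand */
+	preempt_disable();
+	err = netif_rx_internal(skb);	/* raises NET_RX_SOFTIRQ */
+	if (local_softirq_pending())
+		do_softirq();
+	preempt_enable();
+
+	/* new: a BH section also keeps us on this CPU, and the
+	 * closing local_bh_enable() runs whatever was raised */
+	local_bh_disable();
+	err = netif_rx_internal(skb);
+	local_bh_enable();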
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + net/core/dev.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4189,11 +4189,9 @@ int netif_rx_ni(struct sk_buff *skb) + + trace_netif_rx_ni_entry(skb); + +- preempt_disable(); ++ local_bh_disable(); + err = netif_rx_internal(skb); +- if (local_softirq_pending()) +- do_softirq(); +- preempt_enable(); ++ local_bh_enable(); + + return err; + } diff --git a/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch b/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch index 91de2cbae..7e0d40d22 100644 --- a/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch +++ b/debian/patches/features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 30 Mar 2016 13:36:29 +0200 Subject: [PATCH] net: dev: always take qdisc's busylock in __dev_xmit_skb() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The root-lock is dropped before dev_hard_start_xmit() is invoked and after setting the __QDISC___STATE_RUNNING bit. If this task is now pushed away @@ -21,7 +21,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3159,7 +3159,11 @@ static inline int __dev_xmit_skb(struct +@@ -3230,7 +3230,11 @@ static inline int __dev_xmit_skb(struct * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ diff --git a/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch b/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch index 7a4a247bd..7bf5fe72d 100644 --- a/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch +++ b/debian/patches/features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch @@ -1,7 +1,7 @@ Subject: net: netfilter: Serialize xt_write_recseq sections on RT From: Thomas Gleixner Date: Sun, 28 Oct 2012 11:18:08 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The netfilter code relies only on the implicit semantics of local_bh_disable() for serializing wt_write_recseq sections. RT breaks @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner #include /* Test a struct->invflags and a boolean for inequality */ -@@ -338,6 +339,8 @@ void xt_free_table_info(struct xt_table_ +@@ -343,6 +344,8 @@ void xt_free_table_info(struct xt_table_ */ DECLARE_PER_CPU(seqcount_t, xt_recseq); @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner /* xt_tee_enabled - true if x_tables needs to handle reentrancy * * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. -@@ -358,6 +361,9 @@ static inline unsigned int xt_write_recs +@@ -363,6 +366,9 @@ static inline unsigned int xt_write_recs { unsigned int addend; @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner /* * Low order bit of sequence is set if we already * called xt_write_recseq_begin(). 
-@@ -388,6 +394,7 @@ static inline void xt_write_recseq_end(u +@@ -393,6 +399,7 @@ static inline void xt_write_recseq_end(u /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ smp_wmb(); __this_cpu_add(xt_recseq.sequence, addend); @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner /* --- a/net/netfilter/core.c +++ b/net/netfilter/core.c -@@ -21,6 +21,7 @@ +@@ -20,6 +20,7 @@ #include #include #include @@ -62,7 +62,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -28,6 +29,11 @@ +@@ -27,6 +28,11 @@ #include "nf_internals.h" @@ -71,6 +71,6 @@ Signed-off-by: Thomas Gleixner +EXPORT_PER_CPU_SYMBOL(xt_write_lock); +#endif + - static DEFINE_MUTEX(afinfo_mutex); + const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; + EXPORT_SYMBOL_GPL(nf_ipv6_ops); - const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; diff --git a/debian/patches/features/all/rt/net-mac808211-rc-warn_on.patch b/debian/patches/features/all/rt/net-mac808211-rc-warn_on.patch new file mode 100644 index 000000000..d3b3407df --- /dev/null +++ b/debian/patches/features/all/rt/net-mac808211-rc-warn_on.patch @@ -0,0 +1,54 @@ +From: Anna-Maria Gleixner +Date: Tue, 10 Apr 2018 11:37:12 +0200 +Subject: [PATCH] net: mac808211: mac802154: use lockdep_assert_in_softirq() instead own warning +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The warning in ieee802154_rx() and ieee80211_rx_napi() is there to ensure +the softirq context for the subsequent netif_receive_skb() call. The check +could be moved into the netif_receive_skb() function to prevent all calling +functions implement the checks on their own. Use the lockdep variant for +softirq context check. While at it, add a lockdep based check for irq +enabled as mentioned in the comment above netif_receive_skb(). 
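+For callers the contract is unchanged; a hedged usage sketch, assuming
+the lockdep variant accepts any BH-disabled section just as the old
+softirq_count() test did:
+
+	local_bh_disable();	/* softirq context from here on */
+	netif_receive_skb(skb);	/* would assert otherwise */
+	local_bh_enable();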
+ +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + net/core/dev.c | 3 +++ + net/mac80211/rx.c | 2 -- + net/mac802154/rx.c | 2 -- + 3 files changed, 3 insertions(+), 4 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4720,6 +4720,9 @@ static int netif_receive_skb_internal(st + */ + int netif_receive_skb(struct sk_buff *skb) + { ++ lockdep_assert_irqs_enabled(); ++ lockdep_assert_in_softirq(); ++ + trace_netif_receive_skb_entry(skb); + + return netif_receive_skb_internal(skb); +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -4245,8 +4245,6 @@ void ieee80211_rx_napi(struct ieee80211_ + struct ieee80211_supported_band *sband; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + +- WARN_ON_ONCE(softirq_count() == 0); +- + if (WARN_ON(status->band >= NUM_NL80211_BANDS)) + goto drop; + +--- a/net/mac802154/rx.c ++++ b/net/mac802154/rx.c +@@ -258,8 +258,6 @@ void ieee802154_rx(struct ieee802154_loc + { + u16 crc; + +- WARN_ON_ONCE(softirq_count() == 0); +- + if (local->suspended) + goto drop; + diff --git a/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch b/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch index b5ed0398e..5ad3f8aae 100644 --- a/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch +++ b/debian/patches/features/all/rt/net-make-devnet_rename_seq-a-mutex.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 20 Mar 2013 18:06:20 +0100 Subject: net: Add a mutex around devnet_rename_seq -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On RT write_seqcount_begin() disables preemption and device_rename() allocates memory with GFP_KERNEL and grabs later the sysfs_mutex @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -195,6 +195,7 @@ static unsigned int napi_gen_id = NR_CPU +@@ -197,6 +197,7 @@ static unsigned int napi_gen_id = NR_CPU static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; @@ -30,7 +30,7 @@ Signed-off-by: Thomas Gleixner static inline void dev_base_seq_inc(struct net *net) { -@@ -920,7 +921,8 @@ int netdev_get_name(struct net *net, cha +@@ -922,7 +923,8 @@ int netdev_get_name(struct net *net, cha strcpy(name, dev->name); rcu_read_unlock(); if (read_seqcount_retry(&devnet_rename_seq, seq)) { @@ -40,7 +40,7 @@ Signed-off-by: Thomas Gleixner goto retry; } -@@ -1189,20 +1191,17 @@ int dev_change_name(struct net_device *d +@@ -1185,20 +1187,17 @@ int dev_change_name(struct net_device *d if (dev->flags & IFF_UP) return -EBUSY; @@ -67,7 +67,7 @@ Signed-off-by: Thomas Gleixner if (oldname[0] && !strchr(oldname, '%')) netdev_info(dev, "renamed from %s\n", oldname); -@@ -1215,11 +1214,12 @@ int dev_change_name(struct net_device *d +@@ -1211,11 +1210,12 @@ int dev_change_name(struct net_device *d if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); dev->name_assign_type = old_assign_type; @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner netdev_adjacent_rename_links(dev, oldname); -@@ -1240,7 +1240,8 @@ int dev_change_name(struct net_device *d +@@ -1236,7 +1236,8 @@ int dev_change_name(struct net_device *d /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; @@ -93,7 +93,7 @@ Signed-off-by: Thomas Gleixner memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); 
dev->name_assign_type = old_assign_type; -@@ -1253,6 +1254,11 @@ int dev_change_name(struct net_device *d +@@ -1249,6 +1250,11 @@ int dev_change_name(struct net_device *d } return err; diff --git a/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch b/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch index 0e1066355..a60c9f557 100644 --- a/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch +++ b/debian/patches/features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 13 Jan 2016 15:55:02 +0100 Subject: net: move xmit_recursion to per-task variable on -RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz A softirq on -RT can be preempted. That means one task is in __dev_queue_xmit(), gets preempted and another task may enter @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -2433,14 +2433,53 @@ void netdev_freemem(struct net_device *d +@@ -2479,14 +2479,53 @@ void netdev_freemem(struct net_device *d void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1107,6 +1107,9 @@ struct task_struct { +@@ -1116,6 +1116,9 @@ struct task_struct { #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif @@ -93,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct task_struct *oom_reaper_list; --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3222,8 +3222,10 @@ static void skb_update_prio(struct sk_bu +@@ -3302,8 +3302,10 @@ static void skb_update_prio(struct sk_bu #define skb_update_prio(skb) #endif @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * dev_loopback_xmit - loop back @skb -@@ -3464,8 +3466,7 @@ static int __dev_queue_xmit(struct sk_bu +@@ -3544,8 +3546,7 @@ static int __dev_queue_xmit(struct sk_bu int cpu = smp_processor_id(); /* ok because BHs are off */ if (txq->xmit_lock_owner != cpu) { @@ -113,8 +113,8 @@ Signed-off-by: Sebastian Andrzej Siewior + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) goto recursion_alert; - skb = validate_xmit_skb(skb, dev); -@@ -3475,9 +3476,9 @@ static int __dev_queue_xmit(struct sk_bu + skb = validate_xmit_skb(skb, dev, &again); +@@ -3555,9 +3556,9 @@ static int __dev_queue_xmit(struct sk_bu HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -128,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; --- a/net/core/filter.c +++ b/net/core/filter.c -@@ -1694,7 +1694,7 @@ static inline int __bpf_tx_skb(struct ne +@@ -1704,7 +1704,7 @@ static inline int __bpf_tx_skb(struct ne { int ret; @@ -137,7 +137,7 @@ Signed-off-by: Sebastian Andrzej Siewior net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); kfree_skb(skb); return -ENETDOWN; -@@ -1702,9 +1702,9 @@ static inline int __bpf_tx_skb(struct ne +@@ -1712,9 +1712,9 @@ static inline int __bpf_tx_skb(struct ne skb->dev = dev; diff --git a/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch 
b/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch index 29ae735b8..ff58264b6 100644 --- a/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch +++ b/debian/patches/features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 20 Jan 2016 15:39:05 +0100 Subject: net: provide a way to delegate processing a softirq to ksoftirqd -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz If the NET_RX uses up all of his budget it moves the following NAPI invocations into the `ksoftirqd`. On -RT it does not do so. Instead it @@ -38,7 +38,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern void raise_softirq(unsigned int nr); --- a/kernel/softirq.c +++ b/kernel/softirq.c -@@ -686,6 +686,27 @@ void __raise_softirq_irqoff(unsigned int +@@ -687,6 +687,27 @@ void __raise_softirq_irqoff(unsigned int } /* @@ -68,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior void raise_softirq_irqoff(unsigned int nr) --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -5643,7 +5643,7 @@ static __latent_entropy void net_rx_acti +@@ -5812,7 +5812,7 @@ static __latent_entropy void net_rx_acti list_splice_tail(&repoll, &list); list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) diff --git a/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch b/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch index aea400b65..47f16d43a 100644 --- a/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch +++ b/debian/patches/features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch @@ -1,7 +1,7 @@ From: Marc Kleine-Budde Date: Wed, 5 Mar 2014 00:49:47 +0100 Subject: net: sched: Use msleep() instead of yield() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On PREEMPT_RT enabled systems the interrupt handler run as threads at prio 50 (by default). If a high priority userspace process tries to shut down a busy @@ -47,12 +47,12 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c -@@ -930,7 +930,7 @@ void dev_deactivate_many(struct list_hea +@@ -1189,7 +1189,7 @@ void dev_deactivate_many(struct list_hea /* Wait for outstanding qdisc_run calls. 
*/ - list_for_each_entry(dev, head, close_list) + list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) - yield(); + msleep(1); - } - - void dev_deactivate(struct net_device *dev) + /* The new qdisc is assigned at this point so we can safely + * unwind stale skb lists and qdisc statistics + */ diff --git a/debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch b/debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch index f07b2ebc4..67c1d45e1 100644 --- a/debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch +++ b/debian/patches/features/all/rt/net-take-the-tcp_sk_lock-lock-with-BH-disabled.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 21 Aug 2017 15:09:13 +0200 Subject: [PATCH] net: take the tcp_sk_lock lock with BH disabled -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Lockdep may complain about an unsafe locking scenario: | CPU0 CPU1 @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c -@@ -711,8 +711,8 @@ static void tcp_v4_send_reset(const stru +@@ -716,8 +716,8 @@ static void tcp_v4_send_reset(const stru arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, -@@ -720,8 +720,8 @@ static void tcp_v4_send_reset(const stru +@@ -725,8 +725,8 @@ static void tcp_v4_send_reset(const stru __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_TCP_MD5SIG out: -@@ -799,16 +799,16 @@ static void tcp_v4_send_ack(const struct +@@ -804,16 +804,16 @@ static void tcp_v4_send_ack(const struct arg.bound_dev_if = oif; arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); diff --git a/debian/patches/features/all/rt/net-use-cpu-chill.patch b/debian/patches/features/all/rt/net-use-cpu-chill.patch index ef66bc0e5..0fdd5c349 100644 --- a/debian/patches/features/all/rt/net-use-cpu-chill.patch +++ b/debian/patches/features/all/rt/net-use-cpu-chill.patch @@ -1,7 +1,7 @@ Subject: net: Use cpu_chill() instead of cpu_relax() From: Thomas Gleixner Date: Wed, 07 Mar 2012 21:10:04 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. Use cpu_chill() instead of cpu_relax() to let the system @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -700,7 +701,7 @@ static void prb_retire_rx_blk_timer_expi +@@ -698,7 +699,7 @@ static void prb_retire_rx_blk_timer_expi if (BLOCK_NUM_PKTS(pbd)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... */ @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner } } -@@ -962,7 +963,7 @@ static void prb_retire_current_block(str +@@ -960,7 +961,7 @@ static void prb_retire_current_block(str if (!(status & TP_STATUS_BLK_TMO)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... 
*/ diff --git a/debian/patches/features/all/rt/net-use-task_struct-instead-of-CPU-number-as-the-que.patch b/debian/patches/features/all/rt/net-use-task_struct-instead-of-CPU-number-as-the-que.patch new file mode 100644 index 000000000..ad52bc3ef --- /dev/null +++ b/debian/patches/features/all/rt/net-use-task_struct-instead-of-CPU-number-as-the-que.patch @@ -0,0 +1,146 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 21 Feb 2018 10:39:54 +0100 +Subject: [PATCH] net: use task_struct instead of CPU number as the queue + owner on -RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +In commit ("net: move xmit_recursion to per-task variable on -RT") the +recursion level was changed to be per-task since we can get preempted in +BH on -RT. The lock owner should consequently be recorded as the task +that holds the lock and not the CPU. Otherwise we trigger the "Dead loop +on virtual device" warning on SMP systems. + +Cc: stable-rt@vger.kernel.org +Reported-by: Kurt Kanzenbach +Tested-by: Kurt Kanzenbach +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/netdevice.h | 54 ++++++++++++++++++++++++++++++++++++++++------ + net/core/dev.c | 6 ++++- + 2 files changed, 53 insertions(+), 7 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -572,7 +572,11 @@ struct netdev_queue { + * write-mostly part + */ + spinlock_t _xmit_lock ____cacheline_aligned_in_smp; ++#ifdef CONFIG_PREEMPT_RT_FULL ++ struct task_struct *xmit_lock_owner; ++#else + int xmit_lock_owner; ++#endif + /* + * Time (in jiffies) of last Tx + */ +@@ -3596,10 +3600,48 @@ static inline u32 netif_msg_init(int deb + return (1 << debug_value) - 1; + } + ++#ifdef CONFIG_PREEMPT_RT_FULL ++static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu) ++{ ++ txq->xmit_lock_owner = current; ++} ++ ++static inline void netdev_queue_clear_owner(struct netdev_queue *txq) ++{ ++ txq->xmit_lock_owner = NULL; ++} ++ ++static inline bool netdev_queue_has_owner(struct netdev_queue *txq) ++{ ++ if (txq->xmit_lock_owner != NULL) ++ return true; ++ return false; ++} ++ ++#else ++ ++static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu) ++{ ++ txq->xmit_lock_owner = cpu; ++} ++ ++static inline void netdev_queue_clear_owner(struct netdev_queue *txq) ++{ ++ txq->xmit_lock_owner = -1; ++} ++ ++static inline bool netdev_queue_has_owner(struct netdev_queue *txq) ++{ ++ if (txq->xmit_lock_owner != -1) ++ return true; ++ return false; ++} ++#endif ++ + static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) + { + spin_lock(&txq->_xmit_lock); +- txq->xmit_lock_owner = cpu; ++ netdev_queue_set_owner(txq, cpu); + } + + static inline bool __netif_tx_acquire(struct netdev_queue *txq) +@@ -3616,32 +3658,32 @@ static inline void __netif_tx_release(st + static inline void __netif_tx_lock_bh(struct netdev_queue *txq) + { + spin_lock_bh(&txq->_xmit_lock); +- txq->xmit_lock_owner = smp_processor_id(); ++ netdev_queue_set_owner(txq, smp_processor_id()); + } + + static inline bool __netif_tx_trylock(struct netdev_queue *txq) + { + bool ok = spin_trylock(&txq->_xmit_lock); + if (likely(ok)) +- txq->xmit_lock_owner = smp_processor_id(); ++ netdev_queue_set_owner(txq, smp_processor_id()); + return ok; + } + + static inline void __netif_tx_unlock(struct netdev_queue *txq) + { +- txq->xmit_lock_owner = -1; ++ netdev_queue_clear_owner(txq); + spin_unlock(&txq->_xmit_lock); + } + + static inline void __netif_tx_unlock_bh(struct netdev_queue 
*txq) + { +- txq->xmit_lock_owner = -1; ++ netdev_queue_clear_owner(txq); + spin_unlock_bh(&txq->_xmit_lock); + } + + static inline void txq_trans_update(struct netdev_queue *txq) + { +- if (txq->xmit_lock_owner != -1) ++ if (netdev_queue_has_owner(txq)) + txq->trans_start = jiffies; + } + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3545,7 +3545,11 @@ static int __dev_queue_xmit(struct sk_bu + if (dev->flags & IFF_UP) { + int cpu = smp_processor_id(); /* ok because BHs are off */ + ++#ifdef CONFIG_PREEMPT_RT_FULL ++ if (txq->xmit_lock_owner != current) { ++#else + if (txq->xmit_lock_owner != cpu) { ++#endif + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) + goto recursion_alert; + +@@ -7762,7 +7766,7 @@ static void netdev_init_one_queue(struct + /* Initialize queue lock */ + spin_lock_init(&queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); +- queue->xmit_lock_owner = -1; ++ netdev_queue_clear_owner(queue); + netdev_queue_numa_node_write(queue, NUMA_NO_NODE); + queue->dev = dev; + #ifdef CONFIG_BQL diff --git a/debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch b/debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch index 76053792c..3f0a2d0cf 100644 --- a/debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch +++ b/debian/patches/features/all/rt/net-use-trylock-in-icmp_sk.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 21 Sep 2017 14:42:04 +0200 Subject: net: use trylock in icmp_sk -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The locking path can be recursive (same as for sk->sk_lock.slock) and therefore we need a trylock version for the locallock, too. diff --git a/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch b/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch index 953bfaf9d..cca6535fc 100644 --- a/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch +++ b/debian/patches/features/all/rt/net_disable_NET_RX_BUSY_POLL.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Sat, 27 May 2017 19:02:06 +0200 Subject: net/core: disable NET_RX_BUSY_POLL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz sk_busy_loop() does preempt_disable() followed by a few operations which can take sleeping locks and may get long. 
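
The trylock-with-fallback idea behind net-use-trylock-in-icmp_sk.patch above is worth seeing in isolation: a path that can re-enter itself must not block on its own lock, so it tries the lock and treats "busy" as a droppable event. A minimal userspace sketch, assuming a plain pthread mutex stands in for the per-CPU locallock and a dropped packet stands in for the kernel's error path (all names are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t icmp_lock = PTHREAD_MUTEX_INITIALIZER;

/* A possibly re-entered path must not block on its own lock:
 * try to take it and treat "busy" as a droppable event. */
static int send_icmp_error(const char *why)
{
        if (pthread_mutex_trylock(&icmp_lock) != 0)
                return -1;      /* lock already held in this context: drop */
        printf("icmp error sent: %s\n", why);
        pthread_mutex_unlock(&icmp_lock);
        return 0;
}

int main(void)
{
        send_icmp_error("port unreachable");    /* uncontended: succeeds */

        pthread_mutex_lock(&icmp_lock);         /* simulate re-entry */
        if (send_icmp_error("nested failure") < 0)
                printf("nested send dropped instead of deadlocking\n");
        pthread_mutex_unlock(&icmp_lock);
        return 0;
}

Dropping the nested message mirrors what ICMP may legitimately do anyway; the design choice is to prefer losing an error packet over blocking (or, on -RT, sleeping) inside a recursive path.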
diff --git a/debian/patches/features/all/rt/nohz-Prevent-erroneous-tick-stop-invocations.patch b/debian/patches/features/all/rt/nohz-Prevent-erroneous-tick-stop-invocations.patch
index 40e1472e5..661531ec2 100644
--- a/debian/patches/features/all/rt/nohz-Prevent-erroneous-tick-stop-invocations.patch
+++ b/debian/patches/features/all/rt/nohz-Prevent-erroneous-tick-stop-invocations.patch
@@ -1,7 +1,7 @@
 From: Thomas Gleixner
 Date: Fri, 22 Dec 2017 15:51:13 +0100
 Subject: [PATCH 2/4] nohz: Prevent erroneous tick stop invocations
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
 
 The conditions in irq_exit() to invoke tick_nohz_irq_exit() are:
 
@@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior
 --- a/kernel/softirq.c
 +++ b/kernel/softirq.c
-@@ -381,7 +381,13 @@ static inline void tick_irq_exit(void)
+@@ -382,7 +382,13 @@ static inline void tick_irq_exit(void)
  	int cpu = smp_processor_id();
 
  	/* Make sure that timer wheel updates are propagated */
diff --git a/debian/patches/features/all/rt/ntfs-avoid-disabling-interrupts-during-kmap_atomic.patch b/debian/patches/features/all/rt/ntfs-avoid-disabling-interrupts-during-kmap_atomic.patch
new file mode 100644
index 000000000..64a9feb1b
--- /dev/null
+++ b/debian/patches/features/all/rt/ntfs-avoid-disabling-interrupts-during-kmap_atomic.patch
@@ -0,0 +1,52 @@
+From: Sebastian Andrzej Siewior
+Date: Tue, 10 Apr 2018 17:54:32 +0200
+Subject: [PATCH] ntfs: don't disable interrupts during kmap_atomic()
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+ntfs_end_buffer_async_read() disables interrupts around kmap_atomic(). This is
+a leftover from the old kmap_atomic() implementation which relied on fixed
+mapping slots, so the caller had to make sure that the same slot could not be
+reused from an interrupting context.
+
+kmap_atomic() was changed to dynamic slots long ago and commit 1ec9c5ddc17a
+("include/linux/highmem.h: remove the second argument of k[un]map_atomic()")
+removed the slot assignments, but the callers were not checked for now
+redundant interrupt disabling.
+
+Remove the conditional interrupt disable.
+
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ fs/ntfs/aops.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/fs/ntfs/aops.c
++++ b/fs/ntfs/aops.c
+@@ -93,13 +93,11 @@ static void ntfs_end_buffer_async_read(s
+ 			ofs = 0;
+ 			if (file_ofs < init_size)
+ 				ofs = init_size - file_ofs;
+-			local_irq_save(flags);
+ 			kaddr = kmap_atomic(page);
+ 			memset(kaddr + bh_offset(bh) + ofs, 0,
+ 					bh->b_size - ofs);
+ 			flush_dcache_page(page);
+ 			kunmap_atomic(kaddr);
+-			local_irq_restore(flags);
+ 		}
+ 	} else {
+ 		clear_buffer_uptodate(bh);
+@@ -146,13 +144,11 @@ static void ntfs_end_buffer_async_read(s
+ 		recs = PAGE_SIZE / rec_size;
+ 		/* Should have been verified before we got here...
*/ + BUG_ON(!recs); +- local_irq_save(flags); + kaddr = kmap_atomic(page); + for (i = 0; i < recs; i++) + post_read_mst_fixup((NTFS_RECORD*)(kaddr + + i * rec_size), rec_size); + kunmap_atomic(kaddr); +- local_irq_restore(flags); + flush_dcache_page(page); + if (likely(page_uptodate && !PageError(page))) + SetPageUptodate(page); diff --git a/debian/patches/features/all/rt/oleg-signal-rt-fix.patch b/debian/patches/features/all/rt/oleg-signal-rt-fix.patch index 004f105a6..eb1a2e304 100644 --- a/debian/patches/features/all/rt/oleg-signal-rt-fix.patch +++ b/debian/patches/features/all/rt/oleg-signal-rt-fix.patch @@ -1,7 +1,7 @@ From: Oleg Nesterov Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: signal/x86: Delay calling signals in atomic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On x86_64 we must disable preemption before we enable interrupts for stack faults, int3 and debugging, because the current task is using @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c -@@ -150,6 +150,13 @@ static void exit_to_usermode_loop(struct +@@ -151,6 +151,13 @@ static void exit_to_usermode_loop(struct if (cached_flags & _TIF_NEED_RESCHED) schedule(); @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner #endif --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -795,6 +795,10 @@ struct task_struct { +@@ -805,6 +805,10 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; @@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner unsigned int sas_ss_flags; --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1238,8 +1238,8 @@ int do_send_sig_info(int sig, struct sig +@@ -1185,8 +1185,8 @@ int do_send_sig_info(int sig, struct sig * We don't want to have recursive SIGSEGV's etc, for example, * that is why we also clear SIGNAL_UNKILLABLE. */ @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner { unsigned long int flags; int ret, blocked, ignored; -@@ -1268,6 +1268,39 @@ force_sig_info(int sig, struct siginfo * +@@ -1215,6 +1215,39 @@ force_sig_info(int sig, struct siginfo * return ret; } diff --git a/debian/patches/features/all/rt/panic-disable-random-on-rt.patch b/debian/patches/features/all/rt/panic-disable-random-on-rt.patch index 71c3e374e..db05a6308 100644 --- a/debian/patches/features/all/rt/panic-disable-random-on-rt.patch +++ b/debian/patches/features/all/rt/panic-disable-random-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: panic: skip get_random_bytes for RT_FULL in init_oops_id -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Disable on -RT. If this is invoked from irq-context we will have problems to acquire the sleeping lock. 
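
The panic hunk above is quoted only down to its header; the guarded function presumably ends up with the following shape (a reconstruction for illustration, not the verbatim patch body). get_random_bytes() can take a sleeping lock, so on -RT the oops path keeps only the plain increment, which is safe from irq context:

static u64 oops_id;

static int init_oops_id(void)
{
#ifndef CONFIG_PREEMPT_RT_FULL
        if (!oops_id)
                get_random_bytes(&oops_id, sizeof(oops_id));
        else
#endif
                oops_id++;

        return 0;
}

The trade-off is a predictable oops id on -RT, which is acceptable because the id only disambiguates oops reports and carries no security weight here.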
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/panic.c +++ b/kernel/panic.c -@@ -482,9 +482,11 @@ static u64 oops_id; +@@ -486,9 +486,11 @@ static u64 oops_id; static int init_oops_id(void) { diff --git a/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch b/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch index 835f968de..bba2c4138 100644 --- a/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch +++ b/debian/patches/features/all/rt/patch-to-introduce-rcu-bh-qs-where-safe-from-softirq.patch @@ -1,7 +1,7 @@ Subject: rcu: Make ksoftirqd do RCU quiescent states From: "Paul E. McKenney" Date: Wed, 5 Oct 2011 11:45:18 -0700 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Implementing RCU-bh in terms of RCU-preempt makes the system vulnerable to network-based denial-of-service attacks. This patch therefore @@ -68,9 +68,9 @@ Signed-off-by: Thomas Gleixner #include #include +#include + #include #include #include "../time/tick-internal.h" - #include "../locking/rtmutex_common.h" @@ -1299,7 +1300,7 @@ static void rcu_prepare_kthreads(int cpu #endif /* #else #ifdef CONFIG_RCU_BOOST */ diff --git a/debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch b/debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch index b7213058d..7c885dd93 100644 --- a/debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch +++ b/debian/patches/features/all/rt/pci-switchtec-Don-t-use-completion-s-wait-queue.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 10:24:23 +0200 Subject: [PATCH] pci/switchtec: Don't use completion's wait queue -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The poll callback is using completion's wait_queue_head_t member and puts it in poll_wait() so the poll() caller gets a wakeup after command @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c -@@ -306,10 +306,11 @@ struct switchtec_user { +@@ -41,10 +41,11 @@ struct switchtec_user { enum mrpc_state state; @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior u32 cmd; u32 status; u32 return_code; -@@ -331,7 +332,7 @@ static struct switchtec_user *stuser_cre +@@ -66,7 +67,7 @@ static struct switchtec_user *stuser_cre stuser->stdev = stdev; kref_init(&stuser->kref); INIT_LIST_HEAD(&stuser->list); @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior stuser->event_cnt = atomic_read(&stdev->event_cnt); dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); -@@ -414,7 +415,7 @@ static int mrpc_queue_cmd(struct switcht +@@ -149,7 +150,7 @@ static int mrpc_queue_cmd(struct switcht kref_get(&stuser->kref); stuser->read_len = sizeof(stuser->data); stuser_set_state(stuser, MRPC_QUEUED); @@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior list_add_tail(&stuser->list, &stdev->mrpc_queue); mrpc_cmd_submit(stdev); -@@ -451,7 +452,8 @@ static void mrpc_complete_cmd(struct swi +@@ -186,7 +187,8 @@ static void mrpc_complete_cmd(struct swi stuser->read_len); out: @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
list_del_init(&stuser->list); stuser_put(stuser); stdev->mrpc_busy = 0; -@@ -721,10 +723,11 @@ static ssize_t switchtec_dev_read(struct +@@ -456,10 +458,11 @@ static ssize_t switchtec_dev_read(struct mutex_unlock(&stdev->mrpc_mutex); if (filp->f_flags & O_NONBLOCK) { @@ -79,25 +79,25 @@ Signed-off-by: Sebastian Andrzej Siewior if (rc < 0) return rc; } -@@ -772,7 +775,7 @@ static unsigned int switchtec_dev_poll(s +@@ -507,7 +510,7 @@ static __poll_t switchtec_dev_poll(struc struct switchtec_dev *stdev = stuser->stdev; - int ret = 0; + __poll_t ret = 0; - poll_wait(filp, &stuser->comp.wait, wait); + poll_wait(filp, &stuser->cmd_comp, wait); poll_wait(filp, &stdev->event_wq, wait); if (lock_mutex_and_test_alive(stdev)) -@@ -780,7 +783,7 @@ static unsigned int switchtec_dev_poll(s +@@ -515,7 +518,7 @@ static __poll_t switchtec_dev_poll(struc mutex_unlock(&stdev->mrpc_mutex); - if (try_wait_for_completion(&stuser->comp)) + if (READ_ONCE(stuser->cmd_done)) - ret |= POLLIN | POLLRDNORM; + ret |= EPOLLIN | EPOLLRDNORM; if (stuser->event_cnt != atomic_read(&stdev->event_cnt)) -@@ -1255,7 +1258,8 @@ static void stdev_kill(struct switchtec_ +@@ -1034,7 +1037,8 @@ static void stdev_kill(struct switchtec_ /* Wake up and kill any users waiting on an MRPC request */ list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) { diff --git a/debian/patches/features/all/rt/percpu_ida-Use-_irqsave-instead-of-local_irq_save-sp.patch b/debian/patches/features/all/rt/percpu_ida-Use-_irqsave-instead-of-local_irq_save-sp.patch new file mode 100644 index 000000000..7da3f0350 --- /dev/null +++ b/debian/patches/features/all/rt/percpu_ida-Use-_irqsave-instead-of-local_irq_save-sp.patch @@ -0,0 +1,171 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 23 Apr 2018 16:42:39 +0200 +Subject: [PATCH] percpu_ida: Use _irqsave() instead of local_irq_save() + + spin_lock +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +percpu_ida() decouples disabling interrupts from the locking operations. +This breaks some assumptions if the locking operations are replaced like +they are under -RT. +The same locking can be achieved by avoiding local_irq_save() and using +spin_lock_irqsave() instead. percpu_ida_alloc() gains one more +preemption point because after unlocking the fastpath and before the +pool lock is acquired, the interrupts are briefly enabled. 
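
The core of the percpu_ida conversion fits in a few lines. Below is a minimal sketch of the converted fastpath with a made-up tag structure (the real one lives in lib/percpu_ida.c); note the emptiness test is written as if (tags->nr_free) because nr_free is unsigned, so a ">= 0" comparison would be vacuously true:

#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

struct toy_tags {
        spinlock_t      lock;
        unsigned        nr_free;
        int             freelist[16];
};

static int toy_alloc(struct toy_tags __percpu *tag_cpu)
{
        struct toy_tags *tags;
        unsigned long flags;
        int tag = -ENOSPC;

        /* One primitive instead of local_irq_save() + spin_lock(): the
         * irq handling now lives inside the lock operation, so when -RT
         * substitutes a sleeping lock the pairing stays correct. */
        tags = raw_cpu_ptr(tag_cpu);
        spin_lock_irqsave(&tags->lock, flags);
        if (tags->nr_free)
                tag = tags->freelist[--tags->nr_free];
        spin_unlock_irqrestore(&tags->lock, flags);

        return tag;
}

raw_cpu_ptr() suffices because the lock, not irq-disabling, now keeps the per-CPU data stable; a migration between the two calls merely means locking a neighbour CPU's pool, which is still correct.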
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + lib/percpu_ida.c | 63 ++++++++++++++++++------------------------------------- + 1 file changed, 21 insertions(+), 42 deletions(-) + +--- a/lib/percpu_ida.c ++++ b/lib/percpu_ida.c +@@ -112,18 +112,6 @@ static inline void alloc_global_tags(str + min(pool->nr_free, pool->percpu_batch_size)); + } + +-static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) +-{ +- int tag = -ENOSPC; +- +- spin_lock(&tags->lock); +- if (tags->nr_free) +- tag = tags->freelist[--tags->nr_free]; +- spin_unlock(&tags->lock); +- +- return tag; +-} +- + /** + * percpu_ida_alloc - allocate a tag + * @pool: pool to allocate from +@@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida * + DEFINE_WAIT(wait); + struct percpu_ida_cpu *tags; + unsigned long flags; +- int tag; ++ int tag = -ENOSPC; + +- local_irq_save(flags); +- tags = this_cpu_ptr(pool->tag_cpu); ++ tags = raw_cpu_ptr(pool->tag_cpu); ++ spin_lock_irqsave(&tags->lock, flags); + + /* Fastpath */ +- tag = alloc_local_tag(tags); +- if (likely(tag >= 0)) { +- local_irq_restore(flags); ++ if (likely(tags->nr_free >= 0)) { ++ tag = tags->freelist[--tags->nr_free]; ++ spin_unlock_irqrestore(&tags->lock, flags); + return tag; + } ++ spin_unlock_irqrestore(&tags->lock, flags); + + while (1) { +- spin_lock(&pool->lock); ++ spin_lock_irqsave(&pool->lock, flags); ++ tags = this_cpu_ptr(pool->tag_cpu); + + /* + * prepare_to_wait() must come before steal_tags(), in case +@@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida * + &pool->cpus_have_tags); + } + +- spin_unlock(&pool->lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&pool->lock, flags); + + if (tag >= 0 || state == TASK_RUNNING) + break; +@@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida * + } + + schedule(); +- +- local_irq_save(flags); +- tags = this_cpu_ptr(pool->tag_cpu); + } + if (state != TASK_RUNNING) + finish_wait(&pool->wait, &wait); +@@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida * + + BUG_ON(tag >= pool->nr_tags); + +- local_irq_save(flags); +- tags = this_cpu_ptr(pool->tag_cpu); ++ tags = raw_cpu_ptr(pool->tag_cpu); + +- spin_lock(&tags->lock); ++ spin_lock_irqsave(&tags->lock, flags); + tags->freelist[tags->nr_free++] = tag; + + nr_free = tags->nr_free; +- spin_unlock(&tags->lock); + + if (nr_free == 1) { + cpumask_set_cpu(smp_processor_id(), + &pool->cpus_have_tags); + wake_up(&pool->wait); + } ++ spin_unlock_irqrestore(&tags->lock, flags); + + if (nr_free == pool->percpu_max_size) { +- spin_lock(&pool->lock); ++ spin_lock_irqsave(&pool->lock, flags); ++ spin_lock(&tags->lock); + +- /* +- * Global lock held and irqs disabled, don't need percpu +- * lock +- */ + if (tags->nr_free == pool->percpu_max_size) { + move_tags(pool->freelist, &pool->nr_free, + tags->freelist, &tags->nr_free, +@@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida * + + wake_up(&pool->wait); + } +- spin_unlock(&pool->lock); ++ spin_unlock(&tags->lock); ++ spin_unlock_irqrestore(&pool->lock, flags); + } +- +- local_irq_restore(flags); + } + EXPORT_SYMBOL_GPL(percpu_ida_free); + +@@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct perc + struct percpu_ida_cpu *remote; + unsigned cpu, i, err = 0; + +- local_irq_save(flags); + for_each_possible_cpu(cpu) { + remote = per_cpu_ptr(pool->tag_cpu, cpu); +- spin_lock(&remote->lock); ++ spin_lock_irqsave(&remote->lock, flags); + for (i = 0; i < remote->nr_free; i++) { + err = fn(remote->freelist[i], data); + if (err) + break; + } +- spin_unlock(&remote->lock); ++ 
spin_unlock_irqrestore(&remote->lock, flags); + if (err) + goto out; + } + +- spin_lock(&pool->lock); ++ spin_lock_irqsave(&pool->lock, flags); + for (i = 0; i < pool->nr_free; i++) { + err = fn(pool->freelist[i], data); + if (err) + break; + } +- spin_unlock(&pool->lock); ++ spin_unlock_irqrestore(&pool->lock, flags); + out: +- local_irq_restore(flags); + return err; + } + EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); diff --git a/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch b/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch index c7564b0ea..2fe06936f 100644 --- a/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch +++ b/debian/patches/features/all/rt/peter_zijlstra-frob-rcu.patch @@ -1,7 +1,7 @@ Subject: rcu: Frob softirq test From: Peter Zijlstra Date: Sat Aug 13 00:23:17 CEST 2011 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz With RT_FULL we get the below wreckage: @@ -156,7 +156,7 @@ Signed-off-by: Peter Zijlstra --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h -@@ -466,7 +466,7 @@ void rcu_read_unlock_special(struct task +@@ -468,7 +468,7 @@ void rcu_read_unlock_special(struct task } /* Hardware IRQ handlers cannot block, complain if they get here. */ diff --git a/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch b/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch index 052c3fe6d..b4974264a 100644 --- a/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch +++ b/debian/patches/features/all/rt/peterz-percpu-rwsem-rt.patch @@ -1,7 +1,7 @@ Subject: locking/percpu-rwsem: Remove preempt_disable variants From: Peter Zijlstra Date: Wed Nov 23 16:29:32 CET 2016 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Effective revert commit: @@ -89,7 +89,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); locks_free_lock(new_fl); return error; -@@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp) +@@ -1617,7 +1617,7 @@ int fcntl_getlease(struct file *filp) ctx = smp_load_acquire(&inode->i_flctx); if (ctx && !list_empty_careful(&ctx->flc_lease)) { @@ -98,7 +98,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { -@@ -1629,7 +1629,7 @@ int fcntl_getlease(struct file *filp) +@@ -1627,7 +1627,7 @@ int fcntl_getlease(struct file *filp) break; } spin_unlock(&ctx->flc_lock); @@ -107,7 +107,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); } -@@ -1704,7 +1704,7 @@ generic_add_lease(struct file *filp, lon +@@ -1702,7 +1702,7 @@ generic_add_lease(struct file *filp, lon return -EINVAL; } @@ -116,7 +116,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(dentry, arg, lease->fl_flags); -@@ -1775,7 +1775,7 @@ generic_add_lease(struct file *filp, lon +@@ -1773,7 +1773,7 @@ generic_add_lease(struct file *filp, lon lease->fl_lmops->lm_setup(lease, priv); out: spin_unlock(&ctx->flc_lock); @@ -125,7 +125,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); if (is_deleg) inode_unlock(inode); -@@ -1798,7 +1798,7 @@ static int generic_delete_lease(struct f +@@ -1796,7 +1796,7 @@ static int 
generic_delete_lease(struct f return error; } @@ -134,7 +134,7 @@ Signed-off-by: Peter Zijlstra (Intel) spin_lock(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file == filp && -@@ -1811,7 +1811,7 @@ static int generic_delete_lease(struct f +@@ -1809,7 +1809,7 @@ static int generic_delete_lease(struct f if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); @@ -143,7 +143,7 @@ Signed-off-by: Peter Zijlstra (Intel) locks_dispose_list(&dispose); return error; } -@@ -2535,13 +2535,13 @@ locks_remove_lease(struct file *filp, st +@@ -2533,13 +2533,13 @@ locks_remove_lease(struct file *filp, st if (list_empty(&ctx->flc_lease)) return; diff --git a/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch b/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch index efe2c636c..b446762b5 100644 --- a/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch +++ b/debian/patches/features/all/rt/peterz-srcu-crypto-chain.patch @@ -1,7 +1,7 @@ Subject: crypto: Convert crypto notifier chain to SRCU From: Peter Zijlstra Date: Fri, 05 Oct 2012 09:03:24 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The crypto notifier deadlocks on RT. Though this can be a real deadlock on mainline as well due to fifo fair rwsems. @@ -121,7 +121,7 @@ Signed-off-by: Thomas Gleixner --- a/crypto/algapi.c +++ b/crypto/algapi.c -@@ -731,13 +731,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); +@@ -726,13 +726,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); int crypto_register_notifier(struct notifier_block *nb) { @@ -139,7 +139,7 @@ Signed-off-by: Thomas Gleixner --- a/crypto/api.c +++ b/crypto/api.c -@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); +@@ -32,7 +32,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); EXPORT_SYMBOL_GPL(crypto_alg_sem); @@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner EXPORT_SYMBOL_GPL(crypto_chain); static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); -@@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long +@@ -237,10 +237,10 @@ int crypto_probing_notify(unsigned long { int ok; @@ -163,7 +163,7 @@ Signed-off-by: Thomas Gleixner return ok; --- a/crypto/internal.h +++ b/crypto/internal.h -@@ -47,7 +47,7 @@ struct crypto_larval { +@@ -44,7 +44,7 @@ struct crypto_larval { extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; @@ -172,7 +172,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); -@@ -143,7 +143,7 @@ static inline int crypto_is_moribund(str +@@ -139,7 +139,7 @@ static inline int crypto_is_moribund(str static inline void crypto_notify(unsigned long val, void *v) { diff --git a/debian/patches/features/all/rt/pid.h-include-atomic.h.patch b/debian/patches/features/all/rt/pid.h-include-atomic.h.patch index 2698b6787..4d091d28e 100644 --- a/debian/patches/features/all/rt/pid.h-include-atomic.h.patch +++ b/debian/patches/features/all/rt/pid.h-include-atomic.h.patch @@ -1,7 +1,7 @@ From: Grygorii Strashko Date: Tue, 21 Jul 2015 19:43:56 +0300 Subject: pid.h: include atomic.h -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz This patch fixes build error: CC kernel/pid_namespace.o diff --git 
a/debian/patches/features/all/rt/posix-cpu-timers-remove-lockdep_assert_irqs_disabled.patch b/debian/patches/features/all/rt/posix-cpu-timers-remove-lockdep_assert_irqs_disabled.patch
new file mode 100644
index 000000000..2959f30e2
--- /dev/null
+++ b/debian/patches/features/all/rt/posix-cpu-timers-remove-lockdep_assert_irqs_disabled.patch
@@ -0,0 +1,36 @@
+From: Sebastian Andrzej Siewior
+Date: Thu, 12 Apr 2018 17:37:17 +0200
+Subject: [PATCH] posix-cpu-timers: remove lockdep_assert_irqs_disabled()
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+The lockdep_assert_irqs_disabled() was a BUG_ON() statement in the
+beginning and it was added just before the "spin_lock(siglock)"
+statement to ensure this lock was taken with disabled interrupts.
+This is no longer the case: the siglock is acquired via
+lock_task_sighand() and this function already disables the interrupts.
+The lock is also acquired before this "lockdep_assert_irqs_disabled" so
+it is best to remove it.
+
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ kernel/time/posix-cpu-timers.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/kernel/time/posix-cpu-timers.c
++++ b/kernel/time/posix-cpu-timers.c
+@@ -604,7 +604,6 @@ static int posix_cpu_timer_set(struct k_
+ 	/*
+ 	 * Disarm any old timer after extracting its expiry time.
+ 	 */
+-	lockdep_assert_irqs_disabled();
+ 
+ 	ret = 0;
+ 	old_incr = timer->it.cpu.incr;
+@@ -1049,7 +1048,6 @@ static void posix_cpu_timer_rearm(struct
+ 	/*
+ 	 * Now re-arm for the new expiry time.
+ 	 */
+-	lockdep_assert_irqs_disabled();
+ 	arm_timer(timer);
+ unlock:
+ 	unlock_task_sighand(p, &flags);
diff --git a/debian/patches/features/all/rt/posix-timers-move-the-rcu-head-out-of-the-union.patch b/debian/patches/features/all/rt/posix-timers-move-the-rcu-head-out-of-the-union.patch
new file mode 100644
index 000000000..b9fb2d19d
--- /dev/null
+++ b/debian/patches/features/all/rt/posix-timers-move-the-rcu-head-out-of-the-union.patch
@@ -0,0 +1,53 @@
+From: Sebastian Andrzej Siewior
+Date: Wed, 28 Mar 2018 11:15:19 +0200
+Subject: [PATCH 3/3] posix-timers: move the rcu head out of the union
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
+
+On RT the timer can be preempted while running and therefore we wait
+with timer_wait_for_callback() for the timer to complete (instead of
+busy looping). The RCU-readlock is held to ensure that this posix timer
+is not removed while we wait on it.
+If the timer is removed then it invokes call_rcu() with a pointer that
+is shared with the hrtimer because it is part of the same union.
+In order to avoid any possible side effects I am moving the rcu pointer
+out of the union.
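
The layout change is easiest to see side by side. A trimmed sketch follows (hypothetical struct names; the real k_itimer carries more members and more union arms):

#include <linux/hrtimer.h>
#include <linux/rcupdate.h>

/* Before: rcu aliases the live timer through the union, so a
 * call_rcu() from release can scribble over storage that a preempted
 * -RT waiter is still examining. */
struct k_itimer_before {
        union {
                struct hrtimer  real;
                struct rcu_head rcu;
        } it;
};

/* After: the rcu head owns its own storage and the union member stays
 * intact for as long as the RCU read side needs it. */
struct k_itimer_after {
        union {
                struct hrtimer  real;
        } it;
        struct rcu_head         rcu;
};

The cost is sizeof(struct rcu_head) extra bytes per timer; the union trick saved space only as long as nothing could look at a dying timer, an assumption the -RT wait loop breaks.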
+ +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/posix-timers.h | 2 +- + kernel/time/posix-timers.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/posix-timers.h ++++ b/include/linux/posix-timers.h +@@ -114,8 +114,8 @@ struct k_itimer { + struct { + struct alarm alarmtimer; + } alarm; +- struct rcu_head rcu; + } it; ++ struct rcu_head rcu; + }; + + void run_posix_cpu_timers(struct task_struct *task); +--- a/kernel/time/posix-timers.c ++++ b/kernel/time/posix-timers.c +@@ -471,7 +471,7 @@ static struct k_itimer * alloc_posix_tim + + static void k_itimer_rcu_free(struct rcu_head *head) + { +- struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); ++ struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); + + kmem_cache_free(posix_timers_cache, tmr); + } +@@ -488,7 +488,7 @@ static void release_posix_timer(struct k + } + put_pid(tmr->it_pid); + sigqueue_free(tmr->sigq); +- call_rcu(&tmr->it.rcu, k_itimer_rcu_free); ++ call_rcu(&tmr->rcu, k_itimer_rcu_free); + } + + static int common_timer_create(struct k_itimer *new_timer) diff --git a/debian/patches/features/all/rt/posix-timers-no-broadcast.patch b/debian/patches/features/all/rt/posix-timers-no-broadcast.patch index 670488993..a7a2e6586 100644 --- a/debian/patches/features/all/rt/posix-timers-no-broadcast.patch +++ b/debian/patches/features/all/rt/posix-timers-no-broadcast.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:29:20 -0500 Subject: posix-timers: Prevent broadcast signals -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Posix timers should not send broadcast signals and kernel only signals. Prevent it. @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c -@@ -433,6 +433,7 @@ static enum hrtimer_restart posix_timer_ +@@ -434,6 +434,7 @@ static enum hrtimer_restart posix_timer_ static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner switch (event->sigev_notify) { case SIGEV_SIGNAL | SIGEV_THREAD_ID: -@@ -442,7 +443,8 @@ static struct pid *good_sigevent(sigeven +@@ -443,7 +444,8 @@ static struct pid *good_sigevent(sigeven /* FALLTHRU */ case SIGEV_SIGNAL: case SIGEV_THREAD: diff --git a/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch b/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch index 36316173f..f856c5766 100644 --- a/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch +++ b/debian/patches/features/all/rt/posix-timers-thread-posix-cpu-timers-on-rt.patch @@ -1,7 +1,7 @@ From: John Stultz Date: Fri, 3 Jul 2009 08:29:58 -0500 Subject: posix-timers: Thread posix-cpu-timers on -rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz posix-cpu-timer code takes non -rt safe locks in hard irq context. Move it to a thread. 
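
In outline the threading works like the sketch below: the hard-irq entry point only records work and wakes a helper, and the lock-taking part runs in the helper's context where sleeping locks are legal. This is a hypothetical single-slot hand-off; the real patch keeps a per-CPU kthread and a linked list of expired tasks, and __run_posix_cpu_timers() is its name for the original function body:

#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/sched.h>

extern void __run_posix_cpu_timers(struct task_struct *tsk);

static struct task_struct *helper;      /* one per CPU in the real patch */
static struct task_struct *expired;     /* a list there, one slot here */

void run_posix_cpu_timers(struct task_struct *tsk)      /* hard-irq context */
{
        WRITE_ONCE(expired, tsk);
        wake_up_process(helper);        /* defer: no sighand locks here */
}

static int posix_cpu_timers_thread(void *unused)
{
        struct task_struct *tsk;

        while (!kthread_should_stop()) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (!READ_ONCE(expired))
                        schedule();
                __set_current_state(TASK_RUNNING);

                tsk = xchg(&expired, NULL);
                if (tsk)
                        __run_posix_cpu_timers(tsk);    /* may sleep now */
        }
        return 0;
}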
@@ -12,38 +12,15 @@ Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner --- - include/linux/init_task.h | 7 + include/linux/sched.h | 3 + init/init_task.c | 7 + kernel/fork.c | 3 - kernel/time/posix-cpu-timers.c | 157 +++++++++++++++++++++++++++++++++++++++-- - 4 files changed, 166 insertions(+), 4 deletions(-) + kernel/time/posix-cpu-timers.c | 154 ++++++++++++++++++++++++++++++++++++++++- + 4 files changed, 164 insertions(+), 3 deletions(-) ---- a/include/linux/init_task.h -+++ b/include/linux/init_task.h -@@ -163,6 +163,12 @@ extern struct cred init_cred; - # define INIT_PERF_EVENTS(tsk) - #endif - -+#if defined(CONFIG_POSIX_TIMERS) && defined(CONFIG_PREEMPT_RT_BASE) -+# define INIT_TIMER_LIST .posix_timer_list = NULL, -+#else -+# define INIT_TIMER_LIST -+#endif -+ - #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - # define INIT_VTIME(tsk) \ - .vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount), \ -@@ -277,6 +283,7 @@ extern struct cred init_cred; - INIT_CPU_TIMERS(tsk) \ - .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ - .timer_slack_ns = 50000, /* 50 usec default slack */ \ -+ INIT_TIMER_LIST \ - .pids = { \ - [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ - [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -745,6 +745,9 @@ struct task_struct { +@@ -757,6 +757,9 @@ struct task_struct { #ifdef CONFIG_POSIX_TIMERS struct task_cputime cputime_expires; struct list_head cpu_timers[3]; @@ -53,9 +30,32 @@ Signed-off-by: Thomas Gleixner #endif /* Process credentials: */ +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -42,6 +42,12 @@ static struct sighand_struct init_sighan + .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh), + }; + ++#if defined(CONFIG_POSIX_TIMERS) && defined(CONFIG_PREEMPT_RT_BASE) ++# define INIT_TIMER_LIST .posix_timer_list = NULL, ++#else ++# define INIT_TIMER_LIST ++#endif ++ + /* + * Set up the first task table, touch at your own risk!. Base=0, + * limit=0x1fffff (=2MB) +@@ -111,6 +117,7 @@ struct task_struct init_task + INIT_CPU_TIMERS(init_task) + .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), + .timer_slack_ns = 50000, /* 50 usec default slack */ ++ INIT_TIMER_LIST + .pids = { + [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), + [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1496,6 +1496,9 @@ static void rt_mutex_init_task(struct ta +@@ -1529,6 +1529,9 @@ static void rt_mutex_init_task(struct ta */ static void posix_cpu_timers_init(struct task_struct *tsk) { @@ -78,33 +78,15 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -14,6 +16,7 @@ - #include +@@ -15,6 +17,7 @@ #include #include + #include +#include #include "posix-timers.h" -@@ -603,7 +606,7 @@ static int posix_cpu_timer_set(struct k_ - /* - * Disarm any old timer after extracting its expiry time. - */ -- WARN_ON_ONCE(!irqs_disabled()); -+ WARN_ON_ONCE_NONRT(!irqs_disabled()); - - ret = 0; - old_incr = timer->it.cpu.incr; -@@ -1034,7 +1037,7 @@ static void posix_cpu_timer_rearm(struct - /* - * Now re-arm for the new expiry time. - */ -- WARN_ON_ONCE(!irqs_disabled()); -+ WARN_ON_ONCE_NONRT(!irqs_disabled()); - arm_timer(timer); - unlock: - unlock_task_sighand(p, &flags); -@@ -1119,13 +1122,13 @@ static inline int fastpath_timer_check(s +@@ -1135,14 +1138,12 @@ static inline int fastpath_timer_check(s * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. 
*/ @@ -115,12 +97,12 @@ Signed-off-by: Thomas Gleixner struct k_itimer *timer, *next; unsigned long flags; -- WARN_ON_ONCE(!irqs_disabled()); -+ WARN_ON_ONCE_NONRT(!irqs_disabled()); - +- lockdep_assert_irqs_disabled(); +- /* * The fast path checks that there are no expired thread or thread -@@ -1179,6 +1182,152 @@ void run_posix_cpu_timers(struct task_st + * group timers. If that's so, just return. +@@ -1195,6 +1196,153 @@ void run_posix_cpu_timers(struct task_st } } @@ -266,6 +248,7 @@ Signed-off-by: Thomas Gleixner +#else /* CONFIG_PREEMPT_RT_BASE */ +void run_posix_cpu_timers(struct task_struct *tsk) +{ ++ lockdep_assert_irqs_disabled(); + __run_posix_cpu_timers(tsk); +} +#endif /* CONFIG_PREEMPT_RT_BASE */ diff --git a/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch b/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch index 9d0291efe..b49b432bf 100644 --- a/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch +++ b/debian/patches/features/all/rt/power-disable-highmem-on-rt.patch @@ -1,7 +1,7 @@ Subject: powerpc: Disable highmem on RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 17:08:34 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The current highmem handling on -RT is not compatible and needs fixups. @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -390,7 +390,7 @@ menu "Kernel options" +@@ -394,7 +394,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" diff --git a/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch b/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch index 3dc9b9a51..b816c19da 100644 --- a/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch +++ b/debian/patches/features/all/rt/power-use-generic-rwsem-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: powerpc: Use generic rwsem on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Use generic code which uses rtmutex diff --git a/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch b/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch index 76497f3a3..3c2dbf083 100644 --- a/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch +++ b/debian/patches/features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch @@ -1,7 +1,7 @@ From: Bogdan Purcareata Date: Fri, 24 Apr 2015 15:53:13 +0000 Subject: powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT_FULL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz While converting the openpic emulation code to use a raw_spinlock_t enables guests to run on RT, there's still a performance issue. 
For interrupts sent in @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig -@@ -177,6 +177,7 @@ config KVM_E500MC +@@ -178,6 +178,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 diff --git a/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch b/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch index 0b5c81ee9..9e8bc7fdd 100644 --- a/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch +++ b/debian/patches/features/all/rt/powerpc-preempt-lazy-support.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 1 Nov 2012 10:14:11 +0100 Subject: powerpc: Add support for lazy preemption -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Implement the powerpc pieces for lazy preempt. @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -215,6 +215,7 @@ config PPC +@@ -219,6 +219,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; -@@ -81,8 +83,7 @@ static inline struct thread_info *curren +@@ -78,8 +80,7 @@ static inline struct thread_info *curren #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_PATCH_PENDING 6 /* pending live patching update */ -@@ -101,6 +102,8 @@ static inline struct thread_info *curren +@@ -98,6 +99,8 @@ static inline struct thread_info *curren #if defined(CONFIG_PPC64) #define TIF_ELF2ABI 18 /* function descriptors must die! 
*/ #endif @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< beq restore_user --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S -@@ -689,7 +689,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG +@@ -688,7 +688,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG bl restore_math b restore #endif @@ -143,7 +143,7 @@ Signed-off-by: Thomas Gleixner beq 2f bl restore_interrupts SCHEDULE_USER -@@ -751,10 +751,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG +@@ -750,10 +750,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ @@ -160,10 +160,10 @@ Signed-off-by: Thomas Gleixner /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) +check_count: - cmpwi cr1,r8,0 + cmpwi cr0,r8,0 + bne restore ld r0,SOFTE(r1) - cmpdi r0,0 -@@ -771,7 +779,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG +@@ -770,7 +778,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) diff --git a/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch b/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch index 08db14712..63872c396 100644 --- a/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch +++ b/debian/patches/features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch @@ -1,7 +1,7 @@ From: Paul Gortmaker Date: Sun, 31 May 2015 14:44:42 -0400 Subject: powerpc: ps3/device-init.c - adapt to completions using swait vs wait -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz To fix: diff --git a/debian/patches/features/all/rt/preempt-lazy-support.patch b/debian/patches/features/all/rt/preempt-lazy-support.patch index 0a6583481..b9bd20c3f 100644 --- a/debian/patches/features/all/rt/preempt-lazy-support.patch +++ b/debian/patches/features/all/rt/preempt-lazy-support.patch @@ -1,7 +1,7 @@ Subject: sched: Add support for lazy preemption From: Thomas Gleixner Date: Fri, 26 Oct 2012 18:50:54 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz It has become an obsession to mitigate the determinism vs. throughput loss of RT. 
Looking at the mainline semantics of preemption points @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ -@@ -241,6 +261,13 @@ do { \ +@@ -250,6 +270,13 @@ do { \ __preempt_schedule(); \ } while (0) @@ -117,7 +117,7 @@ Signed-off-by: Thomas Gleixner #else /* !CONFIG_PREEMPT */ #define preempt_enable() \ do { \ -@@ -248,6 +275,12 @@ do { \ +@@ -257,6 +284,12 @@ do { \ preempt_count_dec(); \ } while (0) @@ -130,7 +130,7 @@ Signed-off-by: Thomas Gleixner #define preempt_enable_notrace() \ do { \ barrier(); \ -@@ -314,7 +347,7 @@ do { \ +@@ -323,7 +356,7 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ @@ -141,7 +141,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1600,6 +1600,44 @@ static inline int test_tsk_need_resched( +@@ -1628,6 +1628,44 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -188,7 +188,7 @@ Signed-off-by: Thomas Gleixner if (task->state & (__TASK_STOPPED | __TASK_TRACED)) --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h -@@ -91,7 +91,17 @@ static inline int test_ti_thread_flag(st +@@ -90,7 +90,17 @@ static inline int test_ti_thread_flag(st #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) @@ -234,7 +234,7 @@ Signed-off-by: Thomas Gleixner default PREEMPT_NONE --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -518,6 +518,48 @@ void resched_curr(struct rq *rq) +@@ -520,6 +520,48 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } @@ -283,7 +283,7 @@ Signed-off-by: Thomas Gleixner void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -2444,6 +2486,9 @@ int sched_fork(unsigned long clone_flags +@@ -2455,6 +2497,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -293,7 +293,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -3361,6 +3406,7 @@ static void __sched notrace __schedule(b +@@ -3437,6 +3482,7 @@ static void __sched notrace __schedule(b next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -301,7 +301,7 @@ Signed-off-by: Thomas Gleixner clear_preempt_need_resched(); if (likely(prev != next)) { -@@ -3551,6 +3597,30 @@ static void __sched notrace preempt_sche +@@ -3626,6 +3672,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -332,7 +332,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PREEMPT /* * this is the entry point to schedule() from in-kernel preemption -@@ -3565,7 +3635,8 @@ asmlinkage __visible void __sched notrac +@@ -3640,7 +3710,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -342,7 +342,7 @@ Signed-off-by: Thomas Gleixner preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -3592,6 +3663,9 @@ asmlinkage __visible void __sched notrac +@@ -3667,6 +3738,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -352,7 +352,7 @@ Signed-off-by: Thomas Gleixner do { /* * Because the function tracer can trace preempt_count_sub() -@@ -5331,7 +5405,9 @@ void init_idle(struct task_struct *idle, +@@ -5430,7 +5504,9 @@ void init_idle(struct task_struct *idle, /* Set the preempt count _outside_ the spinlocks! 
*/ init_idle_preempt_count(idle, cpu); @@ -363,7 +363,7 @@ Signed-off-by: Thomas Gleixner /* * The idle tasks have their own, simple scheduling class: */ -@@ -6887,6 +6963,7 @@ void migrate_disable(void) +@@ -7146,6 +7222,7 @@ void migrate_disable(void) } preempt_disable(); @@ -371,7 +371,7 @@ Signed-off-by: Thomas Gleixner pin_current_cpu(); migrate_disable_update_cpus_allowed(p); -@@ -6954,6 +7031,7 @@ void migrate_enable(void) +@@ -7213,6 +7290,7 @@ void migrate_enable(void) arg.dest_cpu = dest_cpu; unpin_current_cpu(); @@ -379,7 +379,7 @@ Signed-off-by: Thomas Gleixner preempt_enable(); stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); tlb_migrate_finish(p->mm); -@@ -6962,6 +7040,7 @@ void migrate_enable(void) +@@ -7221,6 +7299,7 @@ void migrate_enable(void) } } unpin_current_cpu(); @@ -389,7 +389,7 @@ Signed-off-by: Thomas Gleixner EXPORT_SYMBOL(migrate_enable); --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -3840,7 +3840,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4163,7 +4163,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -398,7 +398,7 @@ Signed-off-by: Thomas Gleixner /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -3864,7 +3864,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4187,7 +4187,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq return; if (delta > ideal_runtime) @@ -407,7 +407,7 @@ Signed-off-by: Thomas Gleixner } static void -@@ -4006,7 +4006,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4329,7 +4329,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc * validating it and just reschedule. */ if (queued) { @@ -416,7 +416,7 @@ Signed-off-by: Thomas Gleixner return; } /* -@@ -4188,7 +4188,7 @@ static void __account_cfs_rq_runtime(str +@@ -4511,7 +4511,7 @@ static void __account_cfs_rq_runtime(str * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -425,7 +425,7 @@ Signed-off-by: Thomas Gleixner } static __always_inline -@@ -4837,7 +4837,7 @@ static void hrtick_start_fair(struct rq +@@ -5160,7 +5160,7 @@ static void hrtick_start_fair(struct rq if (delta < 0) { if (rq->curr == p) @@ -434,7 +434,7 @@ Signed-off-by: Thomas Gleixner return; } hrtick_start(rq, delta); -@@ -6230,7 +6230,7 @@ static void check_preempt_wakeup(struct +@@ -6620,7 +6620,7 @@ static void check_preempt_wakeup(struct return; preempt: @@ -443,7 +443,7 @@ Signed-off-by: Thomas Gleixner /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -9084,7 +9084,7 @@ static void task_fork_fair(struct task_s +@@ -9485,7 +9485,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. 
*/ swap(curr->vruntime, se->vruntime); @@ -452,7 +452,7 @@ Signed-off-by: Thomas Gleixner } se->vruntime -= cfs_rq->min_vruntime; -@@ -9108,7 +9108,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -9509,7 +9509,7 @@ prio_changed_fair(struct rq *rq, struct */ if (rq->curr == p) { if (p->prio > oldprio) @@ -475,7 +475,7 @@ Signed-off-by: Thomas Gleixner /* --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1544,6 +1544,15 @@ extern void init_sched_fair_class(void); +@@ -1556,6 +1556,15 @@ extern void reweight_task(struct task_st extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -511,7 +511,7 @@ Signed-off-by: Thomas Gleixner (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; -@@ -3341,15 +3343,17 @@ get_total_entries(struct trace_buffer *b +@@ -3336,15 +3338,17 @@ get_total_entries(struct trace_buffer *b static void print_lat_help_header(struct seq_file *m) { @@ -538,7 +538,7 @@ Signed-off-by: Thomas Gleixner } static void print_event_info(struct trace_buffer *buf, struct seq_file *m) -@@ -3385,15 +3389,17 @@ static void print_func_help_header_irq(s +@@ -3380,15 +3384,17 @@ static void print_func_help_header_irq(s tgid ? tgid_space : space); seq_printf(m, "# %s / _----=> need-resched\n", tgid ? tgid_space : space); diff --git a/debian/patches/features/all/rt/preempt-nort-rt-variants.patch b/debian/patches/features/all/rt/preempt-nort-rt-variants.patch index e39813fbd..a723ffc33 100644 --- a/debian/patches/features/all/rt/preempt-nort-rt-variants.patch +++ b/debian/patches/features/all/rt/preempt-nort-rt-variants.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 24 Jul 2009 12:38:56 +0200 Subject: preempt: Provide preempt_*_(no)rt variants -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz RT needs a few preempt_disable/enable points which are not necessary otherwise. Implement variants to avoid #ifdeffery. diff --git a/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch b/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch index a3ce7bdab..28140bc08 100644 --- a/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch +++ b/debian/patches/features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch @@ -1,7 +1,7 @@ Subject: printk: Add "force_early_printk" boot param to help with debugging From: Peter Zijlstra Date: Fri, 02 Sep 2011 14:41:29 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Gives me an option to screw printk and actually see what the machine says. 
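
The mechanism here is tiny: the boot parameter just flips the kill switch that printk-kill.patch introduces, after which vprintk_emit() short-circuits into the synchronous early_printk() path instead of the lock-heavy console machinery. The handler added by the hunk that follows is presumably along these lines (a reconstruction, not a quote):

static int __init force_early_printk_setup(char *str)
{
        printk_killswitch = true;
        return 0;
}
early_param("force_early_printk", force_early_printk_setup);

Because the early console writes synchronously from the caller's context, output survives the very lock-ups and scheduler states that normal printk cannot print through, which is the whole point of the option.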
@@ -16,7 +16,7 @@ Link: http://lkml.kernel.org/n/tip-ykb97nsfmobq44xketrxs977@git.kernel.org --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -430,6 +430,13 @@ asmlinkage void early_printk(const char +@@ -434,6 +434,13 @@ asmlinkage void early_printk(const char */ static bool __read_mostly printk_killswitch; diff --git a/debian/patches/features/all/rt/printk-kill.patch b/debian/patches/features/all/rt/printk-kill.patch index 3c36ed992..54b398f9f 100644 --- a/debian/patches/features/all/rt/printk-kill.patch +++ b/debian/patches/features/all/rt/printk-kill.patch @@ -1,7 +1,7 @@ Subject: printk: Add a printk kill switch From: Ingo Molnar Date: Fri, 22 Jul 2011 17:58:40 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Add a prinkt-kill-switch. This is used from (NMI) watchdog to ensure that it does not dead-lock with the early printk code. @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -142,9 +142,11 @@ struct va_format { +@@ -140,9 +140,11 @@ struct va_format { #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PRINTK_NMI --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -400,6 +400,58 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); +@@ -404,6 +404,58 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); printk_safe_exit_irqrestore(flags); \ } while (0) @@ -88,7 +88,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ -@@ -1692,6 +1744,13 @@ asmlinkage int vprintk_emit(int facility +@@ -1836,6 +1888,13 @@ asmlinkage int vprintk_emit(int facility int printed_len; bool in_sched = false; @@ -102,7 +102,7 @@ Signed-off-by: Thomas Gleixner if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; -@@ -1863,26 +1922,6 @@ static bool suppress_message_printing(in +@@ -2016,26 +2075,6 @@ static bool suppress_message_printing(in #endif /* CONFIG_PRINTK */ diff --git a/debian/patches/features/all/rt/printk-rt-aware.patch b/debian/patches/features/all/rt/printk-rt-aware.patch index c68c4e606..e905b2daa 100644 --- a/debian/patches/features/all/rt/printk-rt-aware.patch +++ b/debian/patches/features/all/rt/printk-rt-aware.patch @@ -1,19 +1,43 @@ Subject: printk: Make rt aware From: Thomas Gleixner Date: Wed, 19 Sep 2012 14:50:37 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Drop the lock before calling the console driver and do not disable interrupts while printing to a serial console. Signed-off-by: Thomas Gleixner --- - kernel/printk/printk.c | 19 ++++++++++++++++++- - 1 file changed, 18 insertions(+), 1 deletion(-) + kernel/printk/printk.c | 36 +++++++++++++++++++++++++++++++++--- + 1 file changed, 33 insertions(+), 3 deletions(-) --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1617,6 +1617,7 @@ static void call_console_drivers(const c +@@ -1606,6 +1606,7 @@ SYSCALL_DEFINE3(syslog, int, type, char + return do_syslog(type, buf, len, SYSLOG_FROM_READER); + } + ++#ifndef CONFIG_PREEMPT_RT_FULL + /* + * Special console_lock variants that help to reduce the risk of soft-lockups. 
+ * They allow to pass console_lock to another printk() call using a busy wait. +@@ -1746,6 +1747,15 @@ static int console_trylock_spinning(void + return 1; + } + ++#else ++ ++static int console_trylock_spinning(void) ++{ ++ return console_trylock(); ++} ++ ++#endif ++ + /* + * Call the console drivers, asking them to write out + * log_buf[start] to log_buf[end - 1]. +@@ -1761,6 +1771,7 @@ static void call_console_drivers(const c if (!console_drivers) return; @@ -21,7 +45,7 @@ Signed-off-by: Thomas Gleixner for_each_console(con) { if (exclusive_console && con != exclusive_console) continue; -@@ -1632,6 +1633,7 @@ static void call_console_drivers(const c +@@ -1776,6 +1787,7 @@ static void call_console_drivers(const c else con->write(con, text, len); } @@ -29,7 +53,7 @@ Signed-off-by: Thomas Gleixner } int printk_delay_msec __read_mostly; -@@ -1814,12 +1816,22 @@ asmlinkage int vprintk_emit(int facility +@@ -1958,20 +1970,31 @@ asmlinkage int vprintk_emit(int facility /* If called from the scheduler, we can not call up(). */ if (!in_sched) { @@ -43,17 +67,28 @@ Signed-off-by: Thomas Gleixner + if (!(preempt_count() == 0 && !irqs_disabled())) + may_trylock = 0; +#endif ++ + /* + * Disable preemption to avoid being preempted while holding + * console_sem which would prevent anyone from printing to + * console + */ +- preempt_disable(); ++ migrate_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up * /dev/kmsg and syslog() users. */ -- if (console_trylock()) -+ if (may_trylock && console_trylock()) +- if (console_trylock_spinning()) ++ if (may_trylock && console_trylock_spinning()) console_unlock(); +- preempt_enable(); ++ migrate_enable(); } -@@ -2275,10 +2287,15 @@ void console_unlock(void) + return printed_len; +@@ -2429,6 +2452,10 @@ void console_unlock(void) console_seq++; raw_spin_unlock(&logbuf_lock); @@ -61,11 +96,24 @@ Signed-off-by: Thomas Gleixner + printk_safe_exit_irqrestore(flags); + call_console_drivers(ext_text, ext_len, text, len); +#else - stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(ext_text, ext_len, text, len); - start_critical_timings(); + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to +@@ -2447,6 +2474,7 @@ void console_unlock(void) + } + printk_safe_exit_irqrestore(flags); +#endif if (do_cond_resched) cond_resched(); +@@ -2476,7 +2504,9 @@ void console_unlock(void) + if (retry && console_trylock()) + goto again; + ++#ifndef CONFIG_PREEMPT_RT_FULL + out: ++#endif + if (wake_klogd) + wake_up_klogd(); + } diff --git a/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch index c419311bd..e5c4e2389 100644 --- a/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch +++ b/debian/patches/features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 29 Aug 2013 18:21:04 +0200 Subject: ptrace: fix ptrace vs tasklist_lock race -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz As explained by Alexander Fyodorov : @@ -44,7 +44,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ 
(task->flags & PF_FROZEN) == 0 && \ (task->state & TASK_NOLOAD) == 0) -@@ -1593,6 +1589,51 @@ static inline int test_tsk_need_resched( +@@ -1612,6 +1608,51 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_unlock_irq(&task->sighand->siglock); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1358,6 +1358,18 @@ int migrate_swap(struct task_struct *cur +@@ -1369,6 +1369,18 @@ int migrate_swap(struct task_struct *cur return ret; } @@ -135,7 +135,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -1402,7 +1414,7 @@ unsigned long wait_task_inactive(struct +@@ -1413,7 +1425,7 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! */ while (task_running(rq, p)) { @@ -144,7 +144,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; cpu_relax(); } -@@ -1417,7 +1429,8 @@ unsigned long wait_task_inactive(struct +@@ -1428,7 +1440,8 @@ unsigned long wait_task_inactive(struct running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; diff --git a/debian/patches/features/all/rt/radix-tree-use-local-locks.patch b/debian/patches/features/all/rt/radix-tree-use-local-locks.patch index e21785244..23030d2ad 100644 --- a/debian/patches/features/all/rt/radix-tree-use-local-locks.patch +++ b/debian/patches/features/all/rt/radix-tree-use-local-locks.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 25 Jan 2017 16:34:27 +0100 Subject: [PATCH] radix-tree: use local locks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The preload functionality uses per-CPU variables and preempt-disable to ensure that it does not switch CPUs during its usage. This patch adds @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/idr.h +++ b/include/linux/idr.h -@@ -167,10 +167,7 @@ static inline bool idr_is_empty(const st +@@ -156,10 +156,7 @@ static inline bool idr_is_empty(const st * Each idr_preload() should be matched with an invocation of this * function. See idr_preload() for details. */ @@ -30,10 +30,10 @@ Signed-off-by: Sebastian Andrzej Siewior +void idr_preload_end(void); /** - * idr_find - return pointer for given id + * idr_for_each_entry() - Iterate over an IDR's elements of a given type. 
--- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h -@@ -328,6 +328,8 @@ unsigned int radix_tree_gang_lookup_slot +@@ -326,6 +326,8 @@ unsigned int radix_tree_gang_lookup_slot int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); @@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *, unsigned long index, unsigned int tag); -@@ -347,11 +349,6 @@ unsigned int radix_tree_gang_lookup_tag_ +@@ -345,11 +347,6 @@ unsigned int radix_tree_gang_lookup_tag_ unsigned int max_items, unsigned int tag); int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); @@ -56,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned new_order); --- a/lib/radix-tree.c +++ b/lib/radix-tree.c -@@ -37,7 +37,7 @@ +@@ -38,7 +38,7 @@ #include #include #include @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Number of nodes in fully populated tree of given height */ static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; -@@ -86,6 +86,7 @@ struct radix_tree_preload { +@@ -87,6 +87,7 @@ struct radix_tree_preload { struct radix_tree_node *nodes; }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; @@ -73,7 +73,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline struct radix_tree_node *entry_to_node(void *ptr) { -@@ -404,12 +405,13 @@ radix_tree_node_alloc(gfp_t gfp_mask, st +@@ -405,12 +406,13 @@ radix_tree_node_alloc(gfp_t gfp_mask, st * succeed in getting a node here (and never reach * kmem_cache_alloc) */ @@ -88,7 +88,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Update the allocation stack trace as this is more useful * for debugging. 
-@@ -475,14 +477,14 @@ static __must_check int __radix_tree_pre +@@ -476,14 +478,14 @@ static __must_check int __radix_tree_pre */ gfp_mask &= ~__GFP_ACCOUNT; @@ -106,7 +106,7 @@ Signed-off-by: Sebastian Andrzej Siewior rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr < nr) { node->parent = rtp->nodes; -@@ -524,7 +526,7 @@ int radix_tree_maybe_preload(gfp_t gfp_m +@@ -525,7 +527,7 @@ int radix_tree_maybe_preload(gfp_t gfp_m if (gfpflags_allow_blocking(gfp_mask)) return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); /* Preloading doesn't help anything with this gfp mask, skip it */ @@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; } EXPORT_SYMBOL(radix_tree_maybe_preload); -@@ -562,7 +564,7 @@ int radix_tree_maybe_preload_order(gfp_t +@@ -563,7 +565,7 @@ int radix_tree_maybe_preload_order(gfp_t /* Preloading doesn't help anything with this gfp mask, skip it */ if (!gfpflags_allow_blocking(gfp_mask)) { @@ -124,7 +124,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; } -@@ -596,6 +598,12 @@ int radix_tree_maybe_preload_order(gfp_t +@@ -597,6 +599,12 @@ int radix_tree_maybe_preload_order(gfp_t return __radix_tree_preload(gfp_mask, nr_nodes); } @@ -137,7 +137,7 @@ Signed-off-by: Sebastian Andrzej Siewior static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { -@@ -2105,10 +2113,16 @@ EXPORT_SYMBOL(radix_tree_tagged); +@@ -2102,10 +2110,16 @@ EXPORT_SYMBOL(radix_tree_tagged); void idr_preload(gfp_t gfp_mask) { if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) @@ -155,7 +155,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * ida_pre_get - reserve resources for ida allocation * @ida: ida handle -@@ -2125,7 +2139,7 @@ int ida_pre_get(struct ida *ida, gfp_t g +@@ -2122,7 +2136,7 @@ int ida_pre_get(struct ida *ida, gfp_t g * to return to the ida_pre_get() step. */ if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) @@ -163,4 +163,4 @@ Signed-off-by: Sebastian Andrzej Siewior + local_unlock(radix_tree_preloads_lock); if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); + struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); diff --git a/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch b/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch index 2899902ff..5adc36a96 100644 --- a/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch +++ b/debian/patches/features/all/rt/random-avoid-preempt_disable-ed-section.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 12 May 2017 15:46:17 +0200 Subject: [PATCH] random: avoid preempt_disable()ed section -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz extract_crng() will use sleeping locks while in a preempt_disable() section due to get_cpu_var(). @@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -2087,6 +2088,7 @@ static rwlock_t batched_entropy_reset_lo +@@ -2188,6 +2189,7 @@ static rwlock_t batched_entropy_reset_lo * at any point prior. 
*/ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior u64 get_random_u64(void) { u64 ret; -@@ -2107,7 +2109,7 @@ u64 get_random_u64(void) +@@ -2208,7 +2210,7 @@ u64 get_random_u64(void) warn_unseeded_randomness(&previous); use_lock = READ_ONCE(crng_init) < 2; @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { -@@ -2117,12 +2119,13 @@ u64 get_random_u64(void) +@@ -2218,12 +2220,13 @@ u64 get_random_u64(void) ret = batch->entropy_u64[batch->position++]; if (use_lock) read_unlock_irqrestore(&batched_entropy_reset_lock, flags); @@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior u32 get_random_u32(void) { u32 ret; -@@ -2137,7 +2140,7 @@ u32 get_random_u32(void) +@@ -2238,7 +2241,7 @@ u32 get_random_u32(void) warn_unseeded_randomness(&previous); use_lock = READ_ONCE(crng_init) < 2; @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (use_lock) read_lock_irqsave(&batched_entropy_reset_lock, flags); if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { -@@ -2147,7 +2150,7 @@ u32 get_random_u32(void) +@@ -2248,7 +2251,7 @@ u32 get_random_u32(void) ret = batch->entropy_u32[batch->position++]; if (use_lock) read_unlock_irqrestore(&batched_entropy_reset_lock, flags); diff --git a/debian/patches/features/all/rt/random-make-it-work-on-rt.patch b/debian/patches/features/all/rt/random-make-it-work-on-rt.patch index 29771727a..5556dddfe 100644 --- a/debian/patches/features/all/rt/random-make-it-work-on-rt.patch +++ b/debian/patches/features/all/rt/random-make-it-work-on-rt.patch @@ -1,7 +1,7 @@ Subject: random: Make it work on rt From: Thomas Gleixner Date: Tue, 21 Aug 2012 20:38:50 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Delegate the random insertion to the forced threaded interrupt handler. 
Store the return IP of the hard interrupt handler in the irq @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/char/random.c +++ b/drivers/char/random.c -@@ -1113,28 +1113,27 @@ static __u32 get_reg(struct fast_pool *f +@@ -1218,28 +1218,27 @@ static __u32 get_reg(struct fast_pool *f return *ptr; } @@ -57,7 +57,7 @@ Signed-off-by: Thomas Gleixner add_interrupt_bench(cycles); --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c -@@ -966,6 +966,8 @@ static void vmbus_isr(void) +@@ -973,6 +973,8 @@ static void vmbus_isr(void) void *page_addr = hv_cpu->synic_event_page; struct hv_message *msg; union hv_synic_event_flags *event; @@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner bool handled = false; if (unlikely(page_addr == NULL)) -@@ -1009,7 +1011,7 @@ static void vmbus_isr(void) +@@ -1016,7 +1018,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner note_interrupt(desc, retval); --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1027,6 +1027,12 @@ static int irq_thread(void *data) +@@ -1029,6 +1029,12 @@ static int irq_thread(void *data) if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); diff --git a/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch b/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch index 9cc5e4268..74ddc8abf 100644 --- a/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch +++ b/debian/patches/features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 14 Sep 2016 11:52:17 +0200 Subject: rbtree: include rcu.h because we use it -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Since commit c1adf20052d8 ("Introduce rb_replace_node_rcu()") rbtree_augmented.h uses RCU related data structures but does not include diff --git a/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch b/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch index 8e60233c6..4ea200886 100644 --- a/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch +++ b/debian/patches/features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch @@ -1,7 +1,7 @@ From: "Paul E. McKenney" Date: Mon, 4 Nov 2013 13:21:10 -0800 Subject: rcu: Eliminate softirq processing from rcutree -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Running RCU out of softirq is a problem for some workloads that would like to manage RCU core processing independently of other softirq work, @@ -17,14 +17,14 @@ Tested-by: Mike Galbraith Signed-off-by: Paul E. 
McKenney Signed-off-by: Sebastian Andrzej Siewior --- - kernel/rcu/tree.c | 110 ++++++++++++++++++++++++++++++---- + kernel/rcu/tree.c | 112 +++++++++++++++++++++++++++++++--- kernel/rcu/tree.h | 5 - - kernel/rcu/tree_plugin.h | 152 ++++------------------------------------------- - 3 files changed, 114 insertions(+), 153 deletions(-) + kernel/rcu/tree_plugin.h | 151 +++-------------------------------------------- + 3 files changed, 114 insertions(+), 154 deletions(-) --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -58,6 +58,11 @@ +@@ -58,6 +58,13 @@ #include #include #include @@ -32,11 +32,13 @@ Signed-off-by: Sebastian Andrzej Siewior +#include +#include +#include ++#include ++#include +#include "../time/tick-internal.h" #include "tree.h" #include "rcu.h" -@@ -2946,18 +2951,17 @@ static void +@@ -2934,18 +2941,17 @@ static void /* * Do RCU core processing for the current CPU. */ @@ -57,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Schedule RCU callback invocation. If the specified type of RCU * does not support RCU priority boosting, just do a direct call, -@@ -2969,18 +2973,105 @@ static void invoke_rcu_callbacks(struct +@@ -2957,18 +2963,105 @@ static void invoke_rcu_callbacks(struct { if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) return; @@ -169,7 +171,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Handle any core-RCU processing required by a call_rcu() invocation. -@@ -4221,7 +4312,6 @@ void __init rcu_init(void) +@@ -4238,7 +4331,6 @@ void __init rcu_init(void) if (dump_tree) rcu_dump_rcu_node_tree(&rcu_sched_state); __rcu_init_preempt(); @@ -179,7 +181,7 @@ Signed-off-by: Sebastian Andrzej Siewior * We don't need protection against CPU-hotplug here because --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h -@@ -438,12 +438,10 @@ extern struct rcu_state rcu_preempt_stat +@@ -442,12 +442,10 @@ extern struct rcu_state rcu_preempt_stat int rcu_dynticks_snap(struct rcu_dynticks *rdtp); bool rcu_eqs_special_set(int cpu); @@ -192,7 +194,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifndef RCU_TREE_NONCORE -@@ -463,10 +461,9 @@ void call_rcu(struct rcu_head *head, rcu +@@ -467,10 +465,9 @@ void call_rcu(struct rcu_head *head, rcu static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); @@ -206,7 +208,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* #ifdef CONFIG_RCU_BOOST */ --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h -@@ -24,39 +24,16 @@ +@@ -24,42 +24,16 @@ * Paul E. McKenney */ @@ -216,12 +218,14 @@ Signed-off-by: Sebastian Andrzej Siewior -#include -#include -#include +-#include -#include -#include "../time/tick-internal.h" - #include "../locking/rtmutex_common.h" - +- -#ifdef CONFIG_RCU_BOOST - + #include "../locking/rtmutex_common.h" + /* * Control variables for per-CPU and per-rcu_node kthreads. These * handle all flavors of RCU. @@ -240,13 +244,14 @@ Signed-off-by: Sebastian Andrzej Siewior - * This probably needs to be excluded from -rt builds. - */ -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) +-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1) - -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ - static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ -@@ -682,15 +659,6 @@ static void rcu_preempt_check_callbacks( + static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. 
*/ +@@ -684,15 +658,6 @@ static void rcu_preempt_check_callbacks( t->rcu_read_unlock_special.b.need_qs = true; } @@ -262,7 +267,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. -@@ -913,20 +881,23 @@ void exit_rcu(void) +@@ -915,18 +880,21 @@ void exit_rcu(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ @@ -274,8 +279,6 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_RCU_BOOST + struct sched_param sp; --#include "../locking/rtmutex_common.h" -- -static void rcu_wake_cond(struct task_struct *t, int status) -{ - /* @@ -290,13 +293,11 @@ Signed-off-by: Sebastian Andrzej Siewior } +#ifdef CONFIG_RCU_BOOST -+ -+#include "../locking/rtmutex_common.h" + /* * Carry out RCU priority boosting on the task indicated by ->exp_tasks * or ->boost_tasks, advancing the pointer to the next task in the -@@ -1069,23 +1040,6 @@ static void rcu_initiate_boost(struct rc +@@ -1069,23 +1037,6 @@ static void rcu_initiate_boost(struct rc } /* @@ -320,7 +321,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Is the current CPU running the RCU-callbacks kthread? * Caller must have preemption disabled. */ -@@ -1139,67 +1093,6 @@ static int rcu_spawn_one_boost_kthread(s +@@ -1139,67 +1090,6 @@ static int rcu_spawn_one_boost_kthread(s return 0; } @@ -388,7 +389,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Set the per-rcu_node kthread's affinity to cover all CPUs that are * served by the rcu_node in question. The CPU hotplug lock is still -@@ -1230,26 +1123,12 @@ static void rcu_boost_kthread_setaffinit +@@ -1230,26 +1120,12 @@ static void rcu_boost_kthread_setaffinit free_cpumask_var(cm); } @@ -415,7 +416,7 @@ Signed-off-by: Sebastian Andrzej Siewior rcu_for_each_leaf_node(rcu_state_p, rnp) (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } -@@ -1272,11 +1151,6 @@ static void rcu_initiate_boost(struct rc +@@ -1272,11 +1148,6 @@ static void rcu_initiate_boost(struct rc raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch b/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch index 7793ff750..460dda33a 100644 --- a/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch +++ b/debian/patches/features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch @@ -1,7 +1,7 @@ Subject: rcu: Disable RCU_FAST_NO_HZ on RT From: Thomas Gleixner Date: Sun, 28 Oct 2012 13:26:09 +0000 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz This uses a timer_list timer from the irq disabled guts of the idle code. Disable it for now to prevent wreckage. 
diff --git a/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch b/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch index 877ca2619..0d7c3c2a5 100644 --- a/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch +++ b/debian/patches/features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch @@ -1,7 +1,7 @@ From: Julia Cartwright Date: Wed, 12 Oct 2016 11:21:14 -0500 Subject: [PATCH] rcu: enable rcu_normal_after_boot by default for RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The forcing of an expedited grace period is an expensive and very RT-application unfriendly operation, as it forcibly preempts all running @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -66,7 +66,7 @@ extern int rcu_expedited; /* from sysctl +@@ -67,7 +67,7 @@ extern int rcu_expedited; /* from sysctl module_param(rcu_expedited, int, 0); extern int rcu_normal; /* from sysctl */ module_param(rcu_normal, int, 0); diff --git a/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch b/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch index b4e52785a..1679cd776 100644 --- a/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch +++ b/debian/patches/features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Fri, 21 Mar 2014 20:19:05 +0100 Subject: rcu: make RCU_BOOST default on RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Since it is no longer invoked from the softirq people run into OOM more often if the priority of the RCU thread is too low. 
Making boosting diff --git a/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch b/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch index 04456a3bb..902613533 100644 --- a/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch +++ b/debian/patches/features/all/rt/rcu-merge-rcu-bh-into-rcu-preempt-for-rt.patch @@ -1,7 +1,7 @@ Subject: rcu: Merge RCU-bh into RCU-preempt Date: Wed, 5 Oct 2011 11:59:38 -0700 From: Thomas Gleixner -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The Linux kernel has long RCU-bh read-side critical sections that intolerably increase scheduling latency under mainline's RCU-bh rules, @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner int rcu_read_lock_sched_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -@@ -667,10 +682,14 @@ static inline void rcu_read_unlock(void) +@@ -666,10 +681,14 @@ static inline void rcu_read_unlock(void) static inline void rcu_read_lock_bh(void) { local_bh_disable(); @@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -680,10 +699,14 @@ static inline void rcu_read_lock_bh(void +@@ -679,10 +698,14 @@ static inline void rcu_read_lock_bh(void */ static inline void rcu_read_unlock_bh(void) { @@ -133,7 +133,7 @@ Signed-off-by: Thomas Gleixner void cond_synchronize_rcu(unsigned long oldstate); --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h -@@ -462,18 +462,26 @@ static inline void show_rcu_gp_kthreads( +@@ -458,18 +458,26 @@ static inline void show_rcu_gp_kthreads( extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; unsigned long rcu_batches_started(void); @@ -165,7 +165,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_RCU_NOCB_CPU --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c -@@ -417,6 +417,7 @@ static struct rcu_torture_ops rcu_ops = +@@ -413,6 +413,7 @@ static struct rcu_torture_ops rcu_ops = .name = "rcu" }; @@ -173,7 +173,7 @@ Signed-off-by: Thomas Gleixner /* * Definitions for rcu_bh torture testing. */ -@@ -456,6 +457,12 @@ static struct rcu_torture_ops rcu_bh_ops +@@ -452,6 +453,12 @@ static struct rcu_torture_ops rcu_bh_ops .name = "rcu_bh" }; @@ -204,7 +204,7 @@ Signed-off-by: Thomas Gleixner /* * Steal a bit from the bottom of ->dynticks for idle entry/exit -@@ -564,11 +566,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sc +@@ -551,11 +553,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sc /* * Return the number of RCU BH batches started thus far for debug & stats. */ @@ -218,7 +218,7 @@ Signed-off-by: Thomas Gleixner /* * Return the number of RCU batches completed thus far for debug & stats. -@@ -588,6 +592,7 @@ unsigned long rcu_batches_completed_sche +@@ -575,6 +579,7 @@ unsigned long rcu_batches_completed_sche } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); @@ -226,7 +226,7 @@ Signed-off-by: Thomas Gleixner /* * Return the number of RCU BH batches completed thus far for debug & stats. 
*/ -@@ -596,6 +601,7 @@ unsigned long rcu_batches_completed_bh(v +@@ -583,6 +588,7 @@ unsigned long rcu_batches_completed_bh(v return rcu_bh_state.completed; } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); @@ -234,7 +234,7 @@ Signed-off-by: Thomas Gleixner /* * Return the number of RCU expedited batches completed thus far for -@@ -619,6 +625,7 @@ unsigned long rcu_exp_batches_completed_ +@@ -606,6 +612,7 @@ unsigned long rcu_exp_batches_completed_ } EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); @@ -242,7 +242,7 @@ Signed-off-by: Thomas Gleixner /* * Force a quiescent state. */ -@@ -637,6 +644,13 @@ void rcu_bh_force_quiescent_state(void) +@@ -624,6 +631,13 @@ void rcu_bh_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); @@ -256,7 +256,7 @@ Signed-off-by: Thomas Gleixner /* * Force a quiescent state for RCU-sched. */ -@@ -687,9 +701,11 @@ void rcutorture_get_gp_data(enum rcutort +@@ -674,9 +688,11 @@ void rcutorture_get_gp_data(enum rcutort case RCU_FLAVOR: rsp = rcu_state_p; break; @@ -268,7 +268,7 @@ Signed-off-by: Thomas Gleixner case RCU_SCHED_FLAVOR: rsp = &rcu_sched_state; break; -@@ -3113,6 +3129,7 @@ void call_rcu_sched(struct rcu_head *hea +@@ -3101,6 +3117,7 @@ void call_rcu_sched(struct rcu_head *hea } EXPORT_SYMBOL_GPL(call_rcu_sched); @@ -276,7 +276,7 @@ Signed-off-by: Thomas Gleixner /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. -@@ -3140,6 +3157,7 @@ void call_rcu_bh(struct rcu_head *head, +@@ -3128,6 +3145,7 @@ void call_rcu_bh(struct rcu_head *head, __call_rcu(head, func, &rcu_bh_state, -1, 0); } EXPORT_SYMBOL_GPL(call_rcu_bh); @@ -284,7 +284,7 @@ Signed-off-by: Thomas Gleixner /* * Queue an RCU callback for lazy invocation after a grace period. -@@ -3225,6 +3243,7 @@ void synchronize_sched(void) +@@ -3213,6 +3231,7 @@ void synchronize_sched(void) } EXPORT_SYMBOL_GPL(synchronize_sched); @@ -292,7 +292,7 @@ Signed-off-by: Thomas Gleixner /** * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. * -@@ -3251,6 +3270,7 @@ void synchronize_rcu_bh(void) +@@ -3239,6 +3258,7 @@ void synchronize_rcu_bh(void) wait_rcu_gp(call_rcu_bh); } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); @@ -300,7 +300,7 @@ Signed-off-by: Thomas Gleixner /** * get_state_synchronize_rcu - Snapshot current RCU state -@@ -3601,6 +3621,7 @@ static void _rcu_barrier(struct rcu_stat +@@ -3589,6 +3609,7 @@ static void _rcu_barrier(struct rcu_stat mutex_unlock(&rsp->barrier_mutex); } @@ -308,7 +308,7 @@ Signed-off-by: Thomas Gleixner /** * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. */ -@@ -3609,6 +3630,7 @@ void rcu_barrier_bh(void) +@@ -3597,6 +3618,7 @@ void rcu_barrier_bh(void) _rcu_barrier(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -316,7 +316,7 @@ Signed-off-by: Thomas Gleixner /** * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 
-@@ -4184,7 +4206,9 @@ void __init rcu_init(void) +@@ -4201,7 +4223,9 @@ void __init rcu_init(void) rcu_bootup_announce(); rcu_init_geometry(); @@ -328,7 +328,7 @@ Signed-off-by: Thomas Gleixner rcu_dump_rcu_node_tree(&rcu_sched_state); --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h -@@ -427,7 +427,9 @@ extern struct list_head rcu_struct_flavo +@@ -431,7 +431,9 @@ extern struct list_head rcu_struct_flavo */ extern struct rcu_state rcu_sched_state; @@ -340,7 +340,7 @@ Signed-off-by: Thomas Gleixner extern struct rcu_state rcu_preempt_state; --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -333,6 +333,7 @@ int rcu_read_lock_held(void) +@@ -334,6 +334,7 @@ int rcu_read_lock_held(void) } EXPORT_SYMBOL_GPL(rcu_read_lock_held); @@ -348,7 +348,7 @@ Signed-off-by: Thomas Gleixner /** * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * -@@ -359,6 +360,7 @@ int rcu_read_lock_bh_held(void) +@@ -360,6 +361,7 @@ int rcu_read_lock_bh_held(void) return in_softirq() || irqs_disabled(); } EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); diff --git a/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch b/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch index 31f18a65c..27c54475c 100644 --- a/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch +++ b/debian/patches/features/all/rt/rcutree-rcu_bh_qs-disable-irq-while-calling-rcu_pree.patch @@ -1,7 +1,7 @@ From: Tiejun Chen Date: Wed, 18 Dec 2013 17:51:49 +0800 Subject: rcutree/rcu_bh_qs: Disable irq while calling rcu_preempt_qs() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Any callers to the function rcu_preempt_qs() must disable irqs in order to protect the assignment to ->rcu_read_unlock_special. 
In diff --git a/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch b/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch index b0d391f12..3ed82449d 100644 --- a/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch +++ b/debian/patches/features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch @@ -1,7 +1,7 @@ Subject: ARM: Initialize split page table locks for vector page From: Frank Rowand Date: Sat, 1 Oct 2011 18:58:13 -0700 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Without this patch, ARM can not use SPLIT_PTLOCK_CPUS if PREEMPT_RT_FULL=y because vectors_user_mapping() creates a @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c -@@ -325,6 +325,30 @@ unsigned long arch_randomize_brk(struct +@@ -324,6 +324,30 @@ unsigned long arch_randomize_brk(struct } #ifdef CONFIG_MMU diff --git a/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch b/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch index 54e71e7a0..918edd074 100644 --- a/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch +++ b/debian/patches/features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch @@ -1,7 +1,7 @@ Subject: ARM: smp: Move clear_tasks_mm_cpumask() call to __cpu_die() From: Grygorii Strashko Date: Fri, 11 Sep 2015 21:21:23 +0300 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When running with the RT-kernel (4.1.5-rt5) on TI OMAP dra7-evm and trying to do Suspend to RAM, the following backtrace occurs: diff --git a/debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch b/debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch index 1366a0ca7..7de791505 100644 --- a/debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch +++ b/debian/patches/features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch @@ -1,7 +1,7 @@ From: Daniel Bristot de Oliveira Date: Mon, 26 Jun 2017 17:07:15 +0200 Subject: rt: Increase/decrease the nr of migratory tasks when enabling/disabling migration -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz There is a problem in the migrate_disable()/enable() implementation regarding the number of migratory tasks in the rt/dl RQs. 
The problem @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6850,6 +6850,47 @@ const u32 sched_prio_to_wmult[40] = { +@@ -7109,6 +7109,47 @@ const u32 sched_prio_to_wmult[40] = { #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) @@ -129,7 +129,7 @@ Signed-off-by: Sebastian Andrzej Siewior void migrate_disable(void) { struct task_struct *p = current; -@@ -6873,10 +6914,9 @@ void migrate_disable(void) +@@ -7132,10 +7173,9 @@ void migrate_disable(void) } preempt_disable(); @@ -142,7 +142,7 @@ Signed-off-by: Sebastian Andrzej Siewior preempt_enable(); } -@@ -6908,9 +6948,8 @@ void migrate_enable(void) +@@ -7167,9 +7207,8 @@ void migrate_enable(void) preempt_disable(); diff --git a/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch b/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch index aad5c6457..c823a4e63 100644 --- a/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch +++ b/debian/patches/features/all/rt/rt-introduce-cpu-chill.patch @@ -1,7 +1,7 @@ Subject: rt: Introduce cpu_chill() From: Thomas Gleixner Date: Wed, 07 Mar 2012 20:51:03 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Retry loops on RT might loop forever when the modifying side was preempted. Add cpu_chill() to replace cpu_relax(). cpu_chill() @@ -14,40 +14,14 @@ Steven Rostedt changed it to use a hrtimer instead of msleep(): |up by the ksoftirqd running the TIMER softirq. But as the cpu_chill() is |called from softirq context, it may block the ksoftirqd() from running, in |which case, it may never wake up the msleep() causing the deadlock. -| -|I checked the vmcore, and irq/74-qla2xxx is stuck in the msleep() call, -|running on CPU 8. The one ksoftirqd that is stuck, happens to be the one that -|runs on CPU 8, and it is blocked on a lock held by irq/74-qla2xxx. As that -|ksoftirqd is the one that will wake up irq/74-qla2xxx, and it happens to be -|blocked on a lock that irq/74-qla2xxx holds, we have our deadlock. -| -|The solution is not to convert the cpu_chill() back to a cpu_relax() as that -|will re-create a possible live lock that the cpu_chill() fixed earlier, and may -|also leave this bug open on other softirqs. The fix is to remove the -|dependency on ksoftirqd from cpu_chill(). That is, instead of calling -|msleep() that requires ksoftirqd to wake it up, use the -|hrtimer_nanosleep() code that does the wakeup from hard irq context. -| -||Looks to be the lock of the block softirq. I don't have the core dump -||anymore, but from what I could tell the ksoftirqd was blocked on the -||block softirq lock, where the block softirq handler did a msleep -||(called by the qla2xxx interrupt handler). -|| -||Looking at trigger_softirq() in block/blk-softirq.c, it can do a -||smp_callfunction() to another cpu to run the block softirq. If that -||happens to be the cpu where the qla2xx irq handler is doing the block -||softirq and is in a middle of a msleep(), I believe the ksoftirqd will -||try to run the softirq. If it does that, then BOOM, it's deadlocked -||because the ksoftirqd will never run the timer softirq either. -| -||I should have also stated that it was only one lock that was involved. -||But the lock owner was doing a msleep() that requires a wakeup by -||ksoftirqd to continue. 
If ksoftirqd happens to be blocked on a lock -||held by the msleep() caller, then you have your deadlock. -|| -||It's best not to have any softirqs going to sleep requiring another -||softirq to wake it up. Note, if we ever require a timer softirq to do a -||cpu_chill() it will most definitely hit this deadlock. + ++ bigeasy later changed to schedule_hrtimeout() +|If a task calls cpu_chill() and gets woken up by a regular or spurious +|wakeup and has a signal pending, then it exits the sleep loop in +|do_nanosleep() and sets up the restart block. If restart->nanosleep.type is +|not TI_NONE then this results in accessing a stale user pointer from a +|previously interrupted syscall and a copy to user based on the stale +|pointer or a BUG() when 'type' is not supported in nanosleep_copyout(). + bigeasy: add PF_NOFREEZE: | [....] Waiting for /dev to be fully populated... @@ -83,8 +57,8 @@ Signed-off-by: Steven Rostedt Signed-off-by: Sebastian Andrzej Siewior --- include/linux/delay.h | 6 ++++++ - kernel/time/hrtimer.c | 19 +++++++++++++++++++ - 2 files changed, 25 insertions(+) + kernel/time/hrtimer.c | 21 +++++++++++++++++++++ + 2 files changed, 27 insertions(+) --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -101,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* defined(_LINUX_DELAY_H) */ --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1858,6 +1858,25 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct +@@ -1842,6 +1842,27 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct } #endif @@ -111,13 +85,15 @@ Signed-off-by: Sebastian Andrzej Siewior + */ +void cpu_chill(void) +{ -+ struct timespec64 tu = { -+ .tv_nsec = NSEC_PER_MSEC, -+ }; ++ ktime_t chill_time; + unsigned int freeze_flag = current->flags & PF_NOFREEZE; + ++ chill_time = ktime_set(0, NSEC_PER_MSEC); ++ set_current_state(TASK_UNINTERRUPTIBLE); + current->flags |= PF_NOFREEZE; -+ hrtimer_nanosleep(&tu, HRTIMER_MODE_REL_HARD, CLOCK_MONOTONIC); ++ sleeping_lock_inc(); ++ schedule_hrtimeout(&chill_time, HRTIMER_MODE_REL_HARD); ++ sleeping_lock_dec(); + if (!freeze_flag) + current->flags &= ~PF_NOFREEZE; +} diff --git a/debian/patches/features/all/rt/rt-local-irq-lock.patch b/debian/patches/features/all/rt/rt-local-irq-lock.patch index dd0c70772..e0a638ee8 100644 --- a/debian/patches/features/all/rt/rt-local-irq-lock.patch +++ b/debian/patches/features/all/rt/rt-local-irq-lock.patch @@ -1,7 +1,7 @@ Subject: rt: Add local irq locks From: Thomas Gleixner Date: Mon, 20 Jun 2011 09:03:47 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Introduce locallock. For !RT this maps to preempt_disable()/ local_irq_disable() so there is not much that changes. For RT this will diff --git a/debian/patches/features/all/rt/rt-preempt-base-config.patch b/debian/patches/features/all/rt/rt-preempt-base-config.patch index 2460d2308..0b170a521 100644 --- a/debian/patches/features/all/rt/rt-preempt-base-config.patch +++ b/debian/patches/features/all/rt/rt-preempt-base-config.patch @@ -1,7 +1,7 @@ Subject: rt: Provide PREEMPT_RT_BASE config switch From: Thomas Gleixner Date: Fri, 17 Jun 2011 12:39:57 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Introduce PREEMPT_RT_BASE which enables parts of PREEMPT_RT_FULL. 
Forces interrupt threading and enables some of the RT diff --git a/debian/patches/features/all/rt/rt-serial-warn-fix.patch b/debian/patches/features/all/rt/rt-serial-warn-fix.patch index 0b9165739..29ffa8ad1 100644 --- a/debian/patches/features/all/rt/rt-serial-warn-fix.patch +++ b/debian/patches/features/all/rt/rt-serial-warn-fix.patch @@ -1,7 +1,7 @@ Subject: rt: Improve the serial console PASS_LIMIT From: Ingo Molnar Date: Wed Dec 14 13:05:54 CET 2011 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Beyond the warning: @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c -@@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg +@@ -54,7 +54,16 @@ static struct uart_driver serial8250_reg static unsigned int skip_txen_test; /* force skip of txen test at init time */ diff --git a/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch b/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch index f1c57bce2..d24c476f5 100644 --- a/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch +++ b/debian/patches/features/all/rt/rtmutex-Make-lock_killable-work.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Sat, 1 Apr 2017 12:50:59 +0200 Subject: [PATCH] rtmutex: Make lock_killable work -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Locking an rt mutex killable does not work because signal handling is restricted to TASK_INTERRUPTIBLE. diff --git a/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch b/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch index 645e6f904..91c9a208e 100644 --- a/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch +++ b/debian/patches/features/all/rt/rtmutex-Provide-rt_mutex_slowlock_locked.patch @@ -1,16 +1,16 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 16:14:22 +0200 Subject: rtmutex: Provide rt_mutex_slowlock_locked() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt. 
Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - kernel/locking/rtmutex.c | 70 ++++++++++++++++++++++------------------ + kernel/locking/rtmutex.c | 67 ++++++++++++++++++++++------------------ kernel/locking/rtmutex_common.h | 6 +++ - 2 files changed, 46 insertions(+), 30 deletions(-) + 2 files changed, 44 insertions(+), 29 deletions(-) --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -56,31 +56,28 @@ Signed-off-by: Sebastian Andrzej Siewior set_current_state(state); -@@ -1280,17 +1261,18 @@ rt_mutex_slowlock(struct rt_mutex *lock, +@@ -1280,16 +1261,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, if (unlikely(timeout)) hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); - ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); + ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); -- if (likely(!ret)) -+ if (likely(!ret)) { + if (likely(!ret)) /* sleep on the mutex */ - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); + ret = __rt_mutex_slowlock(lock, state, timeout, waiter); -+ } if (unlikely(ret)) { __set_current_state(TASK_RUNNING); - if (rt_mutex_has_waiters(lock)) -- remove_waiter(lock, &waiter); +- remove_waiter(lock, &waiter); - rt_mutex_handle_deadlock(ret, chwalk, &waiter); -+ remove_waiter(lock, waiter); -+ /* ww_mutex want to report EDEADLK/EALREADY, let them */ ++ remove_waiter(lock, waiter); ++ rt_mutex_handle_deadlock(ret, chwalk, waiter); } /* -@@ -1298,6 +1280,34 @@ rt_mutex_slowlock(struct rt_mutex *lock, +@@ -1297,6 +1278,34 @@ rt_mutex_slowlock(struct rt_mutex *lock, * unconditionally. We might have to fix that up. */ fixup_rt_mutex_waiters(lock); @@ -117,7 +114,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -158,6 +158,12 @@ extern bool __rt_mutex_futex_unlock(stru +@@ -159,6 +159,12 @@ extern bool __rt_mutex_futex_unlock(stru struct wake_q_head *wqh); extern void rt_mutex_postunlock(struct wake_q_head *wake_q); diff --git a/debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch b/debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch index bcbbd52c9..633b00b4c 100644 --- a/debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch +++ b/debian/patches/features/all/rt/rtmutex-add-mutex-implementation-based-on-rtmutex.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:17:03 +0200 Subject: rtmutex: add mutex implementation based on rtmutex -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior diff --git a/debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch b/debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch index facba05c9..5fa9d668c 100644 --- a/debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch +++ b/debian/patches/features/all/rt/rtmutex-add-rwlock-implementation-based-on-rtmutex.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:18:06 +0200 Subject: rtmutex: add rwlock implementation based on rtmutex -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The implementation is bias-based, similar to the rwsem implementation. diff --git a/debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch b/debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch index 2f2bfed92..11fec24dd 100644 --- a/debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch +++ b/debian/patches/features/all/rt/rtmutex-add-rwsem-implementation-based-on-rtmutex.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:28:34 +0200 Subject: rtmutex: add rwsem implementation based on rtmutex -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The RT specific R/W semaphore implementation restricts the number of readers to one because a writer cannot block on multiple readers and inherit its @@ -42,15 +42,15 @@ the approach. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/rwsem_rt.h | 67 +++++++++++ - kernel/locking/rwsem-rt.c | 269 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 336 insertions(+) + include/linux/rwsem_rt.h | 68 ++++++++++ + kernel/locking/rwsem-rt.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 361 insertions(+) create mode 100644 include/linux/rwsem_rt.h create mode 100644 kernel/locking/rwsem-rt.c --- /dev/null +++ b/include/linux/rwsem_rt.h -@@ -0,0 +1,67 @@ +@@ -0,0 +1,68 @@ +#ifndef _LINUX_RWSEM_RT_H +#define _LINUX_RWSEM_RT_H + @@ -109,6 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior +} + +extern void __down_read(struct rw_semaphore *sem); ++extern int __down_read_killable(struct rw_semaphore *sem); +extern int __down_read_trylock(struct rw_semaphore *sem); +extern void __down_write(struct rw_semaphore *sem); +extern int __must_check __down_write_killable(struct rw_semaphore *sem); @@ -120,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- /dev/null +++ b/kernel/locking/rwsem-rt.c -@@ -0,0 +1,269 @@ +@@ -0,0 +1,293 @@ +/* + */ +#include @@ -202,13 +203,14 @@ Signed-off-by: Sebastian Andrzej Siewior + return 0; +} + -+void __sched __down_read(struct rw_semaphore *sem) ++static int __sched __down_read_common(struct rw_semaphore *sem, int state) +{ + struct rt_mutex *m = &sem->rtmutex; + struct rt_mutex_waiter waiter; ++ int ret; + + if (__down_read_trylock(sem)) -+ return; ++ return 0; + + might_sleep(); + raw_spin_lock_irq(&m->wait_lock); @@ -219,7 +221,7 @@ Signed-off-by: Sebastian Andrzej Siewior + if (atomic_read(&sem->readers) != WRITER_BIAS) { + atomic_inc(&sem->readers); + raw_spin_unlock_irq(&m->wait_lock); -+ return; ++ return 0; + } + + /* @@ -252,19 +254,42 @@ Signed-off-by: Sebastian Andrzej Siewior + * Reader2 to call up_read() which might be unbound. + */ + rt_mutex_init_waiter(&waiter, false); -+ rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL, -+ RT_MUTEX_MIN_CHAINWALK, -+ &waiter); ++ ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, ++ &waiter); + /* -+ * The slowlock() above is guaranteed to return with the rtmutex is -+ * now held, so there can't be a writer active. Increment the reader -+ * count and immediately drop the rtmutex again. ++ * The slowlock() above is guaranteed to return with the rtmutex (for ++ * ret = 0) is now held, so there can't be a writer active. 
Increment ++ * the reader count and immediately drop the rtmutex again. ++ * For ret != 0 we don't hold the rtmutex and need unlock the wait_lock. ++ * We don't own the lock then. + */ -+ atomic_inc(&sem->readers); ++ if (!ret) ++ atomic_inc(&sem->readers); + raw_spin_unlock_irq(&m->wait_lock); -+ __rt_mutex_unlock(m); ++ if (!ret) ++ __rt_mutex_unlock(m); + + debug_rt_mutex_free_waiter(&waiter); ++ return ret; ++} ++ ++void __down_read(struct rw_semaphore *sem) ++{ ++ int ret; ++ ++ ret = __down_read_common(sem, TASK_UNINTERRUPTIBLE); ++ WARN_ON_ONCE(ret); ++} ++ ++int __down_read_killable(struct rw_semaphore *sem) ++{ ++ int ret; ++ ++ ret = __down_read_common(sem, TASK_KILLABLE); ++ if (likely(!ret)) ++ return ret; ++ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret); ++ return -EINTR; +} + +void __up_read(struct rw_semaphore *sem) diff --git a/debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch b/debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch index 8869cca03..8caaae9cd 100644 --- a/debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch +++ b/debian/patches/features/all/rt/rtmutex-add-sleeping-lock-implementation.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:11:19 +0200 Subject: rtmutex: add sleeping lock implementation -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior @@ -10,14 +10,14 @@ Signed-off-by: Sebastian Andrzej Siewior include/linux/rtmutex.h | 21 + include/linux/sched.h | 9 include/linux/sched/wake_q.h | 27 ++ - include/linux/spinlock_rt.h | 159 +++++++++++++ + include/linux/spinlock_rt.h | 156 +++++++++++++ include/linux/spinlock_types_rt.h | 48 ++++ kernel/fork.c | 1 kernel/futex.c | 11 - kernel/locking/rtmutex.c | 449 ++++++++++++++++++++++++++++++++++---- + kernel/locking/rtmutex.c | 436 ++++++++++++++++++++++++++++++++++---- kernel/locking/rtmutex_common.h | 15 + kernel/sched/core.c | 28 +- - 11 files changed, 713 insertions(+), 59 deletions(-) + 11 files changed, 697 insertions(+), 59 deletions(-) create mode 100644 include/linux/spinlock_rt.h create mode 100644 include/linux/spinlock_types_rt.h @@ -121,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* Task command name length: */ -@@ -827,6 +835,7 @@ struct task_struct { +@@ -838,6 +846,7 @@ struct task_struct { raw_spinlock_t pi_lock; struct wake_q_node wake_q; @@ -166,7 +166,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* _LINUX_SCHED_WAKE_Q_H */ --- /dev/null +++ b/include/linux/spinlock_rt.h -@@ -0,0 +1,159 @@ +@@ -0,0 +1,156 @@ +#ifndef __LINUX_SPINLOCK_RT_H +#define __LINUX_SPINLOCK_RT_H + @@ -322,9 +322,6 @@ Signed-off-by: Sebastian Andrzej Siewior + BUG_ON(!spin_is_locked(lock)); +} + -+#define atomic_dec_and_lock(atomic, lock) \ -+ atomic_dec_and_spin_lock(atomic, lock) -+ +#endif --- /dev/null +++ b/include/linux/spinlock_types_rt.h @@ -379,7 +376,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -600,6 +600,7 @@ static struct task_struct *dup_task_stru +@@ -862,6 +862,7 @@ static struct task_struct *dup_task_stru tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; @@ -389,7 +386,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -1432,6 +1432,7 @@ 
static int wake_futex_pi(u32 __user *uad +@@ -1414,6 +1414,7 @@ static int wake_futex_pi(u32 __user *uad struct task_struct *new_owner; bool postunlock = false; DEFINE_WAKE_Q(wake_q); @@ -397,7 +394,7 @@ Signed-off-by: Sebastian Andrzej Siewior int ret = 0; new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1493,13 +1494,13 @@ static int wake_futex_pi(u32 __user *uad +@@ -1475,13 +1476,13 @@ static int wake_futex_pi(u32 __user *uad pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); @@ -414,7 +411,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -2811,7 +2812,7 @@ static int futex_lock_pi(u32 __user *uad +@@ -2793,7 +2794,7 @@ static int futex_lock_pi(u32 __user *uad goto no_block; } @@ -423,7 +420,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3183,7 +3184,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3165,7 +3166,7 @@ static int futex_wait_requeue_pi(u32 __u * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ @@ -566,7 +563,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * The current top waiter stays enqueued. We * don't have to change anything in the lock -@@ -926,6 +962,309 @@ static int try_to_take_rt_mutex(struct r +@@ -926,6 +962,296 @@ static int try_to_take_rt_mutex(struct r return 1; } @@ -838,19 +835,6 @@ Signed-off-by: Sebastian Andrzej Siewior +} +EXPORT_SYMBOL(rt_spin_trylock_irqsave); + -+int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) -+{ -+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ -+ if (atomic_add_unless(atomic, -1, 1)) -+ return 0; -+ rt_spin_lock(lock); -+ if (atomic_dec_and_test(atomic)) -+ return 1; -+ rt_spin_unlock(lock); -+ return 0; -+} -+EXPORT_SYMBOL(atomic_dec_and_spin_lock); -+ +void +__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) +{ @@ -876,7 +860,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Task blocks on lock. * -@@ -1039,6 +1378,7 @@ static int task_blocks_on_rt_mutex(struc +@@ -1039,6 +1365,7 @@ static int task_blocks_on_rt_mutex(struc * Called with lock->wait_lock held and interrupts disabled. */ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, @@ -884,7 +868,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct rt_mutex *lock) { struct rt_mutex_waiter *waiter; -@@ -1078,7 +1418,10 @@ static void mark_wakeup_next_waiter(stru +@@ -1078,7 +1405,10 @@ static void mark_wakeup_next_waiter(stru * Pairs with preempt_enable() in rt_mutex_postunlock(); */ preempt_disable(); @@ -896,7 +880,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock(¤t->pi_lock); } -@@ -1162,21 +1505,22 @@ void rt_mutex_adjust_pi(struct task_stru +@@ -1162,21 +1492,22 @@ void rt_mutex_adjust_pi(struct task_stru return; } next_lock = waiter->lock; @@ -921,7 +905,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -1295,7 +1639,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, +@@ -1293,7 +1624,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, unsigned long flags; int ret = 0; @@ -930,7 +914,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Technically we could use raw_spin_[un]lock_irq() here, but this can -@@ -1368,7 +1712,8 @@ static inline int rt_mutex_slowtrylock(s +@@ -1366,7 +1697,8 @@ static inline int rt_mutex_slowtrylock(s * Return whether the current task needs to call rt_mutex_postunlock(). 
*/ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, @@ -940,7 +924,7 @@ Signed-off-by: Sebastian Andrzej Siewior { unsigned long flags; -@@ -1422,7 +1767,7 @@ static bool __sched rt_mutex_slowunlock( +@@ -1420,7 +1752,7 @@ static bool __sched rt_mutex_slowunlock( * * Queue the next waiter for wakeup once we release the wait_lock. */ @@ -949,7 +933,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irqrestore(&lock->wait_lock, flags); return true; /* call rt_mutex_postunlock() */ -@@ -1474,9 +1819,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo +@@ -1472,9 +1804,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo /* * Performs the wakeup of the the top-waiter and re-enables preemption. */ @@ -962,7 +946,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ preempt_enable(); -@@ -1485,15 +1832,17 @@ void rt_mutex_postunlock(struct wake_q_h +@@ -1483,15 +1817,17 @@ void rt_mutex_postunlock(struct wake_q_h static inline void rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, @@ -983,7 +967,7 @@ Signed-off-by: Sebastian Andrzej Siewior } int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) -@@ -1653,16 +2002,13 @@ void __sched __rt_mutex_unlock(struct rt +@@ -1651,16 +1987,13 @@ void __sched __rt_mutex_unlock(struct rt void __sched rt_mutex_unlock(struct rt_mutex *lock) { mutex_release(&lock->dep_map, 1, _RET_IP_); @@ -1004,7 +988,7 @@ Signed-off-by: Sebastian Andrzej Siewior { lockdep_assert_held(&lock->wait_lock); -@@ -1679,22 +2025,34 @@ bool __sched __rt_mutex_futex_unlock(str +@@ -1677,23 +2010,35 @@ bool __sched __rt_mutex_futex_unlock(str * avoid inversion prior to the wakeup. preempt_disable() * therein pairs with rt_mutex_postunlock(). */ @@ -1019,8 +1003,8 @@ Signed-off-by: Sebastian Andrzej Siewior + * simple and will not need to retry. + */ +bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) +{ + return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); +} @@ -1029,12 +1013,13 @@ Signed-off-by: Sebastian Andrzej Siewior { DEFINE_WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_sleeper_q); + unsigned long flags; bool postunlock; - raw_spin_lock_irq(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); - postunlock = __rt_mutex_futex_unlock(lock, &wake_q); + postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); - raw_spin_unlock_irq(&lock->wait_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); if (postunlock) - rt_mutex_postunlock(&wake_q); @@ -1042,7 +1027,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -1733,7 +2091,7 @@ void __rt_mutex_init(struct rt_mutex *lo +@@ -1732,7 +2077,7 @@ void __rt_mutex_init(struct rt_mutex *lo if (name && key) debug_rt_mutex_init(lock, name, key); } @@ -1051,7 +1036,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1902,6 +2260,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m +@@ -1901,6 +2246,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter) { @@ -1059,7 +1044,7 @@ Signed-off-by: Sebastian Andrzej Siewior int ret; raw_spin_lock_irq(&lock->wait_lock); -@@ -1913,6 +2272,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m +@@ -1912,6 +2258,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m * have to fix that up. 
*/ fixup_rt_mutex_waiters(lock); @@ -1102,7 +1087,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_DEBUG_RT_MUTEXES unsigned long ip; struct pid *deadlock_task_pid; -@@ -137,7 +139,7 @@ extern void rt_mutex_init_proxy_locked(s +@@ -138,7 +140,7 @@ extern void rt_mutex_init_proxy_locked(s struct task_struct *proxy_owner); extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); @@ -1111,7 +1096,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); -@@ -155,9 +157,12 @@ extern int __rt_mutex_futex_trylock(stru +@@ -156,9 +158,12 @@ extern int __rt_mutex_futex_trylock(stru extern void rt_mutex_futex_unlock(struct rt_mutex *lock); extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, @@ -1126,7 +1111,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* RW semaphore special interface */ extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); -@@ -167,6 +172,10 @@ int __sched rt_mutex_slowlock_locked(str +@@ -168,6 +173,10 @@ int __sched rt_mutex_slowlock_locked(str struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, struct rt_mutex_waiter *waiter); @@ -1139,7 +1124,7 @@ Signed-off-by: Sebastian Andrzej Siewior # include "rtmutex-debug.h" --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -427,9 +427,15 @@ static bool set_nr_if_polling(struct tas +@@ -429,9 +429,15 @@ static bool set_nr_if_polling(struct tas #endif #endif @@ -1157,7 +1142,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Atomically grab the task, if ->wake_q is !nil already it means -@@ -451,24 +457,32 @@ void wake_q_add(struct wake_q_head *head +@@ -453,24 +459,32 @@ void wake_q_add(struct wake_q_head *head head->lastp = &node->next; } diff --git a/debian/patches/features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch b/debian/patches/features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch index 17b2d792b..9f11c96f1 100644 --- a/debian/patches/features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch +++ b/debian/patches/features/all/rt/rtmutex-add-ww_mutex-addon-for-mutex-rt.patch @@ -1,14 +1,14 @@ From: Sebastian Andrzej Siewior Date: Thu, 12 Oct 2017 17:34:38 +0200 Subject: rtmutex: add ww_mutex addon for mutex-rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Signed-off-by: Sebastian Andrzej Siewior --- - kernel/locking/rtmutex.c | 264 ++++++++++++++++++++++++++++++++++++++-- + kernel/locking/rtmutex.c | 269 ++++++++++++++++++++++++++++++++++++++-- kernel/locking/rtmutex_common.h | 2 kernel/locking/rwsem-rt.c | 2 - 3 files changed, 257 insertions(+), 11 deletions(-) + 3 files changed, 260 insertions(+), 13 deletions(-) --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "rtmutex_common.h" -@@ -1258,6 +1259,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); +@@ -1245,6 +1246,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); #endif /* PREEMPT_RT_FULL */ @@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) +{ + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); -+ struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); ++ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); + + if (!hold_ctx) + return 0; @@ -61,7 
+61,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter) -@@ -1536,7 +1571,8 @@ void rt_mutex_init_waiter(struct rt_mute +@@ -1523,7 +1558,8 @@ void rt_mutex_init_waiter(struct rt_mute static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior { int ret = 0; -@@ -1554,6 +1590,12 @@ static int __sched +@@ -1541,6 +1577,12 @@ static int __sched break; } @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irq(&lock->wait_lock); debug_rt_mutex_print_deadlock(waiter); -@@ -1588,16 +1630,106 @@ static void rt_mutex_handle_deadlock(int +@@ -1575,16 +1617,106 @@ static void rt_mutex_handle_deadlock(int } } @@ -192,9 +192,12 @@ Signed-off-by: Sebastian Andrzej Siewior set_current_state(state); -@@ -1609,7 +1741,12 @@ int __sched rt_mutex_slowlock_locked(str +@@ -1594,14 +1726,24 @@ int __sched rt_mutex_slowlock_locked(str - if (likely(!ret)) { + ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); + +- if (likely(!ret)) ++ if (likely(!ret)) { /* sleep on the mutex */ - ret = __rt_mutex_slowlock(lock, state, timeout, waiter); + ret = __rt_mutex_slowlock(lock, state, timeout, waiter, @@ -203,13 +206,13 @@ Signed-off-by: Sebastian Andrzej Siewior + /* ww_mutex received EDEADLK, let it become EALREADY */ + ret = __mutex_lock_check_stamp(lock, ww_ctx); + BUG_ON(!ret); - } ++ } if (unlikely(ret)) { -@@ -1617,6 +1754,10 @@ int __sched rt_mutex_slowlock_locked(str - if (rt_mutex_has_waiters(lock)) - remove_waiter(lock, waiter); - /* ww_mutex want to report EDEADLK/EALREADY, let them */ + __set_current_state(TASK_RUNNING); + remove_waiter(lock, waiter); +- rt_mutex_handle_deadlock(ret, chwalk, waiter); ++ /* ww_mutex wants to report EDEADLK/EALREADY, let it */ + if (!ww_ctx) + rt_mutex_handle_deadlock(ret, chwalk, waiter); + } else if (ww_ctx) { @@ -217,7 +220,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -1633,7 +1774,8 @@ int __sched rt_mutex_slowlock_locked(str +@@ -1618,7 +1760,8 @@ int __sched rt_mutex_slowlock_locked(str static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, @@ -227,7 +230,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct rt_mutex_waiter waiter; unsigned long flags; -@@ -1651,7 +1793,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, +@@ -1636,7 +1779,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, */ raw_spin_lock_irqsave(&lock->wait_lock, flags); @@ -237,7 +240,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -@@ -1781,29 +1924,33 @@ static bool __sched rt_mutex_slowunlock( +@@ -1766,29 +1910,33 @@ static bool __sched rt_mutex_slowunlock( */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, @@ -275,7 +278,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline int -@@ -1946,6 +2093,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc +@@ -1931,6 +2079,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, RT_MUTEX_MIN_CHAINWALK, @@ -283,7 +286,7 @@ Signed-off-by: Sebastian Andrzej Siewior rt_mutex_slowlock); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); -@@ -2266,7 +2414,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m +@@ -2252,7 +2401,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m 
raw_spin_lock_irq(&lock->wait_lock); /* sleep on the mutex */ set_current_state(TASK_INTERRUPTIBLE); @@ -292,7 +295,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. -@@ -2350,3 +2498,99 @@ bool rt_mutex_cleanup_proxy_lock(struct +@@ -2336,3 +2485,99 @@ bool rt_mutex_cleanup_proxy_lock(struct return cleanup; } @@ -394,7 +397,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -164,6 +164,7 @@ extern void rt_mutex_postunlock(struct w +@@ -165,6 +165,7 @@ extern void rt_mutex_postunlock(struct w struct wake_q_head *wake_sleeper_q); /* RW semaphore special interface */ @@ -402,7 +405,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); extern int __rt_mutex_trylock(struct rt_mutex *lock); -@@ -171,6 +172,7 @@ extern void __rt_mutex_unlock(struct rt_ +@@ -172,6 +173,7 @@ extern void __rt_mutex_unlock(struct rt_ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, @@ -412,12 +415,12 @@ Signed-off-by: Sebastian Andrzej Siewior struct rt_mutex_waiter *waiter, --- a/kernel/locking/rwsem-rt.c +++ b/kernel/locking/rwsem-rt.c -@@ -130,7 +130,7 @@ void __sched __down_read(struct rw_semap +@@ -131,7 +131,7 @@ static int __sched __down_read_common(st */ rt_mutex_init_waiter(&waiter, false); - rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL, -- RT_MUTEX_MIN_CHAINWALK, -+ RT_MUTEX_MIN_CHAINWALK, NULL, - &waiter); + ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, -- &waiter); ++ NULL, &waiter); /* - * The slowlock() above is guaranteed to return with the rtmutex is + * The slowlock() above is guaranteed to return with the rtmutex (for + * ret = 0) now held, so there can't be a writer active. Increment diff --git a/debian/patches/features/all/rt/rtmutex-annotate-sleeping-lock-context.patch b/debian/patches/features/all/rt/rtmutex-annotate-sleeping-lock-context.patch new file mode 100644 index 000000000..9febfb87f --- /dev/null +++ b/debian/patches/features/all/rt/rtmutex-annotate-sleeping-lock-context.patch @@ -0,0 +1,217 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 20 Apr 2018 18:13:11 +0200 +Subject: [PATCH] rtmutex: annotate sleeping lock context +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +The RCU code complains on schedule() within a rcu_read_lock() section. +The valid scenario on -RT is if a sleeping lock is held. In order to suppress +the warning the migrate_disable counter was used to identify the +invocation of schedule() due to lock contention. + +Grygorii Strashko reported that during CPU hotplug we might see the +warning via + rt_spin_lock() -> migrate_disable() -> pin_current_cpu() -> __read_rt_lock() +because the counter is not yet set. +It is also possible to trigger the warning from cpu_chill() +(seen on a kblockd_mod_delayed_work_on() caller). + +To address this RCU warning I annotate the sleeping lock context. The +counter is incremented before migrate_disable() so the warning Grygorii +reported should not trigger anymore. Additionally I use that counter in +cpu_chill() to avoid the RCU warning from there.
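
The counter's contract is easier to see in isolation than in the hunks of this patch. Below is a minimal, compilable userspace analogue of the pattern (pthread-based; all names are ours, not the kernel's): the per-thread counter is raised before anything that may block, so a checker that fires on a context switch can tell sleeping-lock contention apart from an illegal sleep.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static __thread int sleeping_lock;	/* stands in for task_struct::sleeping_lock */
static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

/* stand-in for the RCU check: complain unless a sleeping lock is the cause */
static void note_context_switch(void)
{
	if (!sleeping_lock)
		fprintf(stderr, "WARN: blocked outside a sleeping-lock section\n");
}

static void rt_spin_lock_analogue(pthread_mutex_t *lock)
{
	sleeping_lock++;	/* incremented before we can block ... */
	note_context_switch();	/* ... so the checker stays quiet */
	pthread_mutex_lock(lock);
}

static void rt_spin_unlock_analogue(pthread_mutex_t *lock)
{
	pthread_mutex_unlock(lock);
	sleeping_lock--;
}

int main(void)
{
	rt_spin_lock_analogue(&m);
	assert(sleeping_lock == 1);
	rt_spin_unlock_analogue(&m);
	assert(sleeping_lock == 0);
	return 0;
}

The ordering is the whole point: raising the counter before migrate_disable() and any possible blocking is what closes the hotplug window described above.
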
+ +Reported-by: Grygorii Strashko +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/sched.h | 20 ++++++++++++++++++++ + kernel/locking/rtmutex.c | 12 ++++++++++-- + kernel/locking/rwlock-rt.c | 18 ++++++++++++++---- + kernel/rcu/tree_plugin.h | 8 ++++---- + 4 files changed, 48 insertions(+), 10 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -611,6 +611,9 @@ struct task_struct { + int migrate_disable_atomic; + # endif + #endif ++#ifdef CONFIG_PREEMPT_RT_FULL ++ int sleeping_lock; ++#endif + + #ifdef CONFIG_PREEMPT_RCU + int rcu_read_lock_nesting; +@@ -1723,6 +1726,23 @@ static __always_inline bool need_resched + return unlikely(tif_need_resched()); + } + ++#ifdef CONFIG_PREEMPT_RT_FULL ++static inline void sleeping_lock_inc(void) ++{ ++ current->sleeping_lock++; ++} ++ ++static inline void sleeping_lock_dec(void) ++{ ++ current->sleeping_lock--; ++} ++ ++#else ++ ++static inline void sleeping_lock_inc(void) { } ++static inline void sleeping_lock_dec(void) { } ++#endif ++ + /* + * Wrappers for p->thread_info->cpu access. No-op on UP. + */ +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1141,6 +1141,7 @@ void __sched rt_spin_lock_slowunlock(str + + void __lockfunc rt_spin_lock(spinlock_t *lock) + { ++ sleeping_lock_inc(); + migrate_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); +@@ -1155,6 +1156,7 @@ void __lockfunc __rt_spin_lock(struct rt + #ifdef CONFIG_DEBUG_LOCK_ALLOC + void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) + { ++ sleeping_lock_inc(); + migrate_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); +@@ -1168,6 +1170,7 @@ void __lockfunc rt_spin_unlock(spinlock_ + spin_release(&lock->dep_map, 1, _RET_IP_); + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); + migrate_enable(); ++ sleeping_lock_dec(); + } + EXPORT_SYMBOL(rt_spin_unlock); + +@@ -1193,12 +1196,15 @@ int __lockfunc rt_spin_trylock(spinlock_ + { + int ret; + ++ sleeping_lock_inc(); + migrate_disable(); + ret = __rt_mutex_trylock(&lock->lock); +- if (ret) ++ if (ret) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); +- else ++ } else { + migrate_enable(); ++ sleeping_lock_dec(); ++ } + return ret; + } + EXPORT_SYMBOL(rt_spin_trylock); +@@ -1210,6 +1216,7 @@ int __lockfunc rt_spin_trylock_bh(spinlo + local_bh_disable(); + ret = __rt_mutex_trylock(&lock->lock); + if (ret) { ++ sleeping_lock_inc(); + migrate_disable(); + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + } else +@@ -1225,6 +1232,7 @@ int __lockfunc rt_spin_trylock_irqsave(s + *flags = 0; + ret = __rt_mutex_trylock(&lock->lock); + if (ret) { ++ sleeping_lock_inc(); + migrate_disable(); + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + } +--- a/kernel/locking/rwlock-rt.c ++++ b/kernel/locking/rwlock-rt.c +@@ -305,12 +305,15 @@ int __lockfunc rt_read_trylock(rwlock_t + { + int ret; + ++ sleeping_lock_inc(); + migrate_disable(); + ret = do_read_rt_trylock(rwlock); +- if (ret) ++ if (ret) { + rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); +- else ++ } else { + migrate_enable(); ++ sleeping_lock_dec(); ++ } + return ret; + } + EXPORT_SYMBOL(rt_read_trylock); +@@ -319,18 +322,22 @@ int __lockfunc rt_write_trylock(rwlock_t + { + int ret; + ++ sleeping_lock_inc(); + migrate_disable(); + ret = do_write_rt_trylock(rwlock); +- if (ret) ++ if (ret) { + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); +- else ++ } else { + 
migrate_enable(); ++ sleeping_lock_dec(); ++ } + return ret; + } + EXPORT_SYMBOL(rt_write_trylock); + + void __lockfunc rt_read_lock(rwlock_t *rwlock) + { ++ sleeping_lock_inc(); + migrate_disable(); + rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); + do_read_rt_lock(rwlock); +@@ -339,6 +346,7 @@ EXPORT_SYMBOL(rt_read_lock); + + void __lockfunc rt_write_lock(rwlock_t *rwlock) + { ++ sleeping_lock_inc(); + migrate_disable(); + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); + do_write_rt_lock(rwlock); +@@ -350,6 +358,7 @@ void __lockfunc rt_read_unlock(rwlock_t + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); + do_read_rt_unlock(rwlock); + migrate_enable(); ++ sleeping_lock_dec(); + } + EXPORT_SYMBOL(rt_read_unlock); + +@@ -358,6 +367,7 @@ void __lockfunc rt_write_unlock(rwlock_t + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); + do_write_rt_unlock(rwlock); + migrate_enable(); ++ sleeping_lock_dec(); + } + EXPORT_SYMBOL(rt_write_unlock); + +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -325,13 +325,13 @@ static void rcu_preempt_note_context_swi + struct task_struct *t = current; + struct rcu_data *rdp; + struct rcu_node *rnp; +- int mg_counter = 0; ++ int sleeping_l = 0; + + lockdep_assert_irqs_disabled(); +-#if defined(CONFIG_PREEMPT_RT_BASE) +- mg_counter = t->migrate_disable; ++#if defined(CONFIG_PREEMPT_RT_FULL) ++ sleeping_l = t->sleeping_lock; + #endif +- WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !mg_counter); ++ WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l); + if (t->rcu_read_lock_nesting > 0 && + !t->rcu_read_unlock_special.b.blocked) { + diff --git a/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch b/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch index 98aa7601f..3935f5cf1 100644 --- a/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch +++ b/debian/patches/features/all/rt/rtmutex-avoid-include-hell.patch @@ -1,7 +1,7 @@ Subject: rtmutex: Avoid include hell From: Thomas Gleixner Date: Wed, 29 Jun 2011 20:06:39 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Include only the required raw types. This avoids pulling in the complete spinlock header which in turn requires rtmutex.h at some point. diff --git a/debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch b/debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch index 1289f2646..e76dfd8dc 100644 --- a/debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch +++ b/debian/patches/features/all/rt/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch @@ -2,7 +2,7 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 16:36:39 +0200 Subject: rtmutex: export lockdep-less version of rt_mutex's lock, trylock and unlock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Required for lock implementation ontop of rtmutex. 
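
A sketch of the intended use, with invented names (only __rt_mutex_lock_state() and __rt_mutex_unlock() are from the patch): a lock type built on top of rtmutex annotates lockdep once, at its own API boundary, and then calls the lockdep-less inner primitive so the acquisition is not recorded twice.

struct my_rwsem {
	struct rt_mutex		rtmutex;	/* implementation detail */
	struct lockdep_map	dep_map;	/* tracked as "my_rwsem", once */
};

static void my_down_write(struct my_rwsem *sem)
{
	/* annotate at the API the caller actually used ... */
	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
	/* ... then take the inner lock without a second annotation */
	__rt_mutex_lock_state(&sem->rtmutex, TASK_UNINTERRUPTIBLE);
}

static void my_up_write(struct my_rwsem *sem)
{
	rwsem_release(&sem->dep_map, 1, _RET_IP_);
	__rt_mutex_unlock(&sem->rtmutex);
}

This mirrors the shape of the rwsem-rt and rwlock-rt implementations elsewhere in this series.
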
@@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1496,6 +1496,29 @@ rt_mutex_fastunlock(struct rt_mutex *loc +@@ -1494,6 +1494,29 @@ rt_mutex_fastunlock(struct rt_mutex *loc rt_mutex_postunlock(&wake_q); } @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * rt_mutex_lock - lock a rt_mutex * -@@ -1503,10 +1526,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc +@@ -1501,10 +1524,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc */ void __sched rt_mutex_lock(struct rt_mutex *lock) { @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior } EXPORT_SYMBOL_GPL(rt_mutex_lock); -@@ -1521,16 +1541,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); +@@ -1519,16 +1539,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); */ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { @@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -@@ -1556,13 +1567,10 @@ int __sched __rt_mutex_futex_trylock(str +@@ -1554,13 +1565,10 @@ int __sched __rt_mutex_futex_trylock(str * Returns: * 0 on success * -EINTR when interrupted by a signal @@ -90,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior } EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); -@@ -1597,6 +1605,18 @@ rt_mutex_timed_lock(struct rt_mutex *loc +@@ -1595,6 +1603,18 @@ rt_mutex_timed_lock(struct rt_mutex *loc } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); @@ -109,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * rt_mutex_trylock - try to lock a rt_mutex * -@@ -1612,14 +1632,7 @@ int __sched rt_mutex_trylock(struct rt_m +@@ -1610,14 +1630,7 @@ int __sched rt_mutex_trylock(struct rt_m { int ret; @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (ret) mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -@@ -1627,6 +1640,11 @@ int __sched rt_mutex_trylock(struct rt_m +@@ -1625,6 +1638,11 @@ int __sched rt_mutex_trylock(struct rt_m } EXPORT_SYMBOL_GPL(rt_mutex_trylock); @@ -139,7 +139,7 @@ Signed-off-by: Sebastian Andrzej Siewior * --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -160,6 +160,9 @@ extern bool __rt_mutex_futex_unlock(stru +@@ -161,6 +161,9 @@ extern bool __rt_mutex_futex_unlock(stru extern void rt_mutex_postunlock(struct wake_q_head *wake_q); /* RW semaphore special interface */ diff --git a/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch b/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch index c86dad0ec..7c017a994 100644 --- a/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch +++ b/debian/patches/features/all/rt/rtmutex-futex-prepare-rt.patch @@ -1,7 +1,7 @@ Subject: rtmutex: Handle the various new futex race conditions From: Thomas Gleixner Date: Fri, 10 Jun 2011 11:04:15 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz RT opens a few new interesting race conditions in the rtmutex/futex combo due to futex hash bucket lock being a 'sleeping' spinlock and @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2104,6 +2104,16 @@ static int futex_requeue(u32 __user *uad +@@ -2086,6 +2086,16 @@ static int futex_requeue(u32 __user *uad requeue_pi_wake_futex(this, &key2, hb2); drop_count++; continue; @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner } else if (ret) { /* * rt_mutex_start_proxy_lock() detected a -@@ -3144,7 +3154,7 @@ static int 
futex_wait_requeue_pi(u32 __u +@@ -3126,7 +3136,7 @@ static int futex_wait_requeue_pi(u32 __u struct hrtimer_sleeper timeout, *to = NULL; struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; -@@ -3202,20 +3212,55 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3184,20 +3194,55 @@ static int futex_wait_requeue_pi(u32 __u /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); @@ -109,7 +109,7 @@ Signed-off-by: Thomas Gleixner /* Check if the requeue code acquired the second futex for us. */ if (!q.rt_waiter) { -@@ -3224,7 +3269,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3206,7 +3251,8 @@ static int futex_wait_requeue_pi(u32 __u * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { @@ -119,7 +119,7 @@ Signed-off-by: Thomas Gleixner ret = fixup_pi_state_owner(uaddr2, &q, current); if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { pi_state = q.pi_state; -@@ -3235,7 +3281,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3217,7 +3263,7 @@ static int futex_wait_requeue_pi(u32 __u * the requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); @@ -128,7 +128,7 @@ Signed-off-by: Thomas Gleixner } } else { struct rt_mutex *pi_mutex; -@@ -3249,7 +3295,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3231,7 +3277,8 @@ static int futex_wait_requeue_pi(u32 __u pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); @@ -234,7 +234,7 @@ Signed-off-by: Thomas Gleixner } --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -129,6 +129,8 @@ enum rtmutex_chainwalk { +@@ -130,6 +130,8 @@ enum rtmutex_chainwalk { /* * PI-futex support (proxy locking functions, etc.): */ diff --git a/debian/patches/features/all/rt/rtmutex-lock-killable.patch b/debian/patches/features/all/rt/rtmutex-lock-killable.patch index 6e7c389b8..efc51ffd9 100644 --- a/debian/patches/features/all/rt/rtmutex-lock-killable.patch +++ b/debian/patches/features/all/rt/rtmutex-lock-killable.patch @@ -1,7 +1,7 @@ Subject: rtmutex: Add rtmutex_lock_killable() From: Thomas Gleixner Date: Thu, 09 Jun 2011 11:43:52 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Add "killable" type to rtmutex. We need this since rtmutex are used as "normal" mutexes which do use this type. 
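
A hypothetical caller (not from the patch; frob_dev and do_frob() are invented) showing why the killable variant exists: only a fatal signal aborts the wait, with -EINTR, so an unkillable task cannot hang forever on the lock, while ordinary signals do not interrupt it the way rt_mutex_lock_interruptible() would allow.

static int frob_device(struct frob_dev *dev)
{
	int ret;

	ret = rt_mutex_lock_killable(&dev->lock);
	if (ret)
		return ret;	/* -EINTR: the task is being killed, back out */

	ret = do_frob(dev);	/* critical section */
	rt_mutex_unlock(&dev->lock);
	return ret;
}
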
@@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1543,6 +1543,25 @@ int __sched __rt_mutex_futex_trylock(str +@@ -1542,6 +1542,25 @@ int __sched __rt_mutex_futex_trylock(str } /** diff --git a/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch b/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch index 16dbacf2d..1fd36ed54 100644 --- a/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch +++ b/debian/patches/features/all/rt/rtmutex-trylock-is-okay-on-RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed 02 Dec 2015 11:34:07 +0100 Subject: rtmutex: trylock is okay on -RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz non-RT kernel could deadlock on rt_mutex_trylock() in softirq context. On -RT we don't run softirqs in IRQ context but in thread context so it is @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1563,7 +1563,11 @@ int __sched rt_mutex_trylock(struct rt_m +@@ -1562,7 +1562,11 @@ int __sched rt_mutex_trylock(struct rt_m { int ret; diff --git a/debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch b/debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch index 28e214244..87dace6af 100644 --- a/debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch +++ b/debian/patches/features/all/rt/rtmutex-wire-up-RT-s-locking.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:31:14 +0200 Subject: rtmutex: wire up RT's locking -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior @@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Simple, straightforward mutexes with strict semantics: * -@@ -114,13 +125,6 @@ do { \ +@@ -119,13 +130,6 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ -@@ -228,4 +232,6 @@ mutex_trylock_recursive(struct mutex *lo +@@ -233,4 +237,6 @@ mutex_trylock_recursive(struct mutex *lo return mutex_trylock(lock); } @@ -86,8 +86,8 @@ Signed-off-by: Sebastian Andrzej Siewior */ --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h -@@ -286,7 +286,11 @@ static inline void do_raw_spin_unlock(ra - #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) +@@ -279,7 +279,11 @@ static inline void do_raw_spin_unlock(ra + }) /* Include rwlock functions */ -#include @@ -99,7 +99,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: -@@ -297,6 +301,10 @@ static inline void do_raw_spin_unlock(ra +@@ -290,6 +294,10 @@ static inline void do_raw_spin_unlock(ra # include #endif @@ -110,13 +110,15 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ -@@ -421,4 +429,6 @@ extern int _atomic_dec_and_lock(atomic_t - #define atomic_dec_and_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +@@ -392,6 +400,8 @@ static 
__always_inline int spin_is_conte + + #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) +#endif /* !PREEMPT_RT_FULL */ + - #endif /* __LINUX_SPINLOCK_H */ + /* + * Pull the atomic_t declaration: + * (asm-mips/atomic.h needs above definitions) --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh( @@ -185,7 +187,7 @@ Signed-off-by: Sebastian Andrzej Siewior obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c -@@ -125,8 +125,11 @@ void __lockfunc __raw_##op##_lock_bh(loc +@@ -117,8 +117,11 @@ void __lockfunc __raw_##op##_lock_bh(loc * __[spin|read|write]_lock_bh() */ BUILD_LOCK_OPS(spin, raw_spinlock); @@ -197,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif -@@ -210,6 +213,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ +@@ -202,6 +205,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ EXPORT_SYMBOL(_raw_spin_unlock_bh); #endif @@ -206,7 +208,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifndef CONFIG_INLINE_READ_TRYLOCK int __lockfunc _raw_read_trylock(rwlock_t *lock) { -@@ -354,6 +359,8 @@ void __lockfunc _raw_write_unlock_bh(rwl +@@ -346,6 +351,8 @@ void __lockfunc _raw_write_unlock_bh(rwl EXPORT_SYMBOL(_raw_write_unlock_bh); #endif diff --git a/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch b/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch index df4a5ac17..f9e175655 100644 --- a/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch +++ b/debian/patches/features/all/rt/rtmutex_dont_include_rcu.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior Subject: rbtree: don't include the rcu header -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The RCU header pulls in spinlock.h and fails due not yet defined types: diff --git a/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch b/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch index 33cec82bc..b33ff7aad 100644 --- a/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch +++ b/debian/patches/features/all/rt/rxrpc-remove-unused-static-variables.patch @@ -1,7 +1,9 @@ From: Sebastian Andrzej Siewior Date: Fri, 21 Oct 2016 10:54:50 +0200 Subject: [PATCH] rxrpc: remove unused static variables -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +upstream commit edb63e2b271752a9424a3d33cfcd4f434a020f9b The rxrpc_security_methods and rxrpc_security_sem user has been removed in 648af7fca159 ("rxrpc: Absorb the rxkad security module"). 
This was diff --git a/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch b/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch index 0f7d8ac49..c72f47c81 100644 --- a/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch +++ b/debian/patches/features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 6 Jun 2017 14:20:37 +0200 Subject: sched: Prevent task state corruption by spurious lock wakeup -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Mathias and others reported GDB failures on RT. @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2206,7 +2206,7 @@ EXPORT_SYMBOL(wake_up_process); +@@ -2217,7 +2217,7 @@ EXPORT_SYMBOL(wake_up_process); */ int wake_up_lock_sleeper(struct task_struct *p) { diff --git a/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch b/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch index 51571450f..39936d71d 100644 --- a/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch +++ b/debian/patches/features/all/rt/sched-Remove-TASK_ALL.patch @@ -1,7 +1,7 @@ From: Peter Zijlstra Date: Wed, 7 Jun 2017 10:12:45 +0200 Subject: [PATCH] sched: Remove TASK_ALL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz It's unused: diff --git a/debian/patches/features/all/rt/sched-delay-put-task.patch b/debian/patches/features/all/rt/sched-delay-put-task.patch index b6bb9cd70..6c4b81347 100644 --- a/debian/patches/features/all/rt/sched-delay-put-task.patch +++ b/debian/patches/features/all/rt/sched-delay-put-task.patch @@ -1,7 +1,7 @@ Subject: sched: Move task_struct cleanup to RCU From: Thomas Gleixner Date: Tue, 31 May 2011 16:59:16 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz __put_task_struct() does quite some expensive work. We don't want to burden random tasks with that. 
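
The shape of the RT change, condensed (field and callback names as in the upstream RT patch; the hunks below show only fragments of it): the final reference drop hands the expensive teardown to an RCU callback instead of running it in whatever task happened to drop the last reference.

#ifdef CONFIG_PREEMPT_RT_BASE
/* runs from RCU callback context, not from the caller of put_task_struct() */
void __put_task_struct_cb(struct rcu_head *rhp)
{
	struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);

	__put_task_struct(tsk);
}

static inline void put_task_struct(struct task_struct *t)
{
	if (atomic_dec_and_test(&t->usage))
		call_rcu(&t->put_rcu, __put_task_struct_cb);
}
#endif
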
@@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1093,6 +1093,9 @@ struct task_struct { +@@ -1092,6 +1092,9 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -408,7 +408,9 @@ static inline void put_signal_struct(str +@@ -639,7 +639,9 @@ static inline void put_signal_struct(str if (atomic_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); } @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); -@@ -425,7 +427,18 @@ void __put_task_struct(struct task_struc +@@ -656,7 +658,18 @@ void __put_task_struct(struct task_struc if (!profile_handoff_task(tsk)) free_task(tsk); } diff --git a/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch b/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch index c764a3dd1..a5da7b9b1 100644 --- a/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch +++ b/debian/patches/features/all/rt/sched-disable-rt-group-sched-on-rt.patch @@ -1,7 +1,7 @@ Subject: sched: Disable CONFIG_RT_GROUP_SCHED on RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 17:03:52 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Carsten reported problems when running: @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner --- a/init/Kconfig +++ b/init/Kconfig -@@ -744,6 +744,7 @@ config CFS_BANDWIDTH +@@ -743,6 +743,7 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED diff --git a/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch b/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch index 56885f678..71ff85d7c 100644 --- a/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch +++ b/debian/patches/features/all/rt/sched-disable-ttwu-queue.patch @@ -1,7 +1,7 @@ Subject: sched: Disable TTWU_QUEUE on RT From: Thomas Gleixner Date: Tue, 13 Sep 2011 16:42:35 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The queued remote wakeup mechanism can introduce rather large latencies if the number of migrated tasks is high. Disable it for RT. diff --git a/debian/patches/features/all/rt/sched-limit-nr-migrate.patch b/debian/patches/features/all/rt/sched-limit-nr-migrate.patch index 77fe79041..49a0381e5 100644 --- a/debian/patches/features/all/rt/sched-limit-nr-migrate.patch +++ b/debian/patches/features/all/rt/sched-limit-nr-migrate.patch @@ -1,7 +1,7 @@ Subject: sched: Limit the number of task migrations per batch From: Thomas Gleixner Date: Mon, 06 Jun 2011 12:12:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Put an upper limit on the number of tasks which are migrated per batch to avoid large latencies. 
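
The hunk that follows is pure line-number shift, so its payload is not visible here; per the upstream RT patch it is a single #ifdef choosing a smaller batch (8 instead of the default 32). Less work per irqs-off balance run bounds the latency, at some cost in load-balancing throughput.

/*
 * Number of tasks to iterate in a single balance run.
 * Limited because this is done with IRQs disabled.
 */
#if defined(CONFIG_PREEMPT_RT_FULL)
const_debug unsigned int sysctl_sched_nr_migrate = 8;
#else
const_debug unsigned int sysctl_sched_nr_migrate = 32;
#endif
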
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -59,7 +59,11 @@ const_debug unsigned int sysctl_sched_fe +@@ -64,7 +64,11 @@ const_debug unsigned int sysctl_sched_fe * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. */ diff --git a/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch b/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch index 1f3d44b06..1c42e78a4 100644 --- a/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch +++ b/debian/patches/features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch @@ -1,7 +1,7 @@ Subject: sched: Do not account rcu_preempt_depth on RT in might_sleep() From: Thomas Gleixner Date: Tue, 07 Jun 2011 09:19:06 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz RT changes the rcu_preempt_depth semantics, so we cannot check for it in might_sleep(). @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner /* Internal to kernel */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6049,7 +6049,7 @@ void __init sched_init(void) +@@ -6137,7 +6137,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { diff --git a/debian/patches/features/all/rt/sched-mmdrop-delayed.patch b/debian/patches/features/all/rt/sched-mmdrop-delayed.patch index 276ede976..8e4c28509 100644 --- a/debian/patches/features/all/rt/sched-mmdrop-delayed.patch +++ b/debian/patches/features/all/rt/sched-mmdrop-delayed.patch @@ -1,7 +1,7 @@ Subject: sched: Move mmdrop to RCU on RT From: Thomas Gleixner Date: Mon, 06 Jun 2011 12:20:33 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Takes sleeping locks and calls into the memory allocator, so nothing we want to do in task switch and other atomic contexts. @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner #include #include -@@ -504,6 +505,9 @@ struct mm_struct { +@@ -491,6 +492,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner #endif --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h -@@ -43,6 +43,17 @@ static inline void mmdrop(struct mm_stru +@@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_stru __mmdrop(mm); } @@ -51,12 +51,12 @@ Signed-off-by: Thomas Gleixner +# define mmdrop_delayed(mm) mmdrop(mm) +#endif + - static inline void mmdrop_async_fn(struct work_struct *work) - { - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); + /** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin.
--- a/kernel/fork.c +++ b/kernel/fork.c -@@ -930,6 +930,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -605,6 +605,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -73,26 +73,27 @@ Signed-off-by: Thomas Gleixner +} +#endif + - static inline void __mmput(struct mm_struct *mm) + static void mmdrop_async_fn(struct work_struct *work) { - VM_BUG_ON(atomic_read(&mm->mm_users)); + struct mm_struct *mm; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2695,8 +2695,12 @@ static struct rq *finish_task_switch(str - finish_arch_post_lock_switch(); - - fire_sched_in_preempt_notifiers(current); +@@ -2766,9 +2766,13 @@ static struct rq *finish_task_switch(str + * provided by mmdrop(), + * - a sync_core for SYNC_CORE. + */ + /* + * We use mmdrop_delayed() here so we don't have to do the + * full __mmdrop() when we are the last user. + */ - if (mm) + if (mm) { + membarrier_mm_sync_core_before_usermode(mm); - mmdrop(mm); + mmdrop_delayed(mm); + } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) - prev->sched_class->task_dead(prev); -@@ -5433,6 +5437,8 @@ void sched_setnuma(struct task_struct *p +@@ -5532,6 +5536,8 @@ void sched_setnuma(struct task_struct *p #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU @@ -101,7 +102,7 @@ Signed-off-by: Thomas Gleixner /* * Ensure that the idle task is using init_mm right before its CPU goes * offline. -@@ -5447,7 +5453,12 @@ void idle_task_exit(void) +@@ -5546,7 +5552,12 @@ void idle_task_exit(void) switch_mm(mm, &init_mm, current); finish_arch_post_lock_switch(); } @@ -115,7 +116,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -5750,6 +5761,10 @@ int sched_cpu_dying(unsigned int cpu) +@@ -5849,6 +5860,10 @@ int sched_cpu_dying(unsigned int cpu) update_max_interval(); nohz_balance_exit_idle(cpu); hrtick_clear(rq); diff --git a/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch b/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch index 33ffb892b..2401090b9 100644 --- a/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch +++ b/debian/patches/features/all/rt/sched-rt-mutex-wakeup.patch @@ -1,7 +1,7 @@ Subject: sched: Add saved_state for tasks blocked on sleeping locks From: Thomas Gleixner Date: Sat, 25 Jun 2011 09:21:04 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Spinlocks are state preserving in !RT. RT changes the state when a task gets blocked on a lock. So we need to remember the state before @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -526,6 +526,8 @@ struct task_struct { +@@ -530,6 +530,8 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner /* * This begins the randomizable portion of task_struct. 
Only -@@ -1507,6 +1509,7 @@ extern struct task_struct *find_task_by_ +@@ -1521,6 +1523,7 @@ extern struct task_struct *find_get_task extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_SMP --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2015,8 +2015,25 @@ try_to_wake_up(struct task_struct *p, un +@@ -2026,8 +2026,25 @@ try_to_wake_up(struct task_struct *p, un */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); @@ -64,7 +64,7 @@ Signed-off-by: Thomas Gleixner trace_sched_waking(p); -@@ -2180,6 +2197,18 @@ int wake_up_process(struct task_struct * +@@ -2191,6 +2208,18 @@ int wake_up_process(struct task_struct * } EXPORT_SYMBOL(wake_up_process); @@ -85,7 +85,7 @@ Signed-off-by: Thomas Gleixner return try_to_wake_up(p, state, 0); --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1352,6 +1352,7 @@ static inline void finish_lock_switch(st +@@ -1362,6 +1362,7 @@ static inline int task_on_rq_migrating(s #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ #define WF_FORK 0x02 /* child wakeup after fork */ #define WF_MIGRATED 0x4 /* internal use, task got migrated */ diff --git a/debian/patches/features/all/rt/sched-swait-include-wait.h.patch b/debian/patches/features/all/rt/sched-swait-include-wait.h.patch index bae4c414d..053555188 100644 --- a/debian/patches/features/all/rt/sched-swait-include-wait.h.patch +++ b/debian/patches/features/all/rt/sched-swait-include-wait.h.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 4 Dec 2017 13:11:10 +0100 Subject: [PATCH] sched/swait: include wait.h -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz kbuild bot reported against an intermediate RT patch that the build fails with: diff --git a/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch b/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch index e261e005f..069346f58 100644 --- a/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch +++ b/debian/patches/features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch @@ -1,7 +1,7 @@ Subject: sched: ttwu: Return success when only changing the saved_state value From: Thomas Gleixner Date: Tue, 13 Dec 2011 21:42:19 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When a task blocks on a rt lock, it saves the current state in p->saved_state, so a lock related wake up will not destroy the @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2022,8 +2022,10 @@ try_to_wake_up(struct task_struct *p, un +@@ -2033,8 +2033,10 @@ try_to_wake_up(struct task_struct *p, un * if the wakeup condition is true. 
*/ if (!(wake_flags & WF_LOCK_SLEEPER)) { diff --git a/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch b/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch index a559757d1..ba1dec7b9 100644 --- a/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch +++ b/debian/patches/features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch @@ -1,7 +1,7 @@ From: Steven Rostedt Date: Mon, 18 Mar 2013 15:12:49 -0400 Subject: sched/workqueue: Only wake up idle workers if not blocked on sleeping spin lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz In -rt, most spin_locks() turn into mutexes. One of these spin_lock conversions is performed on the workqueue gcwq->lock. When the idle @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3387,8 +3387,10 @@ static void __sched notrace __schedule(b +@@ -3463,8 +3463,10 @@ static void __sched notrace __schedule(b * If a worker went to sleep, notify and ask workqueue * whether it wants to wake up a task to maintain * concurrency. diff --git a/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch b/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch index 66381021b..0d5504181 100644 --- a/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch +++ b/debian/patches/features/all/rt/scsi-fcoe-rt-aware.patch @@ -1,7 +1,7 @@ Subject: scsi/fcoe: Make RT aware. From: Thomas Gleixner Date: Sat, 12 Nov 2011 14:00:48 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Do not disable preemption while taking sleeping locks. All users look safe for migrate_disable() only.
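
The conversions in the hunks below follow one generic pattern, sketched here with an invented per-CPU variable rather than fcoe's actual fields: get_cpu_var() pins the task by disabling preemption, which forbids taking sleeping locks, while migrate_disable() keeps the task on its CPU but leaves it preemptible.

struct my_stats { unsigned long err_frames; };
DEFINE_PER_CPU(struct my_stats, my_percpu_stats);	/* invented */

static void account_error(void)
{
	struct my_stats *stats;

	/* before: preemption off, no sleeping until put_cpu_var() */
	stats = &get_cpu_var(my_percpu_stats);
	stats->err_frames++;
	put_cpu_var(my_percpu_stats);
}

static void account_error_rt(void)
{
	struct my_stats *stats;

	/* after (RT): CPU-stable but preemptible; sleeping locks allowed */
	migrate_disable();
	stats = this_cpu_ptr(&my_percpu_stats);
	stats->err_frames++;
	migrate_enable();
}
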
@@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c -@@ -1464,11 +1464,11 @@ static int fcoe_rcv(struct sk_buff *skb, +@@ -1459,11 +1459,11 @@ static int fcoe_rcv(struct sk_buff *skb, static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) { struct fcoe_percpu_s *fps; @@ -30,7 +30,7 @@ Signed-off-by: Thomas Gleixner return rc; } -@@ -1655,11 +1655,11 @@ static inline int fcoe_filter_frames(str +@@ -1650,11 +1650,11 @@ static inline int fcoe_filter_frames(str return 0; } @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner return -EINVAL; } -@@ -1702,7 +1702,7 @@ static void fcoe_recv_frame(struct sk_bu +@@ -1697,7 +1697,7 @@ static void fcoe_recv_frame(struct sk_bu */ hp = (struct fcoe_hdr *) skb_network_header(skb); @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { if (stats->ErrorFrames < 5) printk(KERN_WARNING "fcoe: FCoE version " -@@ -1734,13 +1734,13 @@ static void fcoe_recv_frame(struct sk_bu +@@ -1729,13 +1729,13 @@ static void fcoe_recv_frame(struct sk_bu goto drop; if (!fcoe_filter_frames(lport, fp)) { diff --git a/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch b/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch index c8a140ecf..329a10b56 100644 --- a/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch +++ b/debian/patches/features/all/rt/seqlock-prevent-rt-starvation.patch @@ -1,7 +1,7 @@ Subject: seqlock: Prevent rt starvation From: Thomas Gleixner Date: Wed, 22 Feb 2012 12:03:30 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz If a low prio writer gets preempted while holding the seqlock write locked, a high prio reader spins forever on RT. @@ -63,7 +63,7 @@ Signed-off-by: Thomas Gleixner /** * raw_write_seqcount_barrier - do a seq write barrier * @s: pointer to seqcount_t -@@ -429,10 +439,32 @@ typedef struct { +@@ -428,10 +438,32 @@ typedef struct { /* * Read side functions for starting and finalizing a read side section. */ @@ -81,7 +81,7 @@ Signed-off-by: Thomas Gleixner + unsigned ret; + +repeat: -+ ret = ACCESS_ONCE(sl->seqcount.sequence); ++ ret = READ_ONCE(sl->seqcount.sequence); + if (unlikely(ret & 1)) { + /* + * Take the lock and let the writer proceed (i.e. 
evtl @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { -@@ -447,36 +479,36 @@ static inline unsigned read_seqretry(con +@@ -446,36 +478,36 @@ static inline unsigned read_seqretry(con static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); @@ -139,7 +139,7 @@ Signed-off-by: Thomas Gleixner spin_unlock_irq(&sl->lock); } -@@ -485,7 +517,7 @@ static inline unsigned long __write_seql +@@ -484,7 +516,7 @@ static inline unsigned long __write_seql unsigned long flags; spin_lock_irqsave(&sl->lock, flags); @@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner return flags; } -@@ -495,7 +527,7 @@ static inline unsigned long __write_seql +@@ -494,7 +526,7 @@ static inline unsigned long __write_seql static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { diff --git a/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch b/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch index 5eaa67608..03d707e9d 100644 --- a/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch +++ b/debian/patches/features/all/rt/signal-revert-ptrace-preempt-magic.patch @@ -1,7 +1,7 @@ Subject: signal: Revert ptrace preempt magic From: Thomas Gleixner Date: Wed, 21 Sep 2011 19:57:12 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more than a bandaid around the ptrace design trainwreck. It's not a @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1876,15 +1876,7 @@ static void ptrace_stop(int exit_code, i +@@ -2008,15 +2008,7 @@ static void ptrace_stop(int exit_code, i if (gstop_done && ptrace_reparented(current)) do_notify_parent_cldstop(current, false, why); diff --git a/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch index 68a059a34..89dec2542 100644 --- a/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch +++ b/debian/patches/features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:44:56 -0500 Subject: signals: Allow rt tasks to cache one sigqueue struct -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz To avoid allocation allow rt tasks to cache one sigqueue struct in task struct. 
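
The cache is a one-slot free list per task. A compact, compilable userspace analogue (our names; the kernel version swaps the slot with cmpxchg on a task_struct field) shows the behaviour: a hot signal path recycles one node instead of round-tripping the allocator.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sigqueue_like { int sig; };

/* one cached node per thread, mirroring the per-task slot */
static _Thread_local _Atomic(struct sigqueue_like *) cache;

static struct sigqueue_like *q_alloc(void)
{
	struct sigqueue_like *q = atomic_exchange(&cache, NULL);

	return q ? q : malloc(sizeof(*q));	/* slow path: real allocation */
}

static void q_free(struct sigqueue_like *q)
{
	struct sigqueue_like *expected = NULL;

	/* park the node in the empty slot, otherwise really free it */
	if (!atomic_compare_exchange_strong(&cache, &expected, q))
		free(q);
}

int main(void)
{
	struct sigqueue_like *a = q_alloc();

	q_free(a);				/* parked, not freed */
	printf("recycled: %d\n", a == q_alloc());	/* prints 1 */
	free(a);
	return 0;
}
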
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -788,6 +788,8 @@ struct task_struct { +@@ -820,6 +820,8 @@ struct task_struct { /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct *sighand; @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner /* Restored if set_restore_sigmask() was used: */ --- a/include/linux/signal.h +++ b/include/linux/signal.h -@@ -243,6 +243,7 @@ static inline void init_sigpending(struc +@@ -242,6 +242,7 @@ static inline void init_sigpending(struc } extern void flush_sigqueue(struct sigpending *queue); @@ -50,7 +50,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1648,6 +1648,7 @@ static __latent_entropy struct task_stru +@@ -1725,6 +1725,7 @@ static __latent_entropy struct task_stru spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); @@ -68,7 +68,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -360,13 +361,30 @@ static bool task_participate_group_stop( +@@ -362,13 +363,30 @@ static bool task_participate_group_stop( return false; } @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner { struct sigqueue *q = NULL; struct user_struct *user; -@@ -383,7 +401,10 @@ static struct sigqueue * +@@ -385,7 +403,10 @@ static struct sigqueue * if (override_rlimit || atomic_read(&user->sigpending) <= task_rlimit(t, RLIMIT_SIGPENDING)) { @@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner } else { print_dropped_signal(sig); } -@@ -400,6 +421,13 @@ static struct sigqueue * +@@ -402,6 +423,13 @@ static struct sigqueue * return q; } @@ -126,7 +126,7 @@ Signed-off-by: Thomas Gleixner static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) -@@ -409,6 +437,21 @@ static void __sigqueue_free(struct sigqu +@@ -411,6 +439,21 @@ static void __sigqueue_free(struct sigqu kmem_cache_free(sigqueue_cachep, q); } @@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; -@@ -422,6 +465,21 @@ void flush_sigqueue(struct sigpending *q +@@ -424,6 +467,21 @@ void flush_sigqueue(struct sigpending *q } /* @@ -170,7 +170,7 @@ Signed-off-by: Thomas Gleixner * Flush all pending signals for this kthread. */ void flush_signals(struct task_struct *t) -@@ -542,7 +600,7 @@ static void collect_signal(int sig, stru +@@ -544,7 +602,7 @@ static void collect_signal(int sig, stru (info->si_code == SI_TIMER) && (info->si_sys_private); @@ -179,7 +179,7 @@ Signed-off-by: Thomas Gleixner } else { /* * Ok, it wasn't in the queue. 
This must be -@@ -578,6 +636,8 @@ int dequeue_signal(struct task_struct *t +@@ -581,6 +639,8 @@ int dequeue_signal(struct task_struct *t bool resched_timer = false; int signr; @@ -188,7 +188,7 @@ Signed-off-by: Thomas Gleixner /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ -@@ -1515,7 +1575,8 @@ EXPORT_SYMBOL(kill_pid); +@@ -1668,7 +1728,8 @@ EXPORT_SYMBOL(kill_pid); */ struct sigqueue *sigqueue_alloc(void) { diff --git a/debian/patches/features/all/rt/skbufhead-raw-lock.patch b/debian/patches/features/all/rt/skbufhead-raw-lock.patch index 5c30b7f8f..b119560fc 100644 --- a/debian/patches/features/all/rt/skbufhead-raw-lock.patch +++ b/debian/patches/features/all/rt/skbufhead-raw-lock.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Tue, 12 Jul 2011 15:38:34 +0200 Subject: net: Use skbufhead with raw lock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Use the rps lock as rawlock so we can keep irq-off regions. It looks low latency. However we can't kfree() from this context therefore we defer this @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -2772,6 +2772,7 @@ struct softnet_data { +@@ -2821,6 +2821,7 @@ struct softnet_data { unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h -@@ -288,6 +288,7 @@ struct sk_buff_head { +@@ -287,6 +287,7 @@ struct sk_buff_head { __u32 qlen; spinlock_t lock; @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner }; struct sk_buff; -@@ -1670,6 +1671,12 @@ static inline void skb_queue_head_init(s +@@ -1660,6 +1661,12 @@ static inline void skb_queue_head_init(s __skb_queue_head_init(list); } @@ -49,7 +49,7 @@ Signed-off-by: Thomas Gleixner { --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -217,14 +217,14 @@ static inline struct hlist_head *dev_ind +@@ -219,14 +219,14 @@ static inline struct hlist_head *dev_ind static inline void rps_lock(struct softnet_data *sd) { #ifdef CONFIG_RPS @@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner #endif } -@@ -4582,7 +4582,7 @@ static void flush_backlog(struct work_st +@@ -4746,7 +4746,7 @@ static void flush_backlog(struct work_st skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner input_queue_head_incr(sd); } } -@@ -4592,11 +4592,14 @@ static void flush_backlog(struct work_st +@@ -4756,11 +4756,14 @@ static void flush_backlog(struct work_st skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); @@ -91,7 +91,7 @@ Signed-off-by: Thomas Gleixner } static void flush_all_backlogs(void) -@@ -5143,7 +5146,9 @@ static int process_backlog(struct napi_s +@@ -5308,7 +5311,9 @@ static int process_backlog(struct napi_s while (again) { struct sk_buff *skb; @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner rcu_read_lock(); __netif_receive_skb(skb); rcu_read_unlock(); -@@ -5151,9 +5156,9 @@ static int process_backlog(struct napi_s +@@ -5316,9 +5321,9 @@ static int process_backlog(struct napi_s if (++work >= quota) return work; @@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner rps_lock(sd); if 
(skb_queue_empty(&sd->input_pkt_queue)) { /* -@@ -5593,13 +5598,21 @@ static __latent_entropy void net_rx_acti +@@ -5758,13 +5763,21 @@ static __latent_entropy void net_rx_acti unsigned long time_limit = jiffies + usecs_to_jiffies(netdev_budget_usecs); int budget = netdev_budget; @@ -134,7 +134,7 @@ Signed-off-by: Thomas Gleixner for (;;) { struct napi_struct *n; -@@ -8414,6 +8427,9 @@ static int dev_cpu_dead(unsigned int old +@@ -8699,6 +8712,9 @@ static int dev_cpu_dead(unsigned int old netif_rx_ni(skb); input_queue_head_incr(oldsd); } @@ -144,7 +144,7 @@ Signed-off-by: Thomas Gleixner return 0; } -@@ -8717,8 +8733,9 @@ static int __init net_dev_init(void) +@@ -9004,8 +9020,9 @@ static int __init net_dev_init(void) INIT_WORK(flush, flush_backlog); @@ -153,6 +153,6 @@ Signed-off-by: Thomas Gleixner + skb_queue_head_init_raw(&sd->input_pkt_queue); + skb_queue_head_init_raw(&sd->process_queue); + skb_queue_head_init_raw(&sd->tofree_queue); - INIT_LIST_HEAD(&sd->poll_list); - sd->output_queue_tailp = &sd->output_queue; - #ifdef CONFIG_RPS + #ifdef CONFIG_XFRM_OFFLOAD + skb_queue_head_init(&sd->xfrm_backlog); + #endif diff --git a/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch b/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch index b3224d6e8..03df65e9f 100644 --- a/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch +++ b/debian/patches/features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 15 Apr 2015 19:00:47 +0200 Subject: slub: Disable SLUB_CPU_PARTIAL -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915 |in_atomic(): 1, irqs_disabled(): 0, pid: 87, name: rcuop/7 @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/init/Kconfig +++ b/init/Kconfig -@@ -1589,7 +1589,7 @@ config SLAB_FREELIST_HARDENED +@@ -1608,7 +1608,7 @@ config SLAB_FREELIST_HARDENED config SLUB_CPU_PARTIAL default y diff --git a/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch b/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch index f020eef27..6e3c882cc 100644 --- a/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch +++ b/debian/patches/features/all/rt/slub-enable-irqs-for-no-wait.patch @@ -1,7 +1,7 @@ Subject: slub: Enable irqs for __GFP_WAIT From: Thomas Gleixner Date: Wed, 09 Jan 2013 12:08:15 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz SYSTEM_RUNNING might be too late for enabling interrupts. Allocations with GFP_WAIT can happen before that. So use this as an indicator. 
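/*
 * Editor's illustrative sketch, not part of the patch series: the
 * pattern the slub change below relies on.  Whether interrupts may be
 * enabled (and the allocation allowed to sleep) is derived from the
 * gfp mask of the allocation itself rather than from the global
 * system_state, because blocking allocations can already happen before
 * SYSTEM_RUNNING.  gfpflags_allow_blocking() is the mainline helper;
 * its use here is an assumption about this kernel version, not a quote
 * from the patch.
 */
static void sketch_allocate(gfp_t flags)
{
	bool enableirqs = gfpflags_allow_blocking(flags);

	if (enableirqs)
		local_irq_enable();	/* the allocation may sleep */
	/* ... allocate the slab pages here ... */
	if (enableirqs)
		local_irq_disable();	/* restore the irq-off expectation */
}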
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/mm/slub.c +++ b/mm/slub.c -@@ -1572,14 +1572,17 @@ static struct page *allocate_slab(struct +@@ -1571,14 +1571,17 @@ static struct page *allocate_slab(struct void *start, *p; int idx, order; bool shuffle; @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner local_irq_enable(); flags |= s->allocflags; -@@ -1654,11 +1657,7 @@ static struct page *allocate_slab(struct +@@ -1637,11 +1640,7 @@ static struct page *allocate_slab(struct page->frozen = 1; out: diff --git a/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch b/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch index 6c2025b11..a1806d769 100644 --- a/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch +++ b/debian/patches/features/all/rt/softirq-disable-softirq-stacks-for-rt.patch @@ -1,7 +1,7 @@ Subject: softirq: Disable softirq stacks for RT From: Thomas Gleixner Date: Mon, 18 Jul 2011 13:59:17 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Disable extra stacks for softirqs. We want to preempt softirqs and having them on special IRQ-stack does not make this easier. @@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c -@@ -685,6 +685,7 @@ void irq_ctx_init(void) +@@ -739,6 +739,7 @@ void irq_ctx_init(void) } } @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner void do_softirq_own_stack(void) { struct thread_info *curtp, *irqtp; -@@ -702,6 +703,7 @@ void do_softirq_own_stack(void) +@@ -756,6 +757,7 @@ void do_softirq_own_stack(void) if (irqtp->flags) set_bits(irqtp->flags, &curtp->flags); } @@ -110,7 +110,7 @@ Signed-off-by: Thomas Gleixner void fixup_irqs(void) --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S -@@ -1136,6 +1136,7 @@ EXPORT_SYMBOL(native_load_gs_index) +@@ -1032,6 +1032,7 @@ EXPORT_SYMBOL(native_load_gs_index) jmp 2b .previous @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner /* Call softirq on interrupt stack. Interrupts are off. 
*/ ENTRY(do_softirq_own_stack) pushq %rbp -@@ -1146,6 +1147,7 @@ ENTRY(do_softirq_own_stack) +@@ -1042,6 +1043,7 @@ ENTRY(do_softirq_own_stack) leaveq ret ENDPROC(do_softirq_own_stack) diff --git a/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch b/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch index 1e0396915..3aa859cea 100644 --- a/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch +++ b/debian/patches/features/all/rt/softirq-preempt-fix-3-re.patch @@ -1,7 +1,7 @@ Subject: softirq: Check preemption after reenabling interrupts From: Thomas Gleixner Date: Sun, 13 Nov 2011 17:17:09 +0100 (CET) -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz raise_softirq_irqoff() disables interrupts and wakes the softirq daemon, but after reenabling interrupts there is no preemption check, @@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner } --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -2431,6 +2431,7 @@ static void __netif_reschedule(struct Qd +@@ -2477,6 +2477,7 @@ static void __netif_reschedule(struct Qd sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -120,7 +120,7 @@ Signed-off-by: Thomas Gleixner } void __netif_schedule(struct Qdisc *q) -@@ -2493,6 +2494,7 @@ void __dev_kfree_skb_irq(struct sk_buff +@@ -2539,6 +2540,7 @@ void __dev_kfree_skb_irq(struct sk_buff __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -128,7 +128,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -3856,6 +3858,7 @@ static int enqueue_to_backlog(struct sk_ +@@ -3936,6 +3938,7 @@ static int enqueue_to_backlog(struct sk_ rps_unlock(sd); local_irq_restore(flags); @@ -136,7 +136,7 @@ Signed-off-by: Thomas Gleixner atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -@@ -5105,12 +5108,14 @@ static void net_rps_action_and_irq_enabl +@@ -5270,12 +5273,14 @@ static void net_rps_action_and_irq_enabl sd->rps_ipi_list = NULL; local_irq_enable(); @@ -151,7 +151,7 @@ Signed-off-by: Thomas Gleixner } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -5188,6 +5193,7 @@ void __napi_schedule(struct napi_struct +@@ -5353,6 +5358,7 @@ void __napi_schedule(struct napi_struct local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -159,7 +159,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__napi_schedule); -@@ -8392,6 +8398,7 @@ static int dev_cpu_dead(unsigned int old +@@ -8677,6 +8683,7 @@ static int dev_cpu_dead(unsigned int old raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); diff --git a/debian/patches/features/all/rt/softirq-split-locks.patch b/debian/patches/features/all/rt/softirq-split-locks.patch index fe44a93ef..fcb652c4f 100644 --- a/debian/patches/features/all/rt/softirq-split-locks.patch +++ b/debian/patches/features/all/rt/softirq-split-locks.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 04 Oct 2012 14:20:47 +0100 Subject: softirq: Split softirq locks -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The 3.x RT series removed the split softirq implementation in favour of pushing softirq processing into the context of the thread which @@ -32,8 +32,7 @@ 
Signed-off-by: Thomas Gleixner init/main.c | 1 kernel/softirq.c | 492 +++++++++++++++++++++++++++++++++++++------- kernel/time/tick-sched.c | 9 - net/core/dev.c | 6 - 8 files changed, 480 insertions(+), 95 deletions(-) + 7 files changed, 478 insertions(+), 91 deletions(-) --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -118,7 +117,7 @@ Signed-off-by: Thomas Gleixner DECLARE_PER_CPU(struct task_struct *, ksoftirqd); -@@ -648,6 +653,12 @@ extern void tasklet_kill_immediate(struc +@@ -633,6 +638,12 @@ extern void tasklet_kill_immediate(struc extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); @@ -128,9 +127,9 @@ Signed-off-by: Thomas Gleixner +static inline void softirq_early_init(void) { } +#endif + - /* - * Autoprobing for irqs: - * + struct tasklet_hrtimer { + struct hrtimer timer; + struct tasklet_struct tasklet; --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -51,7 +51,11 @@ @@ -192,7 +191,7 @@ Signed-off-by: Thomas Gleixner #define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ --- a/init/main.c +++ b/init/main.c -@@ -544,6 +544,7 @@ asmlinkage __visible void __init start_k +@@ -545,6 +545,7 @@ asmlinkage __visible void __init start_k setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); @@ -385,7 +384,7 @@ Signed-off-by: Thomas Gleixner /* * preempt_count and SOFTIRQ_OFFSET usage: * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving -@@ -244,10 +399,8 @@ asmlinkage __visible void __softirq_entr +@@ -245,10 +400,8 @@ asmlinkage __visible void __softirq_entr unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; int max_restart = MAX_SOFTIRQ_RESTART; @@ -396,7 +395,7 @@ Signed-off-by: Thomas Gleixner /* * Mask out PF_MEMALLOC s current task context is borrowed for the -@@ -266,36 +419,7 @@ asmlinkage __visible void __softirq_entr +@@ -267,36 +420,7 @@ asmlinkage __visible void __softirq_entr /* Reset the pending bitmask before enabling irqs */ set_softirq_pending(0); @@ -434,7 +433,7 @@ Signed-off-by: Thomas Gleixner pending = local_softirq_pending(); if (pending) { -@@ -332,6 +456,246 @@ asmlinkage __visible void do_softirq(voi +@@ -333,6 +457,246 @@ asmlinkage __visible void do_softirq(voi } /* @@ -681,7 +680,7 @@ Signed-off-by: Thomas Gleixner * Enter an interrupt context. */ void irq_enter(void) -@@ -342,9 +706,9 @@ void irq_enter(void) +@@ -343,9 +707,9 @@ void irq_enter(void) * Prevent raise_softirq from needlessly waking up ksoftirqd * here, as softirq will be serviced on return from interrupt. 
*/ @@ -693,7 +692,7 @@ Signed-off-by: Thomas Gleixner } __irq_enter(); -@@ -352,9 +716,13 @@ void irq_enter(void) +@@ -353,9 +717,13 @@ void irq_enter(void) static inline void invoke_softirq(void) { @@ -708,7 +707,7 @@ Signed-off-by: Thomas Gleixner if (!force_irqthreads) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* -@@ -374,6 +742,14 @@ static inline void invoke_softirq(void) +@@ -375,6 +743,14 @@ static inline void invoke_softirq(void) } else { wakeup_softirqd(); } @@ -763,7 +762,7 @@ Signed-off-by: Thomas Gleixner void open_softirq(int nr, void (*action)(struct softirq_action *)) { softirq_vec[nr].action = action; -@@ -702,23 +1052,7 @@ EXPORT_SYMBOL(tasklet_unlock_wait); +@@ -741,23 +1091,7 @@ EXPORT_SYMBOL(tasklet_unlock_wait); static int ksoftirqd_should_run(unsigned int cpu) { @@ -780,7 +779,7 @@ Signed-off-by: Thomas Gleixner - */ - __do_softirq(); - local_irq_enable(); -- cond_resched_rcu_qs(); +- cond_resched(); - return; - } - local_irq_enable(); @@ -788,7 +787,7 @@ Signed-off-by: Thomas Gleixner } #ifdef CONFIG_HOTPLUG_CPU -@@ -785,6 +1119,8 @@ static int takeover_tasklets(unsigned in +@@ -824,6 +1158,8 @@ static int takeover_tasklets(unsigned in static struct smp_hotplug_thread softirq_threads = { .store = &ksoftirqd, @@ -799,7 +798,7 @@ Signed-off-by: Thomas Gleixner .thread_comm = "ksoftirqd/%u", --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c -@@ -910,14 +910,7 @@ static bool can_stop_idle_tick(int cpu, +@@ -888,14 +888,7 @@ static bool can_stop_idle_tick(int cpu, return false; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { @@ -815,19 +814,3 @@ Signed-off-by: Thomas Gleixner return false; } ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -4062,11 +4062,9 @@ int netif_rx_ni(struct sk_buff *skb) - - trace_netif_rx_ni_entry(skb); - -- preempt_disable(); -+ local_bh_disable(); - err = netif_rx_internal(skb); -- if (local_softirq_pending()) -- do_softirq(); -- preempt_enable(); -+ local_bh_enable(); - - return err; - } diff --git a/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch b/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch index 2cb9fcd72..25f8a1206 100644 --- a/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch +++ b/debian/patches/features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 20 Jan 2016 16:34:17 +0100 Subject: softirq: split timer softirqs out of ksoftirqd -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The softirqd runs in -RT with SCHED_FIFO (prio 1) and deals mostly with timer wakeup which can not happen in hardirq context. 
The prio has been @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void handle_softirq(unsigned int vec_nr) { struct softirq_action *h = softirq_vec + vec_nr; -@@ -484,7 +499,6 @@ void __raise_softirq_irqoff(unsigned int +@@ -485,7 +500,6 @@ void __raise_softirq_irqoff(unsigned int static inline void local_bh_disable_nort(void) { local_bh_disable(); } static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } static void ksoftirqd_set_sched_params(unsigned int cpu) { } @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior #else /* !PREEMPT_RT_FULL */ -@@ -631,8 +645,12 @@ void thread_do_softirq(void) +@@ -632,8 +646,12 @@ void thread_do_softirq(void) static void do_raise_softirq_irqoff(unsigned int nr) { @@ -79,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * If we are not in a hard interrupt and inside a bh disabled -@@ -641,16 +659,29 @@ static void do_raise_softirq_irqoff(unsi +@@ -642,16 +660,29 @@ static void do_raise_softirq_irqoff(unsi * delegate it to ksoftirqd. */ if (!in_irq() && current->softirq_nestcnt) @@ -113,7 +113,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -676,7 +707,7 @@ void raise_softirq_irqoff(unsigned int n +@@ -677,7 +708,7 @@ void raise_softirq_irqoff(unsigned int n * raise a WARN() if the condition is met. */ if (!current->softirq_nestcnt) @@ -122,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline int ksoftirqd_softirq_pending(void) -@@ -689,22 +720,37 @@ static inline void _local_bh_enable_nort +@@ -690,22 +721,37 @@ static inline void _local_bh_enable_nort static inline void ksoftirqd_set_sched_params(unsigned int cpu) { @@ -163,7 +163,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* PREEMPT_RT_FULL */ /* * Enter an interrupt context. -@@ -759,6 +805,9 @@ static inline void invoke_softirq(void) +@@ -760,6 +806,9 @@ static inline void invoke_softirq(void) if (__this_cpu_read(ksoftirqd) && __this_cpu_read(ksoftirqd)->softirqs_raised) wakeup_softirqd(); @@ -173,7 +173,7 @@ Signed-off-by: Sebastian Andrzej Siewior local_irq_restore(flags); #endif } -@@ -1131,18 +1180,30 @@ static int takeover_tasklets(unsigned in +@@ -1170,18 +1219,30 @@ static int takeover_tasklets(unsigned in static struct smp_hotplug_thread softirq_threads = { .store = &ksoftirqd, .setup = ksoftirqd_set_sched_params, diff --git a/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch b/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch index d64409882..06bf9029d 100644 --- a/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch +++ b/debian/patches/features/all/rt/softirq-wake-the-timer-softirq-if-needed.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Fri, 20 Jan 2017 18:10:20 +0100 Subject: [PATCH] softirq: wake the timer softirq if needed -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The irq-exit path only checks the "normal"-softirq thread if it is running and ignores the state of the "timer"-softirq thread. 
It is possible @@ -54,7 +54,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline int ksoftirqd_softirq_pending(void) { return local_softirq_pending(); -@@ -773,13 +774,10 @@ void irq_enter(void) +@@ -774,13 +775,10 @@ void irq_enter(void) static inline void invoke_softirq(void) { @@ -70,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!force_irqthreads) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* -@@ -800,6 +798,7 @@ static inline void invoke_softirq(void) +@@ -801,6 +799,7 @@ static inline void invoke_softirq(void) wakeup_softirqd(); } #else /* PREEMPT_RT_FULL */ diff --git a/debian/patches/features/all/rt/spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch b/debian/patches/features/all/rt/spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch new file mode 100644 index 000000000..bf43c7377 --- /dev/null +++ b/debian/patches/features/all/rt/spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch @@ -0,0 +1,59 @@ +From: Anna-Maria Gleixner +Date: Wed, 4 Apr 2018 11:43:55 +0200 +Subject: [PATCH] spinlock: atomic_dec_and_lock: Add an irqsave variant +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +There are in-tree users of atomic_dec_and_lock() which must acquire the +spin lock with interrupts disabled. To workaround the lack of an irqsave +variant of atomic_dec_and_lock() they use local_irq_save() at the call +site. This causes extra code and creates in some places unneeded long +interrupt disabled times. These places need also extra treatment for +PREEMPT_RT due to the disconnect of the irq disabling and the lock +function. + +Implement the missing irqsave variant of the function. + +Signed-off-by: Anna-Maria Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/spinlock.h | 5 +++++ + lib/dec_and_lock.c | 17 +++++++++++++++++ + 2 files changed, 22 insertions(+) + +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -409,6 +409,11 @@ extern int _atomic_dec_and_lock(atomic_t + #define atomic_dec_and_lock(atomic, lock) \ + __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) + ++extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, ++ unsigned long *flags); ++#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ ++ __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) ++ + int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, + size_t max_size, unsigned int cpu_mult, + gfp_t gfp); +--- a/lib/dec_and_lock.c ++++ b/lib/dec_and_lock.c +@@ -33,3 +33,20 @@ int _atomic_dec_and_lock(atomic_t *atomi + } + + EXPORT_SYMBOL(_atomic_dec_and_lock); ++ ++int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, ++ unsigned long *flags) ++{ ++ /* Subtract 1 from counter unless that drops it to 0 (ie. 
it was 1) */ ++ if (atomic_add_unless(atomic, -1, 1)) ++ return 0; ++ ++ /* Otherwise do it the slow way */ ++ spin_lock_irqsave(lock, *flags); ++ if (atomic_dec_and_test(atomic)) ++ return 1; ++ spin_unlock_irqrestore(lock, *flags); ++ return 0; ++} ++ ++EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); diff --git a/debian/patches/features/all/rt/spinlock-types-separate-raw.patch b/debian/patches/features/all/rt/spinlock-types-separate-raw.patch index 71786d087..52650e158 100644 --- a/debian/patches/features/all/rt/spinlock-types-separate-raw.patch +++ b/debian/patches/features/all/rt/spinlock-types-separate-raw.patch @@ -1,7 +1,7 @@ Subject: spinlock: Split the lock types header From: Thomas Gleixner Date: Wed, 29 Jun 2011 19:34:01 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Split raw_spinlock into its own file and the remaining spinlock_t into its own non-RT header. The non-RT header will be replaced later by sleeping @@ -9,11 +9,11 @@ spinlocks. Signed-off-by: Thomas Gleixner --- - include/linux/rwlock_types.h | 4 + - include/linux/spinlock_types.h | 74 ------------------------------------ + include/linux/rwlock_types.h | 4 ++ + include/linux/spinlock_types.h | 71 +----------------------------------- include/linux/spinlock_types_nort.h | 33 ++++++++++++++++ - include/linux/spinlock_types_raw.h | 58 ++++++++++++++++++++++++++++ - 4 files changed, 97 insertions(+), 72 deletions(-) + include/linux/spinlock_types_raw.h | 55 +++++++++++++++++++++++++++ + 4 files changed, 94 insertions(+), 69 deletions(-) --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -30,7 +30,7 @@ Signed-off-by: Thomas Gleixner * and initializers --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h -@@ -9,79 +9,9 @@ +@@ -9,76 +9,9 @@ * Released under the General Public License (GPL). 
*/ @@ -45,9 +45,6 @@ Signed-off-by: Thomas Gleixner - -typedef struct raw_spinlock { - arch_spinlock_t raw_lock; --#ifdef CONFIG_GENERIC_LOCKBREAK -- unsigned int break_lock; --#endif -#ifdef CONFIG_DEBUG_SPINLOCK - unsigned int magic, owner_cpu; - void *owner; @@ -150,7 +147,7 @@ Signed-off-by: Thomas Gleixner +#endif --- /dev/null +++ b/include/linux/spinlock_types_raw.h -@@ -0,0 +1,58 @@ +@@ -0,0 +1,55 @@ +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H +#define __LINUX_SPINLOCK_TYPES_RAW_H + @@ -166,9 +163,6 @@ Signed-off-by: Thomas Gleixner + +typedef struct raw_spinlock { + arch_spinlock_t raw_lock; -+#ifdef CONFIG_GENERIC_LOCKBREAK -+ unsigned int break_lock; -+#endif +#ifdef CONFIG_DEBUG_SPINLOCK + unsigned int magic, owner_cpu; + void *owner; diff --git a/debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch b/debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch index 533cc5a2c..492e9aef2 100644 --- a/debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch +++ b/debian/patches/features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Thu, 12 Oct 2017 18:37:12 +0200 Subject: [PATCH] srcu: replace local_irqsave() with a locallock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz There are two instances which disable interrupts in order to become a stable this_cpu_ptr() pointer. The restore part is coupled with diff --git a/debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch b/debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch index 37dbcb5d6..d428f4f88 100644 --- a/debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch +++ b/debian/patches/features/all/rt/srcu-use-cpu_online-instead-custom-check.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 13 Sep 2017 14:43:41 +0200 Subject: [PATCH] srcu: use cpu_online() instead custom check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The current check via srcu_online is slightly racy because after looking at srcu_online there could be an interrupt that interrupted us long @@ -18,8 +18,8 @@ SRCU won't enqueue a work item before SRCU is up and ready. Signed-off-by: Sebastian Andrzej Siewior --- kernel/rcu/srcutree.c | 22 ++++------------------ - kernel/rcu/tree.c | 6 ------ - 2 files changed, 4 insertions(+), 24 deletions(-) + kernel/rcu/tree.c | 4 ---- + 2 files changed, 4 insertions(+), 22 deletions(-) --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "rcu.h" #include "rcu_segcblist.h" -@@ -425,21 +426,6 @@ static void srcu_gp_start(struct srcu_st +@@ -452,21 +453,6 @@ static void srcu_gp_start(struct srcu_st } /* @@ -53,7 +53,7 @@ Signed-off-by: Sebastian Andrzej Siewior * Place the workqueue handler on the specified CPU if online, otherwise * just run it whereever. This is useful for placing workqueue handlers * that are to invoke the specified CPU's callbacks. 
-@@ -450,12 +436,12 @@ static bool srcu_queue_delayed_work_on(i +@@ -477,12 +463,12 @@ static bool srcu_queue_delayed_work_on(i { bool ret; @@ -72,29 +72,20 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3775,8 +3775,6 @@ int rcutree_online_cpu(unsigned int cpu) - { - sync_sched_exp_online_cleanup(cpu); - rcutree_affinity_setting(cpu, -1); + rnp->ffmask |= rdp->grpmask; + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_online_cpu(cpu); - return 0; - } + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return 0; /* Too early in boot for scheduler work. */ + sync_sched_exp_online_cleanup(cpu); +@@ -3804,8 +3802,6 @@ int rcutree_offline_cpu(unsigned int cpu + } -@@ -3787,8 +3785,6 @@ int rcutree_online_cpu(unsigned int cpu) - int rcutree_offline_cpu(unsigned int cpu) - { rcutree_affinity_setting(cpu, cpu); - if (IS_ENABLED(CONFIG_TREE_SRCU)) - srcu_offline_cpu(cpu); return 0; } -@@ -4236,8 +4232,6 @@ void __init rcu_init(void) - for_each_online_cpu(cpu) { - rcutree_prepare_cpu(cpu); - rcu_cpu_starting(cpu); -- if (IS_ENABLED(CONFIG_TREE_SRCU)) -- srcu_online_cpu(cpu); - } - } - diff --git a/debian/patches/features/all/rt/stop-machine-raw-lock.patch b/debian/patches/features/all/rt/stop-machine-raw-lock.patch index 3f1865dce..4a0070dbe 100644 --- a/debian/patches/features/all/rt/stop-machine-raw-lock.patch +++ b/debian/patches/features/all/rt/stop-machine-raw-lock.patch @@ -1,14 +1,16 @@ Subject: stop_machine: Use raw spinlocks From: Thomas Gleixner Date: Wed, 29 Jun 2011 11:01:51 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +upstream commit de5b55c1d4e30740009864eb35ce4ed856aac01d Use raw-locks in stomp_machine() to allow locking in irq-off regions. 
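/*
 * Editor's illustrative sketch of the rule this stop_machine change
 * follows (not the patch's own code): on PREEMPT_RT a spinlock_t is a
 * sleeping rtmutex, so a lock that must be taken in genuinely irq-off
 * regions has to be a raw_spinlock_t.  The lock name is hypothetical.
 */
static DEFINE_RAW_SPINLOCK(stopper_lock);

static void stopper_critical_section(void)
{
	unsigned long flags;

	/* the raw variant never sleeps, so this is valid with IRQs off on RT */
	raw_spin_lock_irqsave(&stopper_lock, flags);
	/* ... short, bounded work only ... */
	raw_spin_unlock_irqrestore(&stopper_lock, flags);
}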
Signed-off-by: Thomas Gleixner --- - kernel/stop_machine.c | 34 +++++++++++++--------------------- - 1 file changed, 13 insertions(+), 21 deletions(-) + kernel/stop_machine.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -88,24 +90,7 @@ Signed-off-by: Thomas Gleixner if (work) { cpu_stop_fn_t fn = work->fn; -@@ -475,15 +475,7 @@ static void cpu_stopper_thread(unsigned - struct cpu_stop_done *done = work->done; - int ret; - -- /* -- * Wait until the stopper finished scheduling on all -- * cpus -- */ -- lg_global_lock(&stop_cpus_lock); -- /* -- * Let other cpu threads continue as well -- */ -- lg_global_unlock(&stop_cpus_lock); -+ /* XXX */ - - /* cpu stop callbacks must not sleep, make in_atomic() == T */ - preempt_count_inc(); -@@ -551,7 +543,7 @@ static int __init cpu_stop_init(void) +@@ -541,7 +541,7 @@ static int __init cpu_stop_init(void) for_each_possible_cpu(cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); diff --git a/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch b/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch index 964923e34..de46e1650 100644 --- a/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch +++ b/debian/patches/features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Wed, 18 Feb 2015 16:05:28 +0100 Subject: sunrpc: Make svc_xprt_do_enqueue() use get_cpu_light() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915 |in_atomic(): 1, irqs_disabled(): 0, pid: 3194, name: rpc.nfsd @@ -29,12 +29,12 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 Signed-off-by: Mike Galbraith Signed-off-by: Sebastian Andrzej Siewior --- - net/sunrpc/svc_xprt.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) + net/sunrpc/svc_xprt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c -@@ -396,7 +396,7 @@ void svc_xprt_do_enqueue(struct svc_xprt +@@ -395,7 +395,7 @@ void svc_xprt_do_enqueue(struct svc_xprt goto out; } @@ -43,19 +43,10 @@ Signed-off-by: Sebastian Andrzej Siewior pool = svc_pool_for_cpu(xprt->xpt_server, cpu); atomic_long_inc(&pool->sp_stats.packets); -@@ -432,7 +432,7 @@ void svc_xprt_do_enqueue(struct svc_xprt - - atomic_long_inc(&pool->sp_stats.threads_woken); - wake_up_process(rqstp->rq_task); -- put_cpu(); -+ put_cpu_light(); - goto out; - } - rcu_read_unlock(); -@@ -453,7 +453,7 @@ void svc_xprt_do_enqueue(struct svc_xprt - goto redo_search; - } +@@ -419,7 +419,7 @@ void svc_xprt_do_enqueue(struct svc_xprt rqstp = NULL; + out_unlock: + rcu_read_unlock(); - put_cpu(); + put_cpu_light(); out: diff --git a/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch b/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch index ea47a7e80..885933565 100644 --- a/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch +++ b/debian/patches/features/all/rt/suspend-prevernt-might-sleep-splats.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 15 Jul 2010 10:29:00 +0200 Subject: suspend: Prevent might sleep splats -Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz timekeeping suspend/resume calls read_persistant_clock() which takes rtc_lock. That results in might sleep warnings because at that point @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/kernel.h +++ b/include/linux/kernel.h -@@ -531,6 +531,7 @@ extern enum system_states { +@@ -532,6 +532,7 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, diff --git a/debian/patches/features/all/rt/sysfs-realtime-entry.patch b/debian/patches/features/all/rt/sysfs-realtime-entry.patch index 1b2d7d641..760b73feb 100644 --- a/debian/patches/features/all/rt/sysfs-realtime-entry.patch +++ b/debian/patches/features/all/rt/sysfs-realtime-entry.patch @@ -1,7 +1,7 @@ Subject: sysfs: Add /sys/kernel/realtime entry From: Clark Williams Date: Sat Jul 30 21:55:53 2011 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Add a /sys/kernel entry to indicate that the kernel is a realtime kernel. diff --git a/debian/patches/features/all/rt/take-write_seqcount_invalidate-into-__d_drop.patch b/debian/patches/features/all/rt/take-write_seqcount_invalidate-into-__d_drop.patch new file mode 100644 index 000000000..97965e067 --- /dev/null +++ b/debian/patches/features/all/rt/take-write_seqcount_invalidate-into-__d_drop.patch @@ -0,0 +1,80 @@ +From: Al Viro +Date: Wed, 7 Mar 2018 00:49:10 -0500 +Subject: [PATCH] take write_seqcount_invalidate() into __d_drop() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Upstream commit 0632a9ac7bc0a32f8251a53b3925775f0a7c4da6 + +... and reorder it with making d_unhashed() true. + +Signed-off-by: Al Viro +Signed-off-by: Sebastian Andrzej Siewior +--- + fs/dcache.c | 44 ++++++++++++++++++++++---------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -470,30 +470,29 @@ static void dentry_lru_add(struct dentry + */ + static void ___d_drop(struct dentry *dentry) + { +- if (!d_unhashed(dentry)) { +- struct hlist_bl_head *b; +- /* +- * Hashed dentries are normally on the dentry hashtable, +- * with the exception of those newly allocated by +- * d_obtain_root, which are always IS_ROOT: +- */ +- if (unlikely(IS_ROOT(dentry))) +- b = &dentry->d_sb->s_roots; +- else +- b = d_hash(dentry->d_name.hash); ++ struct hlist_bl_head *b; ++ /* ++ * Hashed dentries are normally on the dentry hashtable, ++ * with the exception of those newly allocated by ++ * d_obtain_root, which are always IS_ROOT: ++ */ ++ if (unlikely(IS_ROOT(dentry))) ++ b = &dentry->d_sb->s_roots; ++ else ++ b = d_hash(dentry->d_name.hash); + +- hlist_bl_lock(b); +- __hlist_bl_del(&dentry->d_hash); +- hlist_bl_unlock(b); +- /* After this call, in-progress rcu-walk path lookup will fail. 
*/ +- write_seqcount_invalidate(&dentry->d_seq); +- } ++ hlist_bl_lock(b); ++ __hlist_bl_del(&dentry->d_hash); ++ hlist_bl_unlock(b); + } + + void __d_drop(struct dentry *dentry) + { +- ___d_drop(dentry); +- dentry->d_hash.pprev = NULL; ++ if (!d_unhashed(dentry)) { ++ ___d_drop(dentry); ++ dentry->d_hash.pprev = NULL; ++ write_seqcount_invalidate(&dentry->d_seq); ++ } + } + EXPORT_SYMBOL(__d_drop); + +@@ -2853,9 +2852,10 @@ static void __d_move(struct dentry *dent + write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); + + /* unhash both */ +- /* ___d_drop does write_seqcount_barrier, but they're OK to nest. */ +- ___d_drop(dentry); +- ___d_drop(target); ++ if (!d_unhashed(dentry)) ++ ___d_drop(dentry); ++ if (!d_unhashed(target)) ++ ___d_drop(target); + + /* Switch the names.. */ + if (exchange) diff --git a/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch b/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch index fdb9841c7..0f298c59a 100644 --- a/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch +++ b/debian/patches/features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch @@ -1,7 +1,7 @@ Subject: tasklet: Prevent tasklets from going into infinite spin in RT From: Ingo Molnar Date: Tue Nov 29 20:18:22 2011 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When CONFIG_PREEMPT_RT_FULL is enabled, tasklets run as threads, and spinlocks turn are mutexes. But this can cause issues with @@ -39,8 +39,8 @@ Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 33 ++++--- - kernel/softirq.c | 201 ++++++++++++++++++++++++++++++++-------------- - 2 files changed, 162 insertions(+), 72 deletions(-) + kernel/softirq.c | 193 ++++++++++++++++++++++++++++++++-------------- + 2 files changed, 157 insertions(+), 69 deletions(-) --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -99,7 +99,7 @@ Signed-off-by: Thomas Gleixner #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif -@@ -632,12 +642,7 @@ static inline void tasklet_disable(struc +@@ -617,12 +627,7 @@ static inline void tasklet_disable(struc smp_mb(); } @@ -173,7 +173,7 @@ Signed-off-by: Thomas Gleixner local_irq_restore(flags); } EXPORT_SYMBOL(__tasklet_schedule); -@@ -484,10 +515,7 @@ void __tasklet_hi_schedule(struct taskle +@@ -484,50 +515,108 @@ void __tasklet_hi_schedule(struct taskle unsigned long flags; local_irq_save(flags); @@ -185,19 +185,9 @@ Signed-off-by: Thomas Gleixner local_irq_restore(flags); } EXPORT_SYMBOL(__tasklet_hi_schedule); -@@ -496,82 +524,122 @@ void __tasklet_hi_schedule_first(struct - { - BUG_ON(!irqs_disabled()); - -- t->next = __this_cpu_read(tasklet_hi_vec.head); -- __this_cpu_write(tasklet_hi_vec.head, t); -- __raise_softirq_irqoff(HI_SOFTIRQ); -+ __tasklet_hi_schedule(t); - } - EXPORT_SYMBOL(__tasklet_hi_schedule_first); -static __latent_entropy void tasklet_action(struct softirq_action *a) -+void tasklet_enable(struct tasklet_struct *t) ++void tasklet_enable(struct tasklet_struct *t) { - struct tasklet_struct *list; + if (!atomic_dec_and_test(&t->count)) @@ -302,16 +292,14 @@ Signed-off-by: Thomas Gleixner } } -+static void tasklet_action(struct softirq_action *a) ++static __latent_entropy void 
tasklet_action(struct softirq_action *a) +{ + struct tasklet_struct *list; + + local_irq_disable(); -+ + list = __this_cpu_read(tasklet_vec.head); + __this_cpu_write(tasklet_vec.head, NULL); + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); -+ + local_irq_enable(); + + __tasklet_action(a, list); @@ -320,17 +308,13 @@ Signed-off-by: Thomas Gleixner static __latent_entropy void tasklet_hi_action(struct softirq_action *a) { struct tasklet_struct *list; - - local_irq_disable(); -+ - list = __this_cpu_read(tasklet_hi_vec.head); - __this_cpu_write(tasklet_hi_vec.head, NULL); +@@ -538,30 +627,7 @@ static __latent_entropy void tasklet_hi_ __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); -- local_irq_enable(); -- + local_irq_enable(); + - while (list) { - struct tasklet_struct *t = list; - +- - list = list->next; - - if (tasklet_trylock(t)) { @@ -344,8 +328,7 @@ Signed-off-by: Thomas Gleixner - } - tasklet_unlock(t); - } -+ local_irq_enable(); - +- - local_irq_disable(); - t->next = NULL; - *__this_cpu_read(tasklet_hi_vec.tail) = t; @@ -357,7 +340,7 @@ Signed-off-by: Thomas Gleixner } void tasklet_init(struct tasklet_struct *t, -@@ -592,7 +660,7 @@ void tasklet_kill(struct tasklet_struct +@@ -582,7 +648,7 @@ void tasklet_kill(struct tasklet_struct while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { do { @@ -366,7 +349,7 @@ Signed-off-by: Thomas Gleixner } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } tasklet_unlock_wait(t); -@@ -615,6 +683,23 @@ void __init softirq_init(void) +@@ -656,6 +722,23 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action); } diff --git a/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch b/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch index f9f2d386e..639830c96 100644 --- a/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch +++ b/debian/patches/features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch @@ -1,7 +1,7 @@ From: Daniel Wagner Date: Tue, 17 Feb 2015 09:37:44 +0100 Subject: thermal: Defer thermal wakups to threads -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz On RT the spin lock in pkg_temp_thermal_platfrom_thermal_notify will call schedule while we run in irq context. diff --git a/debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch b/debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch index 71d131d21..48a145dc2 100644 --- a/debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch +++ b/debian/patches/features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Wed, 15 Nov 2017 17:29:51 +0100 Subject: [PATCH] time/hrtimer: avoid schedule_work() with interrupts disabled -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz The NOHZ code tries to schedule a workqueue with interrupts disabled. Since this does not work -RT I am switching it to swork instead. 
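/*
 * Editor's illustrative sketch, not taken from the patch: deferring
 * work from an interrupts-disabled region via the RT tree's "simple
 * work queue" (swork) that the work-simple patch in this series
 * provides.  INIT_SWORK()/swork_queue() and their signatures are
 * assumptions based on include/linux/swork.h in the RT tree, and the
 * worker is assumed to have been set up earlier via swork_get().
 */
static void deferred_update(struct swork_event *ev)
{
	/* runs later in kthread context, where sleeping is fine */
}

static struct swork_event update_event;

static void called_with_irqs_disabled(void)
{
	INIT_SWORK(&update_event, deferred_update);
	swork_queue(&update_event);	/* legal here; schedule_work() is not on RT */
}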
@@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_SMP unsigned int sysctl_timer_migration = 1; -@@ -238,7 +237,7 @@ static void timers_update_migration(void +@@ -236,7 +235,7 @@ static void timers_update_migration(void static inline void timers_update_migration(void) { } #endif /* !CONFIG_SMP */ @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior { mutex_lock(&timer_keys_mutex); timers_update_migration(); -@@ -248,9 +247,17 @@ static void timer_update_keys(struct wor +@@ -246,9 +245,17 @@ static void timer_update_keys(struct wor void timers_update_nohz(void) { diff --git a/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch b/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch index 588939dd7..209921cfc 100644 --- a/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch +++ b/debian/patches/features/all/rt/timekeeping-split-jiffies-lock.patch @@ -1,7 +1,7 @@ Subject: timekeeping: Split jiffies seqlock From: Thomas Gleixner Date: Thu, 14 Feb 2013 22:36:59 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Replace jiffies_lock seqlock with a simple seqcounter and a rawlock so it can be taken in atomic context on RT. @@ -73,7 +73,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c -@@ -66,7 +66,8 @@ static void tick_do_update_jiffies64(kti +@@ -67,7 +67,8 @@ static void tick_do_update_jiffies64(kti return; /* Reevaluate with jiffies_lock held */ @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner delta = ktime_sub(now, last_jiffies_update); if (delta >= tick_period) { -@@ -89,10 +90,12 @@ static void tick_do_update_jiffies64(kti +@@ -90,10 +91,12 @@ static void tick_do_update_jiffies64(kti /* Keep the tick_next_period variable up to date */ tick_next_period = ktime_add(last_jiffies_update, tick_period); } else { @@ -98,7 +98,7 @@ Signed-off-by: Thomas Gleixner update_wall_time(); } -@@ -103,12 +106,14 @@ static ktime_t tick_init_jiffy_update(vo +@@ -104,12 +107,14 @@ static ktime_t tick_init_jiffy_update(vo { ktime_t period; @@ -115,7 +115,7 @@ Signed-off-by: Thomas Gleixner return period; } -@@ -689,10 +694,10 @@ static ktime_t tick_nohz_stop_sched_tick +@@ -665,10 +670,10 @@ static ktime_t tick_nohz_stop_sched_tick /* Read jiffies and the time when jiffies were updated last */ do { @@ -130,7 +130,7 @@ Signed-off-by: Thomas Gleixner /* --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c -@@ -2326,8 +2326,10 @@ EXPORT_SYMBOL(hardpps); +@@ -2421,8 +2421,10 @@ EXPORT_SYMBOL(hardpps); */ void xtime_update(unsigned long ticks) { diff --git a/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch b/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch index c9a25f7e1..5705d00d4 100644 --- a/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch +++ b/debian/patches/features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch @@ -1,7 +1,7 @@ From: Peter Zijlstra Date: Fri, 21 Aug 2009 11:56:45 +0200 Subject: timer: delay waking softirqs from the jiffy tick -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz People were complaining about broken balancing with the recent -rt 
series. @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1628,13 +1628,13 @@ void update_process_times(int user_tick) +@@ -1666,13 +1666,13 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); diff --git a/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch b/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch index fef1db3e1..65deffe3c 100644 --- a/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch +++ b/debian/patches/features/all/rt/timer-fd-avoid-live-lock.patch @@ -1,7 +1,7 @@ Subject: timer-fd: Prevent live lock From: Thomas Gleixner Date: Wed, 25 Jan 2012 11:08:40 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz If hrtimer_try_to_cancel() requires a retry, then depending on the priority setting te retry loop might prevent timer callback completion diff --git a/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch b/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch index e9e359498..32cb7973f 100644 --- a/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch +++ b/debian/patches/features/all/rt/timers-prepare-for-full-preemption.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:34 -0500 Subject: timers: Prepare for full preemption -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When softirqs can be preempted we need to make sure that cancelling the timer from the active thread can not deadlock vs. a running timer @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/timer.h +++ b/include/linux/timer.h -@@ -213,7 +213,7 @@ extern void add_timer(struct timer_list +@@ -174,7 +174,7 @@ extern void add_timer(struct timer_list extern int try_to_del_timer_sync(struct timer_list *timer); @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner # define del_timer_sync(t) del_timer(t) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -522,11 +522,14 @@ void resched_cpu(int cpu) +@@ -525,11 +525,14 @@ void resched_cpu(int cpu) */ int get_nohz_timer_target(void) { @@ -40,14 +40,14 @@ Signed-off-by: Thomas Gleixner + preempt_disable_rt(); + cpu = smp_processor_id(); + - if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) + if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) - return cpu; + goto preempt_en_rt; rcu_read_lock(); for_each_domain(cpu, sd) { -@@ -545,6 +548,8 @@ int get_nohz_timer_target(void) - cpu = housekeeping_any_cpu(); +@@ -548,6 +551,8 @@ int get_nohz_timer_target(void) + cpu = housekeeping_any_cpu(HK_FLAG_TIMER); unlock: rcu_read_unlock(); +preempt_en_rt: @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner unsigned long clk; unsigned long next_expiry; unsigned int cpu; -@@ -1140,6 +1144,33 @@ void add_timer_on(struct timer_list *tim +@@ -1178,6 +1182,33 @@ void add_timer_on(struct timer_list *tim } EXPORT_SYMBOL_GPL(add_timer_on); @@ -109,7 +109,7 @@ Signed-off-by: Thomas Gleixner /** * del_timer - deactivate a timer. 
* @timer: the timer to be deactivated -@@ -1195,7 +1226,7 @@ int try_to_del_timer_sync(struct timer_l +@@ -1233,7 +1264,7 @@ int try_to_del_timer_sync(struct timer_l } EXPORT_SYMBOL(try_to_del_timer_sync); @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated -@@ -1255,7 +1286,7 @@ int del_timer_sync(struct timer_list *ti +@@ -1293,7 +1324,7 @@ int del_timer_sync(struct timer_list *ti int ret = try_to_del_timer_sync(timer); if (ret >= 0) return ret; @@ -127,25 +127,25 @@ Signed-off-by: Thomas Gleixner } } EXPORT_SYMBOL(del_timer_sync); -@@ -1319,13 +1350,16 @@ static void expire_timers(struct timer_b +@@ -1354,13 +1385,16 @@ static void expire_timers(struct timer_b + fn = timer->function; - data = timer->data; - if (timer->flags & TIMER_IRQSAFE) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && + timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); - call_timer_fn(timer, fn, data); + call_timer_fn(timer, fn); + base->running_timer = NULL; raw_spin_lock(&base->lock); } else { raw_spin_unlock_irq(&base->lock); - call_timer_fn(timer, fn, data); + call_timer_fn(timer, fn); + base->running_timer = NULL; raw_spin_lock_irq(&base->lock); } } -@@ -1627,8 +1661,8 @@ static inline void __run_timers(struct t +@@ -1665,8 +1699,8 @@ static inline void __run_timers(struct t while (levels--) expire_timers(base, heads + levels); } @@ -155,7 +155,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -1868,6 +1902,9 @@ static void __init init_timer_cpu(int cp +@@ -1924,6 +1958,9 @@ static void __init init_timer_cpu(int cp base->cpu = cpu; raw_spin_lock_init(&base->lock); base->clk = jiffies; diff --git a/debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch b/debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch index 9c0c594ba..5ab7b87fc 100644 --- a/debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch +++ b/debian/patches/features/all/rt/tpm_tis-fix-stall-after-iowrite-s.patch @@ -1,7 +1,7 @@ From: Haris Okanovic Date: Tue, 15 Aug 2017 15:13:08 -0500 Subject: [PATCH] tpm_tis: fix stall after iowrite*()s -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz ioread8() operations to TPM MMIO addresses can stall the cpu when immediately following a sequence of iowrite*()'s to the same region. 
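/*
 * Editor's illustrative sketch of the workaround's core idea, not the
 * patch's exact code: trail each posted MMIO write with a read-back so
 * the write drains before the CPU moves on, and a following ioread8()
 * to the same region no longer stalls.  The helper name is
 * hypothetical; the hunks below add the real tpm_tis_iowrite8() and
 * tpm_tis_iowrite32() equivalents.
 */
static inline void mmio_write8_flushed(u8 b, void __iomem *iobase, u32 addr)
{
	iowrite8(b, iobase + addr);
	ioread8(iobase + addr);	/* read back to flush the posted write */
}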
@@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c -@@ -52,6 +52,31 @@ static inline struct tpm_tis_tcg_phy *to +@@ -53,6 +53,31 @@ static inline struct tpm_tis_tcg_phy *to return container_of(data, struct tpm_tis_tcg_phy, priv); } @@ -58,21 +58,21 @@ Signed-off-by: Sebastian Andrzej Siewior static bool interrupts = true; module_param(interrupts, bool, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); -@@ -230,7 +255,7 @@ static int tpm_tcg_write_bytes(struct tp - tpm_platform_begin_xfer(); +@@ -150,7 +175,7 @@ static int tpm_tcg_write_bytes(struct tp + struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); while (len--) - iowrite8(*value++, phy->iobase + addr); + tpm_tis_iowrite8(*value++, phy->iobase, addr); - tpm_platform_end_xfer(); - + return 0; + } -@@ -269,7 +294,7 @@ static int tpm_tcg_write32(struct tpm_ti - - tpm_platform_begin_xfer(); + { + struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); - iowrite32(value, phy->iobase + addr); + tpm_tis_iowrite32(value, phy->iobase, addr); - tpm_platform_end_xfer(); - + return 0; + } diff --git a/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch b/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch index 4cdd4f772..25072efd2 100644 --- a/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch +++ b/debian/patches/features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 11 Apr 2016 16:55:02 +0200 Subject: [PATCH] tty: serial: 8250: don't take the trylock during oops -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz An oops with irqs off (panic() from irqsafe hrtimer like the watchdog timer) will lead to a lockdep warning on each invocation and as such diff --git a/debian/patches/features/all/rt/tty-serial-atmel-use-port-name-as-name-in-request_ir.patch b/debian/patches/features/all/rt/tty-serial-atmel-use-port-name-as-name-in-request_ir.patch new file mode 100644 index 000000000..e7c8bf4c8 --- /dev/null +++ b/debian/patches/features/all/rt/tty-serial-atmel-use-port-name-as-name-in-request_ir.patch @@ -0,0 +1,56 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 26 Apr 2018 16:42:24 +0200 +Subject: [PATCH] tty/serial: atmel: use port->name as name in request_irq() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +I was puzzled while looking at /proc/interrupts and random things showed +up between reboots. This occurred more often, but I only realised it later. The +"correct" output should be: +|38: 11861 atmel-aic5 2 Level ttyS0 + +but I saw sometimes +|38: 6426 atmel-aic5 2 Level tty1 + +and wrongly accepted it as correct. This is a use-after-free, and the +former example randomly got the "old" pointer, which pointed to the same +content. With SLAB_FREELIST_RANDOM and HARDENED I even got +|38: 7067 atmel-aic5 2 Level E=Started User Manager for UID 0 + +or other nonsense. +As it turns out, the tty pointer that is accessed in atmel_startup() is +freed before atmel_shutdown(). It seems to happen quite often that the +tty for ttyS0 is allocated and freed while ->shutdown is not invoked.
I +don't do anything special - just a systemd boot :) + +It seems not to happen in v4.1.51 but it happens in v4.9 and v4.17-rc2 +so if it broke accidentally, the breakage is not recent. + +Use port->name as the IRQ name for request_irq(). This exists as long as +the driver is loaded so no use-after-free here. + +Cc: stable@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/tty/serial/atmel_serial.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/tty/serial/atmel_serial.c ++++ b/drivers/tty/serial/atmel_serial.c +@@ -1757,7 +1757,6 @@ static int atmel_startup(struct uart_por + { + struct platform_device *pdev = to_platform_device(port->dev); + struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); +- struct tty_struct *tty = port->state->port.tty; + int retval; + + /* +@@ -1772,8 +1771,7 @@ static int atmel_startup(struct uart_por + * Allocate the IRQ + */ + retval = request_irq(port->irq, atmel_interrupt, +- IRQF_SHARED | IRQF_COND_SUSPEND, +- tty ? tty->name : "atmel_serial", port); ++ IRQF_SHARED | IRQF_COND_SUSPEND, port->name, port); + if (retval) { + dev_err(port->dev, "atmel_startup - Can't get irq\n"); + return retval; diff --git a/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch b/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch index 574e59497..4151d503c 100644 --- a/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch +++ b/debian/patches/features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch @@ -1,7 +1,7 @@ Subject: net: Remove preemption disabling in netif_rx() From: Priyanka Jain Date: Thu, 17 May 2012 09:35:11 +0530 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz 1)enqueue_to_backlog() (called from netif_rx) should be bound to a particular CPU. This can be achieved by @@ -38,7 +38,7 @@ Signed-off-by: Thomas Gleixner --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4016,7 +4016,7 @@ static int netif_rx_internal(struct sk_b +@@ -4147,7 +4147,7 @@ static int netif_rx_internal(struct sk_b struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -4026,14 +4026,14 @@ static int netif_rx_internal(struct sk_b +@@ -4157,14 +4157,14 @@ static int netif_rx_internal(struct sk_b ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); diff --git a/debian/patches/features/all/rt/usb-do-not-disable-interrupts-in-giveback.patch b/debian/patches/features/all/rt/usb-do-not-disable-interrupts-in-giveback.patch new file mode 100644 index 000000000..cf7404049 --- /dev/null +++ b/debian/patches/features/all/rt/usb-do-not-disable-interrupts-in-giveback.patch @@ -0,0 +1,40 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 8 Nov 2013 17:34:54 +0100 +Subject: usb: do not disable interrupts in giveback +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz + +Since commit 94dfd7ed ("USB: HCD: support giveback of URB in tasklet +context") the USB code disables interrupts before invoking the complete +callback. +This should not be required: the HCD completes the URBs either in hard-irq +context or in BH context.
Lockdep may report false positives if one has two +HCDs (one completes in IRQ and the other in BH context) and is using the same +USB driver (device) with both HCDs. This is safe since the same URBs are never +mixed with those two HCDs. +Longeterm we should force all HCDs to complete in the same context. + +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/usb/core/hcd.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/usb/core/hcd.c ++++ b/drivers/usb/core/hcd.c +@@ -1736,7 +1736,6 @@ static void __usb_hcd_giveback_urb(struc + struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); + struct usb_anchor *anchor = urb->anchor; + int status = urb->unlinked; +- unsigned long flags; + + urb->hcpriv = NULL; + if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) && +@@ -1764,9 +1763,7 @@ static void __usb_hcd_giveback_urb(struc + * and no one may trigger the above deadlock situation when + * running complete() in tasklet. + */ +- local_irq_save(flags); + urb->complete(urb); +- local_irq_restore(flags); + + usb_anchor_resume_wakeups(anchor); + atomic_dec(&urb->use_count); diff --git a/debian/patches/features/all/rt/wait.h-include-atomic.h.patch b/debian/patches/features/all/rt/wait.h-include-atomic.h.patch index dd6e52df5..c58d9aa13 100644 --- a/debian/patches/features/all/rt/wait.h-include-atomic.h.patch +++ b/debian/patches/features/all/rt/wait.h-include-atomic.h.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 28 Oct 2013 12:19:57 +0100 Subject: wait.h: include atomic.h -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz | CC init/main.o |In file included from include/linux/mmzone.h:9:0, diff --git a/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch b/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch index 09d6968c1..4de9884e5 100644 --- a/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch +++ b/debian/patches/features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Mon, 01 Jul 2013 11:02:42 +0200 Subject: workqueue: Prevent workqueue versus ata-piix livelock -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz An Intel i7 system regularly detected rcu_preempt stalls after the kernel was upgraded from 3.6-rt to 3.8-rt. 
When the stall happened, disk I/O was no @@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -50,6 +50,7 @@ - #include + #include #include #include +#include diff --git a/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch b/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch index bcf84cfe2..686ac3851 100644 --- a/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch +++ b/debian/patches/features/all/rt/work-simple-Simple-work-queue-implemenation.patch @@ -1,7 +1,7 @@ From: Daniel Wagner Date: Fri, 11 Jul 2014 15:26:11 +0200 Subject: work-simple: Simple work queue implemenation -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Provides a framework for enqueuing callbacks from irq context PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. diff --git a/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch b/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch index 58f4a821f..838e2f97a 100644 --- a/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch +++ b/debian/patches/features/all/rt/workqueue-distangle-from-rq-lock.patch @@ -22,7 +22,7 @@ Cc: Jens Axboe Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20110622174919.135236139@linutronix.de Signed-off-by: Thomas Gleixner -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz --- kernel/sched/core.c | 84 +++++++------------------------------------- @@ -32,7 +32,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1715,10 +1715,6 @@ static inline void ttwu_activate(struct +@@ -1726,10 +1726,6 @@ static inline void ttwu_activate(struct { activate_task(rq, p, en_flags); p->on_rq = TASK_ON_RQ_QUEUED; @@ -43,7 +43,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 } /* -@@ -2159,56 +2155,6 @@ try_to_wake_up(struct task_struct *p, un +@@ -2170,56 +2166,6 @@ try_to_wake_up(struct task_struct *p, un } /** @@ -100,7 +100,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 * wake_up_process - Wake up a specific process * @p: The process to be woken up. * -@@ -3409,21 +3355,6 @@ static void __sched notrace __schedule(b +@@ -3485,21 +3431,6 @@ static void __sched notrace __schedule(b atomic_inc(&rq->nr_iowait); delayacct_blkio_start(); } @@ -122,7 +122,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 } switch_count = &prev->nvcsw; } -@@ -3499,6 +3430,14 @@ static inline void sched_submit_work(str +@@ -3574,6 +3505,14 @@ static inline void sched_submit_work(str { if (!tsk->state || tsk_is_pi_blocked(tsk)) return; @@ -137,7 +137,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. 
-@@ -3507,6 +3446,12 @@ static inline void sched_submit_work(str +@@ -3582,6 +3521,12 @@ static inline void sched_submit_work(str blk_schedule_flush_plug(tsk); } @@ -150,7 +150,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4 asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; -@@ -3517,6 +3462,7 @@ asmlinkage __visible void __sched schedu +@@ -3592,6 +3537,7 @@ asmlinkage __visible void __sched schedu __schedule(false); sched_preempt_enable_no_resched(); } while (need_resched()); diff --git a/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch b/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch index c7e9e7a77..d505ec875 100644 --- a/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch +++ b/debian/patches/features/all/rt/workqueue-prevent-deadlock-stall.patch @@ -1,7 +1,7 @@ Subject: workqueue: Prevent deadlock/stall on RT From: Thomas Gleixner Date: Fri, 27 Jun 2014 16:24:52 +0200 (CEST) -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Austin reported a XFS deadlock/stall on RT where scheduled work gets never exececuted and tasks are waiting for each other for ever. @@ -44,7 +44,7 @@ Cc: Steven Rostedt --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3474,9 +3474,8 @@ void __noreturn do_task_dead(void) +@@ -3549,9 +3549,8 @@ void __noreturn do_task_dead(void) static inline void sched_submit_work(struct task_struct *tsk) { @@ -55,7 +55,7 @@ Cc: Steven Rostedt /* * If a worker went to sleep, notify and ask workqueue whether * it wants to wake up a task to maintain concurrency. -@@ -3484,6 +3483,10 @@ static inline void sched_submit_work(str +@@ -3559,6 +3558,10 @@ static inline void sched_submit_work(str if (tsk->flags & PF_WQ_WORKER) wq_worker_sleeping(tsk); @@ -169,7 +169,7 @@ Cc: Steven Rostedt } /** -@@ -1637,7 +1665,9 @@ static void worker_enter_idle(struct wor +@@ -1642,7 +1670,9 @@ static void worker_enter_idle(struct wor worker->last_active = jiffies; /* idle_list is LIFO */ @@ -179,7 +179,7 @@ Cc: Steven Rostedt if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); -@@ -1670,7 +1700,9 @@ static void worker_leave_idle(struct wor +@@ -1675,7 +1705,9 @@ static void worker_leave_idle(struct wor return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; @@ -189,7 +189,7 @@ Cc: Steven Rostedt } static struct worker *alloc_worker(int node) -@@ -1836,7 +1868,9 @@ static void destroy_worker(struct worker +@@ -1841,7 +1873,9 @@ static void destroy_worker(struct worker pool->nr_workers--; pool->nr_idle--; diff --git a/debian/patches/features/all/rt/workqueue-use-locallock.patch b/debian/patches/features/all/rt/workqueue-use-locallock.patch index 969abcf24..f74e0880f 100644 --- a/debian/patches/features/all/rt/workqueue-use-locallock.patch +++ b/debian/patches/features/all/rt/workqueue-use-locallock.patch @@ -1,21 +1,21 @@ Subject: workqueue: Use local irq lock instead of irq disable regions From: Thomas Gleixner Date: Sun, 17 Jul 2011 21:42:26 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Use a local_irq_lock as a replacement for irq off regions. 
We keep the semantic of irq-off in regard to the pool->lock and remain preemptible. Signed-off-by: Thomas Gleixner --- - kernel/workqueue.c | 36 ++++++++++++++++++++++-------------- - 1 file changed, 22 insertions(+), 14 deletions(-) + kernel/workqueue.c | 40 +++++++++++++++++++++++++++------------- + 1 file changed, 27 insertions(+), 13 deletions(-) --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -49,6 +49,7 @@ - #include #include + #include #include +#include @@ -62,16 +62,21 @@ Signed-off-by: Thomas Gleixner if (work_is_canceling(work)) return -ENOENT; cpu_relax(); -@@ -1380,7 +1385,7 @@ static void __queue_work(int cpu, struct +@@ -1380,7 +1385,13 @@ static void __queue_work(int cpu, struct * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. */ -- WARN_ON_ONCE(!irqs_disabled()); -+ WARN_ON_ONCE_NONRT(!irqs_disabled()); ++#ifndef CONFIG_PREEMPT_RT_FULL ++ /* ++ * nort: On RT the "interrupts-disabled" rule has been replaced with ++ * pendingb_lock. ++ */ + lockdep_assert_irqs_disabled(); ++#endif debug_work_activate(work); -@@ -1486,14 +1491,14 @@ bool queue_work_on(int cpu, struct workq +@@ -1486,14 +1497,14 @@ bool queue_work_on(int cpu, struct workq bool ret = false; unsigned long flags; @@ -88,9 +93,9 @@ Signed-off-by: Thomas Gleixner return ret; } EXPORT_SYMBOL(queue_work_on); -@@ -1502,8 +1507,11 @@ void delayed_work_timer_fn(unsigned long +@@ -1502,8 +1513,11 @@ void delayed_work_timer_fn(struct timer_ { - struct delayed_work *dwork = (struct delayed_work *)__data; + struct delayed_work *dwork = from_timer(dwork, t, timer); + /* XXX */ + /* local_lock(pendingb_lock); */ @@ -100,7 +105,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(delayed_work_timer_fn); -@@ -1559,14 +1567,14 @@ bool queue_delayed_work_on(int cpu, stru +@@ -1558,14 +1572,14 @@ bool queue_delayed_work_on(int cpu, stru unsigned long flags; /* read the comment in __queue_work() */ @@ -117,7 +122,7 @@ Signed-off-by: Thomas Gleixner return ret; } EXPORT_SYMBOL(queue_delayed_work_on); -@@ -1601,7 +1609,7 @@ bool mod_delayed_work_on(int cpu, struct +@@ -1600,7 +1614,7 @@ bool mod_delayed_work_on(int cpu, struct if (likely(ret >= 0)) { __queue_delayed_work(cpu, wq, dwork, delay); @@ -126,7 +131,7 @@ Signed-off-by: Thomas Gleixner } /* -ENOENT from try_to_grab_pending() becomes %true */ -@@ -2951,7 +2959,7 @@ static bool __cancel_work_timer(struct w +@@ -2937,7 +2951,7 @@ static bool __cancel_work_timer(struct w /* tell other tasks trying to grab @work to back off */ mark_work_canceling(work); @@ -135,7 +140,7 @@ Signed-off-by: Thomas Gleixner /* * This allows canceling during early boot. 
We know that @work -@@ -3012,10 +3020,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); +@@ -2998,10 +3012,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); */ bool flush_delayed_work(struct delayed_work *dwork) { @@ -148,7 +153,7 @@ Signed-off-by: Thomas Gleixner return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); -@@ -3033,7 +3041,7 @@ static bool __cancel_work(struct work_st +@@ -3019,7 +3033,7 @@ static bool __cancel_work(struct work_st return false; set_work_pool_and_clear_pending(work, get_work_pool_id(work)); diff --git a/debian/patches/features/all/rt/workqueue-use-rcu.patch b/debian/patches/features/all/rt/workqueue-use-rcu.patch index 16361956a..dc99370f5 100644 --- a/debian/patches/features/all/rt/workqueue-use-rcu.patch +++ b/debian/patches/features/all/rt/workqueue-use-rcu.patch @@ -1,7 +1,7 @@ Subject: workqueue: Use normal rcu From: Thomas Gleixner Date: Wed, 24 Jul 2013 15:26:54 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz There is no need for sched_rcu. The undocumented reason why sched_rcu is used is to avoid a few explicit rcu_read_lock()/unlock() pairs by @@ -177,7 +177,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -2815,14 +2819,14 @@ static bool start_flush_work(struct work +@@ -2804,14 +2808,14 @@ static bool start_flush_work(struct work might_sleep(); @@ -195,7 +195,7 @@ Signed-off-by: Thomas Gleixner /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { -@@ -2853,10 +2857,11 @@ static bool start_flush_work(struct work +@@ -2842,10 +2846,11 @@ static bool start_flush_work(struct work lock_map_acquire(&pwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map); } @@ -208,7 +208,7 @@ Signed-off-by: Thomas Gleixner return false; } -@@ -3284,7 +3289,7 @@ static void rcu_free_pool(struct rcu_hea +@@ -3260,7 +3265,7 @@ static void rcu_free_pool(struct rcu_hea * put_unbound_pool - put a worker_pool * @pool: worker_pool to put * @@ -217,7 +217,7 @@ Signed-off-by: Thomas Gleixner * safe manner. get_unbound_pool() calls this function on its failure path * and this function should be able to release pools which went through, * successfully or not, init_worker_pool(). -@@ -3338,8 +3343,8 @@ static void put_unbound_pool(struct work +@@ -3314,8 +3319,8 @@ static void put_unbound_pool(struct work del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->mayday_timer); @@ -228,7 +228,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -3446,14 +3451,14 @@ static void pwq_unbound_release_workfn(s +@@ -3422,14 +3427,14 @@ static void pwq_unbound_release_workfn(s put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); @@ -245,7 +245,7 @@ Signed-off-by: Thomas Gleixner } /** -@@ -4128,7 +4133,7 @@ void destroy_workqueue(struct workqueue_ +@@ -4115,7 +4120,7 @@ void destroy_workqueue(struct workqueue_ * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ @@ -254,7 +254,7 @@ Signed-off-by: Thomas Gleixner } else { /* * We're the sole accessor of @wq at this point. 
Directly -@@ -4222,7 +4227,8 @@ bool workqueue_congested(int cpu, struct +@@ -4225,7 +4230,8 @@ bool workqueue_congested(int cpu, struct struct pool_workqueue *pwq; bool ret; @@ -264,7 +264,7 @@ Signed-off-by: Thomas Gleixner if (cpu == WORK_CPU_UNBOUND) cpu = smp_processor_id(); -@@ -4233,7 +4239,8 @@ bool workqueue_congested(int cpu, struct +@@ -4236,7 +4242,8 @@ bool workqueue_congested(int cpu, struct pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); ret = !list_empty(&pwq->delayed_works); @@ -274,7 +274,7 @@ Signed-off-by: Thomas Gleixner return ret; } -@@ -4259,15 +4266,15 @@ unsigned int work_busy(struct work_struc +@@ -4262,15 +4269,15 @@ unsigned int work_busy(struct work_struc if (work_pending(work)) ret |= WORK_BUSY_PENDING; @@ -294,7 +294,7 @@ Signed-off-by: Thomas Gleixner return ret; } -@@ -4456,7 +4463,7 @@ void show_workqueue_state(void) +@@ -4459,7 +4466,7 @@ void show_workqueue_state(void) unsigned long flags; int pi; @@ -303,7 +303,7 @@ Signed-off-by: Thomas Gleixner pr_info("Showing busy workqueues and worker pools:\n"); -@@ -4521,7 +4528,7 @@ void show_workqueue_state(void) +@@ -4524,7 +4531,7 @@ void show_workqueue_state(void) touch_nmi_watchdog(); } @@ -312,7 +312,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -4882,16 +4889,16 @@ bool freeze_workqueues_busy(void) +@@ -4872,16 +4879,16 @@ bool freeze_workqueues_busy(void) * nr_active is monotonically decreasing. It's safe * to peek without lock. */ @@ -332,7 +332,7 @@ Signed-off-by: Thomas Gleixner } out_unlock: mutex_unlock(&wq_pool_mutex); -@@ -5081,7 +5088,8 @@ static ssize_t wq_pool_ids_show(struct d +@@ -5076,7 +5083,8 @@ static ssize_t wq_pool_ids_show(struct d const char *delim = ""; int node, written = 0; @@ -342,7 +342,7 @@ Signed-off-by: Thomas Gleixner for_each_node(node) { written += scnprintf(buf + written, PAGE_SIZE - written, "%s%d:%d", delim, node, -@@ -5089,7 +5097,8 @@ static ssize_t wq_pool_ids_show(struct d +@@ -5084,7 +5092,8 @@ static ssize_t wq_pool_ids_show(struct d delim = " "; } written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); diff --git a/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch b/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch index 12c0efa17..81df94d87 100644 --- a/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch +++ b/debian/patches/features/all/rt/x86-UV-raw_spinlock-conversion.patch @@ -1,7 +1,7 @@ From: Mike Galbraith Date: Sun, 2 Nov 2014 08:31:37 +0100 Subject: x86: UV: raw_spinlock conversion -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Shrug. Lots of hobbyists have a beast in their basement, right? @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h -@@ -643,9 +643,9 @@ struct bau_control { +@@ -642,9 +642,9 @@ struct bau_control { cycles_t send_message; cycles_t period_end; cycles_t period_time; @@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* tunables */ int max_concurr; int max_concurr_const; -@@ -847,15 +847,15 @@ static inline int atom_asr(short i, stru +@@ -846,15 +846,15 @@ static inline int atom_asr(short i, stru * to be lowered below the current 'v'. atomic_add_unless can only stop * on equal. 
*/ @@ -124,7 +124,7 @@ Signed-off-by: Sebastian Andrzej Siewior return -1; } -@@ -1941,9 +1941,9 @@ static void __init init_per_cpu_tunables +@@ -1942,9 +1942,9 @@ static void __init init_per_cpu_tunables bcp->cong_reps = congested_reps; bcp->disabled_period = sec_2_cycles(disabled_period); bcp->giveup_limit = giveup_limit; diff --git a/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch b/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch index ab0473ac3..a0da53251 100644 --- a/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch +++ b/debian/patches/features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch @@ -1,7 +1,7 @@ Subject: x86: crypto: Reduce preempt disabled regions From: Peter Zijlstra Date: Mon, 14 Nov 2011 18:19:27 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Restrict the preempt disabled regions to the actual floating point operations and enable preemption for the administrative actions. @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c -@@ -386,14 +386,14 @@ static int ecb_encrypt(struct skcipher_r +@@ -387,14 +387,14 @@ static int ecb_encrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -408,14 +408,14 @@ static int ecb_decrypt(struct skcipher_r +@@ -409,14 +409,14 @@ static int ecb_decrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -430,14 +430,14 @@ static int cbc_encrypt(struct skcipher_r +@@ -431,14 +431,14 @@ static int cbc_encrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -70,7 +70,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -452,14 +452,14 @@ static int cbc_decrypt(struct skcipher_r +@@ -453,14 +453,14 @@ static int cbc_decrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -87,7 +87,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -509,18 +509,20 @@ static int ctr_crypt(struct skcipher_req +@@ -510,18 +510,20 @@ static int ctr_crypt(struct skcipher_req err = skcipher_walk_virt(&walk, req, true); diff --git a/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch b/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch index 2b527ab44..9d782fbf6 100644 --- a/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch +++ b/debian/patches/features/all/rt/x86-highmem-add-a-already-used-pte-check.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior Date: Mon, 11 Mar 2013 17:09:55 +0100 Subject: x86/highmem: Add a "already used pte" check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz This is a copy from kmap_atomic_prot(). 
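For context, the pattern this patch copies is the sanity check in the mainline x86-32 kmap_atomic_prot() path. A minimal sketch of that check follows; the identifiers mirror arch/x86/mm/highmem_32.c, and since the hunk itself is not reproduced in this context window, treat this as a sketch of the idea rather than the patch body.

	type = kmap_atomic_idx_push();
	idx = type + KM_TYPE_NR * smp_processor_id();
	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
	/*
	 * The "already used pte" check: the per-CPU fixmap slot picked for
	 * this nesting level must still be empty. If it is not, an earlier
	 * kmap_atomic() was never paired with a kunmap_atomic().
	 */
	WARN_ON(!pte_none(*(kmap_pte - idx)));
	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
	arch_flush_lazy_mmu_mode();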
diff --git a/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch b/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch index 442197574..0e24f5860 100644 --- a/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch +++ b/debian/patches/features/all/rt/x86-io-apic-migra-no-unmask.patch @@ -1,7 +1,7 @@ From: Ingo Molnar Date: Fri, 3 Jul 2009 08:29:27 -0500 Subject: x86/ioapic: Do not unmask io_apic when interrupt is in progress -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz With threaded interrupts we might see an interrupt in progress on migration. Do not unmask it when this is the case. @@ -16,7 +16,7 @@ xXx --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c -@@ -1690,7 +1690,8 @@ static bool io_apic_level_ack_pending(st +@@ -1732,7 +1732,8 @@ static bool io_apic_level_ack_pending(st static inline bool ioapic_irqd_mask(struct irq_data *data) { /* If we are moving the irq we need to mask it */ diff --git a/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch b/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch index 93fc9321a..eaa824e33 100644 --- a/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch +++ b/debian/patches/features/all/rt/x86-kvm-require-const-tsc-for-rt.patch @@ -1,7 +1,7 @@ Subject: x86: kvm Require const tsc for RT From: Thomas Gleixner Date: Sun, 06 Nov 2011 12:26:18 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Non constant TSC is a nightmare on bare metal already, but with virtualization it becomes a complete disaster because the workarounds @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -6138,6 +6138,13 @@ int kvm_arch_init(void *opaque) +@@ -6391,6 +6391,13 @@ int kvm_arch_init(void *opaque) goto out; } diff --git a/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch b/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch index 6c59a9ed1..eb019e73f 100644 --- a/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch +++ b/debian/patches/features/all/rt/x86-mce-timer-hrtimer.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Mon, 13 Dec 2010 16:33:39 +0100 Subject: x86: Convert mce timer to hrtimer -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz mce_timer is started in atomic contexts of cpu bringup. This results in might_sleep() warnings on RT. 
Convert mce_timer to a hrtimer to @@ -22,12 +22,12 @@ fold in: |[bigeasy: use ULL instead of u64 cast] |Signed-off-by: Sebastian Andrzej Siewior --- - arch/x86/kernel/cpu/mcheck/mce.c | 54 ++++++++++++++++++--------------------- - 1 file changed, 26 insertions(+), 28 deletions(-) + arch/x86/kernel/cpu/mcheck/mce.c | 52 +++++++++++++++++++-------------------- + 1 file changed, 26 insertions(+), 26 deletions(-) --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -42,6 +42,7 @@ +@@ -41,6 +41,7 @@ #include #include #include @@ -35,7 +35,7 @@ fold in: #include #include -@@ -1345,7 +1346,7 @@ int memory_failure(unsigned long pfn, in +@@ -1363,7 +1364,7 @@ int memory_failure(unsigned long pfn, in static unsigned long check_interval = INITIAL_CHECK_INTERVAL; static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ @@ -44,7 +44,7 @@ fold in: static unsigned long mce_adjust_timer_default(unsigned long interval) { -@@ -1354,27 +1355,19 @@ static unsigned long mce_adjust_timer_de +@@ -1372,26 +1373,18 @@ static unsigned long mce_adjust_timer_de static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; @@ -58,27 +58,26 @@ fold in: - - if (!timer_pending(t) || time_before(when, t->expires)) - mod_timer(t, round_jiffies(when)); +- +- local_irq_restore(flags); + if (!iv) + return; - -- local_irq_restore(flags); + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), + 0, HRTIMER_MODE_REL_PINNED); } --static void mce_timer_fn(unsigned long data) -+static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) +-static void mce_timer_fn(struct timer_list *t) ++static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) { -- struct timer_list *t = this_cpu_ptr(&mce_timer); -- int cpu = smp_processor_id(); +- struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); unsigned long iv; -- WARN_ON(cpu != data); +- WARN_ON(cpu_t != t); - iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { -@@ -1397,7 +1390,11 @@ static void mce_timer_fn(unsigned long d +@@ -1414,7 +1407,11 @@ static void mce_timer_fn(struct timer_li done: __this_cpu_write(mce_next_interval, iv); @@ -91,7 +90,7 @@ fold in: } /* -@@ -1405,7 +1402,7 @@ static void mce_timer_fn(unsigned long d +@@ -1422,7 +1419,7 @@ static void mce_timer_fn(struct timer_li */ void mce_timer_kick(unsigned long interval) { @@ -100,7 +99,7 @@ fold in: unsigned long iv = __this_cpu_read(mce_next_interval); __start_timer(t, interval); -@@ -1420,7 +1417,7 @@ static void mce_timer_delete_all(void) +@@ -1437,7 +1434,7 @@ static void mce_timer_delete_all(void) int cpu; for_each_online_cpu(cpu) @@ -109,7 +108,7 @@ fold in: } /* -@@ -1749,7 +1746,7 @@ static void __mcheck_cpu_clear_vendor(st +@@ -1766,7 +1763,7 @@ static void __mcheck_cpu_clear_vendor(st } } @@ -118,15 +117,14 @@ fold in: { unsigned long iv = check_interval * HZ; -@@ -1762,18 +1759,19 @@ static void mce_start_timer(struct timer +@@ -1779,16 +1776,19 @@ static void mce_start_timer(struct timer static void __mcheck_cpu_setup_timer(void) { - struct timer_list *t = this_cpu_ptr(&mce_timer); -- unsigned int cpu = smp_processor_id(); + struct hrtimer *t = this_cpu_ptr(&mce_timer); -- setup_pinned_timer(t, mce_timer_fn, cpu); +- timer_setup(t, mce_timer_fn, TIMER_PINNED); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = mce_timer_fn; } @@ -134,17 +132,16 @@ fold in: static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = this_cpu_ptr(&mce_timer); -- 
unsigned int cpu = smp_processor_id(); + struct hrtimer *t = this_cpu_ptr(&mce_timer); + + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = mce_timer_fn; -- setup_pinned_timer(t, mce_timer_fn, cpu); +- timer_setup(t, mce_timer_fn, TIMER_PINNED); mce_start_timer(t); } -@@ -2275,7 +2273,7 @@ static int mce_cpu_dead(unsigned int cpu +@@ -2307,7 +2307,7 @@ static int mce_cpu_dead(unsigned int cpu static int mce_cpu_online(unsigned int cpu) { @@ -153,7 +150,7 @@ fold in: int ret; mce_device_create(cpu); -@@ -2292,10 +2290,10 @@ static int mce_cpu_online(unsigned int c +@@ -2324,10 +2324,10 @@ static int mce_cpu_online(unsigned int c static int mce_cpu_pre_down(unsigned int cpu) { diff --git a/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch b/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch index c0f37f305..13ce1fb20 100644 --- a/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch +++ b/debian/patches/features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch @@ -1,7 +1,7 @@ Subject: x86/mce: use swait queue for mce wakeups From: Steven Rostedt Date: Fri, 27 Feb 2015 15:20:37 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz We had a customer report a lockup on a 3.0-rt kernel that had the following backtrace: diff --git a/debian/patches/features/all/rt/x86-preempt-lazy.patch b/debian/patches/features/all/rt/x86-preempt-lazy.patch index a08d7b750..ad6b5d322 100644 --- a/debian/patches/features/all/rt/x86-preempt-lazy.patch +++ b/debian/patches/features/all/rt/x86-preempt-lazy.patch @@ -1,7 +1,7 @@ Subject: x86: Support for lazy preemption From: Thomas Gleixner Date: Thu, 01 Nov 2012 11:03:47 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz Implement the x86 pieces for lazy preempt. @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -170,6 +170,7 @@ config X86 +@@ -174,6 +174,7 @@ config X86 select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c -@@ -132,7 +132,7 @@ static long syscall_trace_enter(struct p +@@ -133,7 +133,7 @@ static long syscall_trace_enter(struct p #define EXIT_TO_USERMODE_LOOP_FLAGS \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { -@@ -147,7 +147,7 @@ static void exit_to_usermode_loop(struct +@@ -148,7 +148,7 @@ static void exit_to_usermode_loop(struct /* We have work to do. */ local_irq_enable(); @@ -76,7 +76,7 @@ Signed-off-by: Thomas Gleixner call preempt_schedule_irq --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S -@@ -778,7 +778,23 @@ GLOBAL(swapgs_restore_regs_and_return_to +@@ -698,7 +698,23 @@ GLOBAL(swapgs_restore_regs_and_return_to bt $9, EFLAGS(%rsp) /* were interrupts off? 
*/ jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) @@ -152,12 +152,12 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PREEMPT --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h -@@ -55,11 +55,14 @@ struct task_struct; - +@@ -56,17 +56,24 @@ struct task_struct; struct thread_info { unsigned long flags; /* low level flags */ -+ int preempt_lazy_count; /* 0 => lazy preemptable -+ <0 => BUG */ + u32 status; /* thread synchronous flags */ ++ int preempt_lazy_count; /* 0 => lazy preemptable ++ <0 => BUG */ }; #define INIT_THREAD_INFO(tsk) \ @@ -166,8 +166,7 @@ Signed-off-by: Thomas Gleixner + .preempt_lazy_count = 0, \ } - #define init_stack (init_thread_union.stack) -@@ -68,6 +71,10 @@ struct thread_info { + #else /* !__ASSEMBLY__ */ #include @@ -178,7 +177,7 @@ Signed-off-by: Thomas Gleixner #endif /* -@@ -83,6 +90,7 @@ struct thread_info { +@@ -82,6 +89,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -186,7 +185,7 @@ Signed-off-by: Thomas Gleixner #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ -@@ -109,6 +117,7 @@ struct thread_info { +@@ -108,6 +116,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -194,7 +193,7 @@ Signed-off-by: Thomas Gleixner #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) -@@ -150,6 +159,8 @@ struct thread_info { +@@ -149,6 +158,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch b/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch index afe61e29f..d91d9b25c 100644 --- a/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch +++ b/debian/patches/features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch @@ -1,7 +1,7 @@ From: Yang Shi Date: Thu, 10 Dec 2015 10:58:51 -0800 Subject: x86/signal: delay calling signals on 32bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz When running some ptrace single step tests on x86-32 machine, the below problem is triggered: diff --git a/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch b/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch index 5718dadb9..eccd00d1e 100644 --- a/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch +++ b/debian/patches/features/all/rt/x86-stackprot-no-random-on-rt.patch @@ -1,7 +1,7 @@ From: Thomas Gleixner Date: Thu, 16 Dec 2010 14:25:18 +0100 Subject: x86: stackprotector: Avoid random pool on rt -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz CPU bringup calls into the random pool to initialize the stack canary. 
 During boot that works nicely even on RT as the might sleep
diff --git a/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch b/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch
index ff26f5489..1575c5895 100644
--- a/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch
+++ b/debian/patches/features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch
@@ -1,7 +1,7 @@
 From: Thomas Gleixner
 Date: Sun, 26 Jul 2009 02:21:32 +0200
 Subject: x86: Use generic rwsem_spinlocks on -rt
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
 
 Simplifies the separation of anon_rw_semaphores and rw_semaphores for
 -rt.
 
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner
 --- a/arch/x86/Kconfig
 +++ b/arch/x86/Kconfig
-@@ -256,8 +256,11 @@ config ARCH_MAY_HAVE_PC_FDC
+@@ -260,8 +260,11 @@ config ARCH_MAY_HAVE_PC_FDC
  	def_bool y
  	depends on ISA_DMA_API
 
diff --git a/debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch b/debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch
index ade2ebfea..44d282689 100644
--- a/debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch
+++ b/debian/patches/features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch
@@ -1,7 +1,7 @@
 From: Sebastian Andrzej Siewior
 Date: Thu, 5 Oct 2017 14:38:52 +0200
 Subject: [PATCH] xen/9pfs: don't inclide rwlock.h directly.
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
 
 rwlock.h should not be included directly. Instead linux/spinlock.h
 should be included. One thing it does is to break the RT build.
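To illustrate the include rule this patch enforces, here is a hypothetical driver fragment (demo_lock and demo_reader are made-up names, not part of the patch). rwlock.h is an internal header that expects to be pulled in via linux/spinlock.h; on an RT configuration a different rwlock implementation is substituted behind that umbrella header, which is why the direct include breaks the RT build while the umbrella header works everywhere.

	#include <linux/spinlock.h>	/* correct: provides the rwlock API on RT and !RT alike */
	/* #include <linux/rwlock.h> */	/* wrong: sidesteps the RT substitution and breaks the RT build */

	static DEFINE_RWLOCK(demo_lock);	/* hypothetical lock, for illustration only */

	static void demo_reader(void)
	{
		read_lock(&demo_lock);
		/* ... read-side critical section ... */
		read_unlock(&demo_lock);
	}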
diff --git a/debian/patches/series-rt b/debian/patches/series-rt index 2eabef452..dc4f51def 100644 --- a/debian/patches/series-rt +++ b/debian/patches/series-rt @@ -3,258 +3,239 @@ ########################################################### ############################################################ -# UPSTREAM changes queued +# UPSTREAM merged ############################################################ -features/all/rt/rcu-Suppress-lockdep-false-positive-boost_mtx-compla.patch +#AMD-iommu +features/all/rt/0001-iommu-amd-Use-raw-locks-on-atomic-context-paths.patch +features/all/rt/0002-iommu-amd-Don-t-use-dev_data-in-irte_ga_set_affinity.patch +features/all/rt/0003-iommu-amd-Avoid-locking-get_irq_table-from-atomic-co.patch +features/all/rt/0001-iommu-amd-Take-into-account-that-alloc_dev_data-may-.patch +features/all/rt/0002-iommu-amd-Turn-dev_data_list-into-a-lock-less-list.patch +features/all/rt/0003-iommu-amd-Split-domain-id-out-of-amd_iommu_devtable_.patch +features/all/rt/0004-iommu-amd-Split-irq_lookup_table-out-of-the-amd_iomm.patch +features/all/rt/0005-iommu-amd-Remove-the-special-case-from-alloc_irq_tab.patch +features/all/rt/0006-iommu-amd-Use-table-instead-irt-as-variable-name-in-.patch +features/all/rt/0007-iommu-amd-Factor-out-setting-the-remap-table-for-a-d.patch +features/all/rt/0008-iommu-amd-Drop-the-lock-while-allocating-new-irq-rem.patch +features/all/rt/0009-iommu-amd-Make-amd_iommu_devtable_lock-a-spin_lock.patch +features/all/rt/0010-iommu-amd-Return-proper-error-code-in-irq_remapping_.patch -############################################################ -# UPSTREAM FIXES, patches pending -############################################################ +# dcache +features/all/rt/take-write_seqcount_invalidate-into-__d_drop.patch +features/all/rt/fs-dcache-Remove-stale-comment-from-dentry_kill.patch +features/all/rt/fs-dcache-Move-dentry_kill-below-lock_parent.patch +features/all/rt/d_delete-get-rid-of-trylock-loop.patch +features/all/rt/0001-get-rid-of-trylock-loop-in-locking-dentries-on-shrin.patch +features/all/rt/0002-now-lock_parent-can-t-run-into-killed-dentry.patch +features/all/rt/0003-split-the-slow-part-of-lock_parent-off.patch +features/all/rt/0004-dput-consolidate-the-do-we-need-to-retain-it-into-an.patch +features/all/rt/0005-handle-move-to-LRU-in-retain_dentry.patch +features/all/rt/0006-get-rid-of-trylock-loop-around-dentry_kill.patch +features/all/rt/0007-fs-dcache-Avoid-a-try_lock-loop-in-shrink_dentry_lis.patch +features/all/rt/0008-dcache.c-trim-includes.patch +features/all/rt/0009-split-d_path-and-friends-into-a-separate-file.patch +features/all/rt/0010-take-out-orphan-externs-empty_string-slash_string.patch +features/all/rt/0011-fold-lookup_real-into-__lookup_hash.patch +features/all/rt/0012-debugfs_lookup-switch-to-lookup_one_len_unlocked.patch +features/all/rt/0013-lustre-get-rid-of-pointless-casts-to-struct-dentry.patch +features/all/rt/0014-oprofilefs-don-t-oops-on-allocation-failure.patch +features/all/rt/0015-make-non-exchanging-__d_move-copy-d_parent-rather-th.patch +features/all/rt/0016-fold-dentry_lock_for_move-into-its-sole-caller-and-c.patch +features/all/rt/0017-d_genocide-move-export-to-definition.patch -############################################################ -# Stuff broken upstream, patches submitted -############################################################ +# tracing: Inter-event (e.g. latency) support +# cherry-picked from Steven's tracing tree. 
+features/all/rt/0009-tracing-Move-hist-trigger-Documentation-to-histogram.patch +features/all/rt/0010-tracing-Add-Documentation-for-log2-modifier.patch +features/all/rt/0011-tracing-Add-support-to-detect-and-avoid-duplicates.patch +features/all/rt/0012-tracing-Remove-code-which-merges-duplicates.patch +features/all/rt/0013-ring-buffer-Add-interface-for-setting-absolute-time-.patch +features/all/rt/0014-ring-buffer-Redefine-the-unimplemented-RINGBUF_TYPE_.patch +features/all/rt/0015-tracing-Add-timestamp_mode-trace-file.patch +features/all/rt/0016-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch +features/all/rt/0017-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch +features/all/rt/0018-tracing-Break-out-hist-trigger-assignment-parsing.patch +features/all/rt/0019-tracing-Add-hist-trigger-timestamp-support.patch +features/all/rt/0020-tracing-Add-per-element-variable-support-to-tracing_.patch +features/all/rt/0021-tracing-Add-hist_data-member-to-hist_field.patch +features/all/rt/0022-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch +features/all/rt/0023-tracing-Add-variable-support-to-hist-triggers.patch +features/all/rt/0024-tracing-Account-for-variables-in-named-trigger-compa.patch +features/all/rt/0025-tracing-Move-get_hist_field_flags.patch +features/all/rt/0026-tracing-Add-simple-expression-support-to-hist-trigge.patch +features/all/rt/0027-tracing-Generalize-per-element-hist-trigger-data.patch +features/all/rt/0028-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch +features/all/rt/0029-tracing-Add-hist_field-type-field.patch +features/all/rt/0030-tracing-Add-variable-reference-handling-to-hist-trig.patch +features/all/rt/0031-tracing-Add-hist-trigger-action-hook.patch +features/all/rt/0032-tracing-Add-support-for-synthetic-events.patch +features/all/rt/0033-tracing-Add-support-for-field-variables.patch +features/all/rt/0034-tracing-Add-onmatch-hist-trigger-action-support.patch +features/all/rt/0035-tracing-Add-onmax-hist-trigger-action-support.patch +features/all/rt/0036-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch +features/all/rt/0037-tracing-Add-cpu-field-for-hist-triggers.patch +features/all/rt/0038-tracing-Add-hist-trigger-support-for-variable-refere.patch +features/all/rt/0039-tracing-Add-last-error-error-facility-for-hist-trigg.patch +features/all/rt/0040-tracing-Add-inter-event-hist-trigger-Documentation.patch +features/all/rt/0041-tracing-Make-tracing_set_clock-non-static.patch +features/all/rt/0042-tracing-Add-a-clock-attribute-for-hist-triggers.patch +features/all/rt/0045-ring-buffer-Add-nesting-for-adding-events-within-eve.patch +features/all/rt/0046-tracing-Use-the-ring-buffer-nesting-to-allow-synthet.patch +features/all/rt/0047-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch +features/all/rt/0048-selftests-ftrace-Add-inter-event-hist-triggers-testc.patch -# soft hrtimer patches (v4) -features/all/rt/0001-timers-Use-static-keys-for-migrate_enable-nohz_activ.patch -features/all/rt/0002-hrtimer-Correct-blantanly-wrong-comment.patch -features/all/rt/0003-hrtimer-Fix-kerneldoc-for-struct-hrtimer_cpu_base.patch -features/all/rt/0004-hrtimer-Cleanup-clock-argument-in-schedule_hrtimeout.patch -features/all/rt/0005-hrtimer-Fix-hrtimer-function-description.patch -features/all/rt/0006-hrtimer-Ensure-POSIX-compliance-relative-CLOCK_REALT.patch -features/all/rt/0007-hrtimer-Cleanup-hrtimer_mode-enum.patch -features/all/rt/0008-tracing-hrtimer-Take-all-clock-bases-and-modes-into-.patch 
-features/all/rt/0009-tracing-hrtimer-Print-hrtimer-mode-in-hrtimer_start-.patch -features/all/rt/0010-hrtimer-Switch-for-loop-to-_ffs-evaluation.patch -features/all/rt/0011-hrtimer-Store-running-timer-in-hrtimer_clock_base.patch -features/all/rt/0012-hrtimer-Make-room-in-struct-hrtimer_cpu_base.patch -features/all/rt/0013-hrtimer-Reduce-conditional-code-hres_active.patch -features/all/rt/0014-hrtimer-Use-accesor-functions-instead-of-direct-acce.patch -features/all/rt/0015-hrtimer-Make-the-remote-enqueue-check-unconditional.patch -features/all/rt/0016-hrtimer-Make-hrtimer_cpu_base.next_timer-handling-un.patch -features/all/rt/0017-hrtimer-Make-hrtimer_reprogramm-unconditional.patch -features/all/rt/0018-hrtimer-Make-hrtimer_force_reprogramm-unconditionall.patch -features/all/rt/0019-hrtimer-Unify-handling-of-hrtimer-remove.patch -features/all/rt/0020-hrtimer-Unify-handling-of-remote-enqueue.patch -features/all/rt/0021-hrtimer-Make-remote-enqueue-decision-less-restrictiv.patch -features/all/rt/0022-hrtimer-Remove-base-argument-from-hrtimer_reprogram.patch -features/all/rt/0023-hrtimer-Split-hrtimer_start_range_ns.patch -features/all/rt/0024-hrtimer-Split-__hrtimer_get_next_event.patch -features/all/rt/0025-hrtimer-Use-irqsave-irqrestore-around-__run_hrtimer.patch -features/all/rt/0026-hrtimer-Add-clock-bases-and-hrtimer-mode-for-soft-ir.patch -features/all/rt/0027-hrtimer-Prepare-handling-of-hard-and-softirq-based-h.patch -features/all/rt/0028-hrtimer-Implement-support-for-softirq-based-hrtimers.patch -features/all/rt/0029-hrtimer-Implement-SOFT-HARD-clock-base-selection.patch -features/all/rt/0030-can-bcm-Replace-hrtimer_tasklet-with-softirq-based-h.patch -features/all/rt/0031-mac80211_hwsim-Replace-hrtimer-tasklet-with-softirq-.patch -features/all/rt/0032-xfrm-Replace-hrtimer-tasklet-with-softirq-hrtimer.patch -features/all/rt/0033-softirq-Remove-tasklet_hrtimer.patch -features/all/rt/0034-ALSA-dummy-Replace-tasklet-with-softirq-hrtimer.patch -features/all/rt/0035-usb-gadget-NCM-Replace-tasklet-with-softirq-hrtimer.patch -features/all/rt/0036-net-mvpp2-Replace-tasklet-with-softirq-hrtimer.patch - -# Those two should vanish soon (not use PIT during bootup) -features/all/rt/at91_dont_enable_disable_clock.patch - -############################################################ -# Stuff which needs addressing upstream, but requires more -# information -############################################################ -features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch - -############################################################ -# Stuff broken upstream, need to be sent -############################################################ -features/all/rt/rtmutex--Handle-non-enqueued-waiters-gracefully.patch -features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch +#misc +features/all/rt/locking-rtmutex-Handle-non-enqueued-waiters-graceful.patch +features/all/rt/sched-Remove-TASK_ALL.patch features/all/rt/rxrpc-remove-unused-static-variables.patch +features/all/rt/drivers-net-8139-disable-irq-nosync.patch +features/all/rt/delayacct-use-raw_spinlocks.patch +features/all/rt/stop-machine-raw-lock.patch +features/all/rt/mmci-remove-bogus-irq-save.patch + +############################################################ +# POSTED by others +############################################################ +# AT91 +# Alexandre Belloni | [PATCH v4 0/6] clocksource: rework Atmel TCB timer driver +# Date: Wed, 18 Apr 2018 12:51:37 +0200 
+features/all/rt/0001-ARM-at91-add-TCB-registers-definitions.patch +features/all/rt/0002-clocksource-drivers-Add-a-new-driver-for-the-Atmel-A.patch +features/all/rt/0003-clocksource-drivers-atmel-pit-make-option-silent.patch +features/all/rt/0004-ARM-at91-Implement-clocksource-selection.patch +features/all/rt/0005-ARM-configs-at91-use-new-TCB-timer-driver.patch +features/all/rt/0006-ARM-configs-at91-unselect-PIT.patch + +############################################################ +# POSTED +############################################################ +features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch +features/all/rt/tty-serial-atmel-use-port-name-as-name-in-request_ir.patch +features/all/rt/rbtree-include-rcu.h-because-we-use-it.patch features/all/rt/mfd-syscon-atmel-smc-include-string.h.patch features/all/rt/sched-swait-include-wait.h.patch +features/all/rt/Revert-mm-vmstat.c-fix-vmstat_update-preemption-BUG.patch +features/all/rt/arm-convert-boot-lock-to-raw.patch +features/all/rt/x86-io-apic-migra-no-unmask.patch +features/all/rt/ACPICA-provide-abstraction-for-raw_spinlock_t.patch +features/all/rt/ACPICA-Convert-acpi_gbl_hardware-lock-back-to-an-acp.patch -# Wants a different fix for upstream +############################################################ +# Ready for posting +############################################################ +features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch +features/all/rt/x86-mce-timer-hrtimer.patch +features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch +features/all/rt/x86-UV-raw_spinlock-conversion.patch +features/all/rt/arm-unwind-use_raw_lock.patch + +############################################################ +# Needs to address review feedback +############################################################ +features/all/rt/posix-timers-no-broadcast.patch + +############################################################ +# Almost ready, needs final polishing +############################################################ +features/all/rt/rfc-arm-smp-__cpu_disable-fix-sleeping-function-called-from-invalid-context.patch +features/all/rt/drivers-random-reduce-preempt-disabled-region.patch +features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch +features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch + +############################################################### +# Stuff broken upstream and upstream wants something different +############################################################### features/all/rt/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch - -############################################################ -# Submitted on LKML -############################################################ -features/all/rt/Bluetooth-avoid-recursive-locking-in-hci_send_to_cha.patch -features/all/rt/iommu-iova-Use-raw_cpu_ptr-instead-of-get_cpu_ptr-fo.patch -features/all/rt/greybus-audio-don-t-inclide-rwlock.h-directly.patch -features/all/rt/xen-9pfs-don-t-inclide-rwlock.h-directly.patch -features/all/rt/tty-goldfish-Enable-earlycon-only-if-built-in.patch -features/all/rt/drm-i915-properly-init-lockdep-class.patch -features/all/rt/timerqueue-Document-return-values-of-timerqueue_add-.patch - -# SPARC part of erly printk consolidation -features/all/rt/sparc64-use-generic-rwsem-spinlocks-rt.patch - -# SRCU -# XXX features/all/rt/kernel-SRCU-provide-a-static-initializer.patch - -############################################################ -# Submitted to mips ML 
-############################################################ - -############################################################ -# Submitted to ARM ML -############################################################ - -############################################################ -# Submitted to PPC ML -############################################################ - -############################################################ -# Submitted on LKML -############################################################ - -############################################################ -# Submitted to net-dev -############################################################ - -############################################################ -# Pending in tip -############################################################ - -############################################################ -# Stuff which should go upstream ASAP -############################################################ features/all/rt/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch features/all/rt/add_migrate_disable.patch -# tracing: Bug fixes and minor cleanup | 2017-09-22 -features/all/rt/0001-tracing-Steve-s-unofficial-trace_recursive_lock-patc.patch -features/all/rt/0002-tracing-Reverse-the-order-of-trace_types_lock-and-ev.patch -features/all/rt/0004-tracing-Remove-lookups-from-tracing_map-hitcount.patch -features/all/rt/0005-tracing-Increase-tracing-map-KEYS_MAX-size.patch -features/all/rt/0006-tracing-Make-traceprobe-parsing-code-reusable.patch -features/all/rt/0007-tracing-Clean-up-hist_field_flags-enum.patch -features/all/rt/0008-tracing-Add-hist_field_name-accessor.patch -features/all/rt/0009-tracing-Reimplement-log2.patch -# v7 tracing: Inter-event (e.g. latency) support | 2017-12-06 -features/all/rt/0001-tracing-Move-hist-trigger-Documentation-to-histogram.patch -features/all/rt/0002-tracing-Add-Documentation-for-log2-modifier.patch -features/all/rt/0003-tracing-Add-support-to-detect-and-avoid-duplicates.patch -features/all/rt/0004-tracing-Remove-code-which-merges-duplicates.patch -features/all/rt/0005-ring-buffer-Add-interface-for-setting-absolute-time-.patch -features/all/rt/0006-ring-buffer-Redefine-the-unimplemented-RINGBUF_TYPE_.patch -features/all/rt/0007-tracing-Add-timestamp_mode-trace-file.patch -features/all/rt/0008-tracing-Give-event-triggers-access-to-ring_buffer_ev.patch -features/all/rt/0009-tracing-Add-ring-buffer-event-param-to-hist-field-fu.patch -features/all/rt/0010-tracing-Break-out-hist-trigger-assignment-parsing.patch -features/all/rt/0011-tracing-Add-hist-trigger-timestamp-support.patch -features/all/rt/0012-tracing-Add-per-element-variable-support-to-tracing_.patch -features/all/rt/0013-tracing-Add-hist_data-member-to-hist_field.patch -features/all/rt/0014-tracing-Add-usecs-modifier-for-hist-trigger-timestam.patch -features/all/rt/0015-tracing-Add-variable-support-to-hist-triggers.patch -features/all/rt/0016-tracing-Account-for-variables-in-named-trigger-compa.patch -features/all/rt/0017-tracing-Move-get_hist_field_flags.patch -features/all/rt/0018-tracing-Add-simple-expression-support-to-hist-trigge.patch -features/all/rt/0019-tracing-Generalize-per-element-hist-trigger-data.patch -features/all/rt/0020-tracing-Pass-tracing_map_elt-to-hist_field-accessor-.patch -features/all/rt/0021-tracing-Add-hist_field-type-field.patch -features/all/rt/0022-tracing-Add-variable-reference-handling-to-hist-trig.patch -features/all/rt/0023-tracing-Add-hist-trigger-action-hook.patch 
-features/all/rt/0024-tracing-Add-support-for-synthetic-events.patch -features/all/rt/0025-tracing-Add-support-for-field-variables.patch -features/all/rt/0026-tracing-Add-onmatch-hist-trigger-action-support.patch -features/all/rt/0027-tracing-Add-onmax-hist-trigger-action-support.patch -features/all/rt/0028-tracing-Allow-whitespace-to-surround-hist-trigger-fi.patch -features/all/rt/0029-tracing-Add-cpu-field-for-hist-triggers.patch -features/all/rt/0030-tracing-Add-hist-trigger-support-for-variable-refere.patch -features/all/rt/0031-tracing-Add-last-error-error-facility-for-hist-trigg.patch -features/all/rt/0032-tracing-Add-inter-event-hist-trigger-Documentation.patch -features/all/rt/0033-tracing-Make-tracing_set_clock-non-static.patch -features/all/rt/0034-tracing-Add-a-clock-attribute-for-hist-triggers.patch -features/all/rt/0035-tracing-Increase-trace_recursive_lock-limit-for-synt.patch -features/all/rt/0036-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch -features/all/rt/0037-selftests-ftrace-Add-inter-event-hist-triggers-testc.patch - -# SCHED BLOCK/WQ -features/all/rt/block-shorten-interrupt-disabled-regions.patch +# Those two should vanish soon (not use PIT during bootup) +features/all/rt/at91_dont_enable_disable_clock.patch +features/all/rt/clocksource-tclib-allow-higher-clockrates.patch # Timekeeping split jiffies lock. Needs a good argument :) features/all/rt/timekeeping-split-jiffies-lock.patch -# Tracing -features/all/rt/tracing-account-for-preempt-off-in-preempt_schedule.patch - # PTRACE/SIGNAL crap features/all/rt/signal-revert-ptrace-preempt-magic.patch -# ARM lock annotation -features/all/rt/arm-convert-boot-lock-to-raw.patch -features/all/rt/arm-kprobe-replace-patch_lock-to-raw-lock.patch - -# PREEMPT_ENABLE_NO_RESCHED - -# SIGNALS / POSIXTIMERS -features/all/rt/posix-timers-no-broadcast.patch -features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch - -# SCHED - -# GENERIC CMPXCHG - -# SHORTEN PREEMPT DISABLED -features/all/rt/drivers-random-reduce-preempt-disabled-region.patch - -# CLOCKSOURCE -features/all/rt/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch -features/all/rt/clockevents-drivers-timer-atmel-pit-fix-double-free_.patch -features/all/rt/clocksource-tclib-allow-higher-clockrates.patch - -# DRIVERS NET -features/all/rt/drivers-net-8139-disable-irq-nosync.patch - -# PREEMPT - # PM features/all/rt/suspend-prevernt-might-sleep-splats.patch # NETWORKING -features/all/rt/net-prevent-abba-deadlock.patch features/all/rt/net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch -features/all/rt/net_disable_NET_RX_BUSY_POLL.patch -# X86 -features/all/rt/x86-io-apic-migra-no-unmask.patch - -# RCU - -# LOCKING INIT FIXES - -# WORKQUEUE - -##################################################### -# Stuff which should go mainline, but wants some care -##################################################### -features/all/rt/rcu-segcblist-include-rcupdate.h.patch - -# SEQLOCK - -# ANON RW SEMAPHORES +# The removal of NONRT helpers +features/all/rt/alim15x3-move-irq-restore-before-pci_dev_put.patch +features/all/rt/ide-don-t-disable-interrupts-if-they-are-already-dis.patch +features/all/rt/ide-don-t-disable-interrupts-during-kmap_atomic.patch +features/all/rt/ide-don-t-enable-disable-interrupts-in-force-threade.patch +features/all/rt/block-avoid-disabling-interrupts-during-kmap_atomic.patch +features/all/rt/block-Remove-redundant-WARN_ON.patch +features/all/rt/block-shorten-interrupt-disabled-regions.patch 
+features/all/rt/dm-rq-remove-BUG_ON-irqs_disabled-check.patch
+features/all/rt/kernel-signal-Remove-no-longer-required-irqsave-rest.patch
+features/all/rt/ntfs-avoid-disabling-interrupts-during-kmap_atomic.patch
+features/all/rt/include-linux-u64_stats_sync.h-Remove-functions-with.patch
+features/all/rt/IB-ipoib-replace-local_irq_disable-with-proper-locki.patch
+features/all/rt/SCSI-libsas-remove-irq-save-in-sas_ata_qc_issue.patch
+features/all/rt/SCSI-qla2xxx-remove-irq-save-in-qla2x00_poll.patch
+features/all/rt/libata-remove-ata_sff_data_xfer_noirq.patch
+features/all/rt/net-3com-3c59x-Move-boomerang-vortex-conditional-int.patch
+features/all/rt/net-3com-3c59x-Pull-locking-out-of-ISR.patch
+features/all/rt/net-3com-3c59x-irq-save-variant-of-ISR.patch
+features/all/rt/posix-cpu-timers-remove-lockdep_assert_irqs_disabled.patch
+features/all/rt/ALSA-pcm-Hide-local_irq_disable-enable-and-local_irq.patch
+features/all/rt/usb-do-not-disable-interrupts-in-giveback.patch
+features/all/rt/percpu_ida-Use-_irqsave-instead-of-local_irq_save-sp.patch
+#atomic_dec_and_lock_irqsave
+features/all/rt/spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch
+features/all/rt/mm-backing-dev-Use-irqsave-variant-of-atomic_dec_and.patch
+features/all/rt/kernel-user-Use-irqsave-variant-of-atomic_dec_and_lo.patch
+features/all/rt/drivers-md-raid5-Use-irqsave-variant-of-atomic_dec_a.patch
+features/all/rt/drivers-md-raid5-Do-not-disable-irq-on-release_inact.patch
+#iommu
+features/all/rt/iommu-amd-hide-unused-iommu_table_lock.patch
+features/all/rt/iommu-amd-Prevent-possible-null-pointer-dereference-.patch
+features/all/rt/iommu-amd-Cleanup-locking-in-__attach-detach_device.patch
+features/all/rt/iommu-amd-Do-not-flush-when-device-is-busy.patch
+#net
+features/all/rt/lockdep-Add-a-assert_in_softirq.patch
+features/all/rt/net-mac808211-rc-warn_on.patch
 
 ##################################################
 # REAL RT STUFF starts here
 ##################################################
 
+# Kconfig on/off
+features/all/rt/rt-preempt-base-config.patch
+features/all/rt/kconfig-preempt-rt-full.patch
+features/all/rt/cpumask-disable-offstack-on-rt.patch
+features/all/rt/jump-label-rt.patch
+features/all/rt/kconfig-disable-a-few-options-rt.patch
+features/all/rt/lockdep-disable-self-test.patch
+features/all/rt/mm-disable-sloub-rt.patch
+features/all/rt/mutex-no-spin-on-rt.patch
+features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch
+features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch
+features/all/rt/sched-disable-rt-group-sched-on-rt.patch
+features/all/rt/net_disable_NET_RX_BUSY_POLL.patch
+features/all/rt/arm-disable-NEON-in-kernel-mode.patch
+features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch
+features/all/rt/power-use-generic-rwsem-on-rt.patch
+features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch
+features/all/rt/power-disable-highmem-on-rt.patch
+features/all/rt/mips-disable-highmem-on-rt.patch
+features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch
+features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch
+features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch
+features/all/rt/md-disable-bcache.patch
+
 # PRINTK
 features/all/rt/printk-kill.patch
 features/all/rt/printk-27force_early_printk-27-boot-param-to-help-with-debugging.patch
 
-# Enable RT CONFIG
-features/all/rt/rt-preempt-base-config.patch
-features/all/rt/kconfig-disable-a-few-options-rt.patch
-features/all/rt/kconfig-preempt-rt-full.patch
-
-# WARN/BUG_ON_RT
-features/all/rt/bug-rt-dependend-variants.patch
-features/all/rt/iommu-amd--Use-WARN_ON_NORT.patch
-
-# LOCAL_IRQ_RT/NON_RT
-features/all/rt/local-irq-rt-depending-variants.patch
-
 # PREEMPT NORT
 features/all/rt/preempt-nort-rt-variants.patch
@@ -263,23 +244,13 @@ features/all/rt/futex-workaround-migrate_disable-enable-in-different.patch
 features/all/rt/rt-local-irq-lock.patch
 
 # ANNOTATE local_irq_disable sites
-features/all/rt/ata-disable-interrupts-if-non-rt.patch
-features/all/rt/ide-use-nort-local-irq-variants.patch
-features/all/rt/infiniband-mellanox-ib-use-nort-irq.patch
-features/all/rt/inpt-gameport-use-local-irq-nort.patch
-features/all/rt/user-use-local-irq-nort.patch
-features/all/rt/usb-use-_nort-in-giveback.patch
 features/all/rt/mm-scatterlist-dont-disable-irqs-on-RT.patch
 features/all/rt/mm-workingset-do-not-protect-workingset_shadow_nodes.patch
 
 # Sigh
-features/all/rt/signal-fix-up-rcu-wreckage.patch
 features/all/rt/oleg-signal-rt-fix.patch
 features/all/rt/x86-signal-delay-calling-signals-on-32bit.patch
 
-# ANNOTATE BUG/WARNON
-features/all/rt/net-wireless-warn-nort.patch
-
 # BIT SPINLOCKS - SIGH
 features/all/rt/fs-replace-bh_uptodate_lock-for-rt.patch
 features/all/rt/fs-jbd-replace-bh_state-lock.patch
@@ -290,16 +261,6 @@ features/all/rt/list_bl-fixup-bogus-lockdep-warning.patch
 features/all/rt/genirq-disable-irqpoll-on-rt.patch
 features/all/rt/genirq-force-threading.patch
 
-# DRIVERS NET
-features/all/rt/drivers-net-vortex-fix-locking-issues.patch
-
-# ACCT
-features/all/rt/delayacct-use-raw_spinlocks.patch
-
-# MM PAGE_ALLOC
-features/all/rt/mm-page_alloc-rt-friendly-per-cpu-pages.patch
-features/all/rt/mm-page_alloc-reduce-lock-sections-further.patch
-
 # MM SWAP
 features/all/rt/mm-convert-swap-to-percpu-locked.patch
 features/all/rt/mm-perform-lru_add_drain_all-remotely.patch
@@ -310,29 +271,19 @@ features/all/rt/mm-make-vmstat-rt-aware.patch
 # MM memory
 features/all/rt/re-preempt_rt_full-arm-coredump-fails-for-cpu-3e-3d-4.patch
 
-# MM bounce
-features/all/rt/mm-bounce-local-irq-save-nort.patch
-
 # MM SLxB
-features/all/rt/mm-disable-sloub-rt.patch
 features/all/rt/mm-enable-slub.patch
-features/all/rt/mm-slub-close-possible-memory-leak-in-kmem_cache_all.patch
 features/all/rt/slub-enable-irqs-for-no-wait.patch
 features/all/rt/slub-disable-SLUB_CPU_PARTIAL.patch
 
 # MM
 features/all/rt/mm-page-alloc-use-local-lock-on-target-cpu.patch
-features/all/rt/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch
 features/all/rt/mm-memcontrol-do_not_disable_irq.patch
-features/all/rt/mm-backing-dev-don-t-disable-IRQs-in-wb_congested_pu.patch
 features/all/rt/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch
 
 # RADIX TREE
 features/all/rt/radix-tree-use-local-locks.patch
 
-# PANIC
-features/all/rt/panic-disable-random-on-rt.patch
-
 # TIMERS
 features/all/rt/timers-prepare-for-full-preemption.patch
 features/all/rt/timer-delay-waking-softirqs-from-the-jiffy-tick.patch
@@ -353,6 +304,7 @@ features/all/rt/time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch
 features/all/rt/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch
 features/all/rt/hrtimers-prepare-full-preemption.patch
 features/all/rt/hrtimer-by-timers-by-default-into-the-softirq-context.patch
+features/all/rt/posix-timers-move-the-rcu-head-out-of-the-union.patch
 features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch
 features/all/rt/timer-fd-avoid-live-lock.patch
 
@@ -366,36 +318,27 @@ features/all/rt/sched-mmdrop-delayed.patch
 features/all/rt/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch
 features/all/rt/sched-rt-mutex-wakeup.patch
 features/all/rt/sched-Prevent-task-state-corruption-by-spurious-lock.patch
-features/all/rt/sched-Remove-TASK_ALL.patch
 features/all/rt/sched-might-sleep-do-not-account-rcu-depth.patch
 features/all/rt/cond-resched-softirq-rt.patch
 features/all/rt/cond-resched-lock-rt-tweak.patch
 features/all/rt/sched-disable-ttwu-queue.patch
-features/all/rt/sched-disable-rt-group-sched-on-rt.patch
 features/all/rt/sched-ttwu-ensure-success-return-is-correct.patch
 features/all/rt/sched-workqueue-Only-wake-up-idle-workers-if-not-blo.patch
 features/all/rt/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch
 
-# STOP MACHINE
-features/all/rt/stop_machine-convert-stop_machine_run-to-PREEMPT_RT.patch
-features/all/rt/stop-machine-raw-lock.patch
-
 # MIGRATE DISABLE AND PER CPU
 features/all/rt/hotplug-light-get-online-cpus.patch
 features/all/rt/ftrace-migrate-disable-tracing.patch
 
-# NOHZ
-
 # LOCKDEP
 features/all/rt/lockdep-no-softirq-accounting-on-rt.patch
-features/all/rt/lockdep-disable-self-test.patch
 
 # SOFTIRQ
-features/all/rt/mutex-no-spin-on-rt.patch
 features/all/rt/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch
 features/all/rt/softirq-preempt-fix-3-re.patch
 features/all/rt/softirq-disable-softirq-stacks-for-rt.patch
 features/all/rt/softirq-split-locks.patch
+features/all/rt/net-core-use-local_bh_disable-in-netif_rx_ni.patch
 features/all/rt/kernel-softirq-unlock-with-irqs-on.patch
 features/all/rt/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch
 features/all/rt/softirq-split-timer-softirqs-out-of-ksoftirqd.patch
@@ -432,6 +375,8 @@ features/all/rt/locking-rt-mutex-fix-deadlock-in-device-mapper-block.patch
 features/all/rt/locking-rtmutex-re-init-the-wait_lock-in-rt_mutex_in.patch
 features/all/rt/ptrace-fix-ptrace-vs-tasklist_lock-race.patch
 features/all/rt/RCU-we-need-to-skip-that-warning-but-only-on-sleepin.patch
+features/all/rt/RCU-skip-the-schedule-in-RCU-section-warning-on-UP-t.patch
+features/all/rt/rtmutex-annotate-sleeping-lock-context.patch
 features/all/rt/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch
 
 # RCU
@@ -450,19 +395,10 @@ features/all/rt/tty-serial-8250-don-t-take-the-trylock-during-oops.patch
 features/all/rt/peterz-percpu-rwsem-rt.patch
 features/all/rt/fs-namespace-preemption-fix.patch
 features/all/rt/mm-protect-activate-switch-mm.patch
-features/all/rt/fs-block-rt-support.patch
-features/all/rt/fs-ntfs-disable-interrupt-non-rt.patch
-features/all/rt/fs-jbd2-pull-your-plug-when-waiting-for-space.patch
-features/all/rt/Revert-fs-jbd2-pull-your-plug-when-waiting-for-space.patch
-features/all/rt/fs-dcache-bringt-back-explicit-INIT_HLIST_BL_HEAD-in.patch
+features/all/rt/fs-dcache-bring-back-explicit-INIT_HLIST_BL_HEAD-in.patch
 features/all/rt/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch
 
 # X86
-features/all/rt/x86-mce-timer-hrtimer.patch
-features/all/rt/x86-mce-use-swait-queue-for-mce-wakeups.patch
-features/all/rt/x86-stackprot-no-random-on-rt.patch
-features/all/rt/x86-use-gen-rwsem-spinlocks-rt.patch
-features/all/rt/x86-UV-raw_spinlock-conversion.patch
 features/all/rt/thermal-Defer-thermal-wakups-to-threads.patch
 
 # CPU get light
@@ -472,14 +408,12 @@ features/all/rt/block-mq-use-cpu_light.patch
 features/all/rt/block-mq-drop-preempt-disable.patch
 features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch
 features/all/rt/md-raid5-percpu-handling-rt-aware.patch
-features/all/rt/md-raid5-do-not-disable-interrupts.patch
 
 # CPU CHILL
 features/all/rt/rt-introduce-cpu-chill.patch
-features/all/rt/cpu_chill-Add-a-UNINTERRUPTIBLE-hrtimer_nanosleep.patch
 
 # block
-features/all/rt/block-blk-mq-use-swait.patch
+features/all/rt/block-blk-mq-move-blk_queue_usage_counter_release-in.patch
 
 # BLOCK LIVELOCK PREVENTION
 features/all/rt/block-use-cpu-chill.patch
@@ -495,15 +429,9 @@ features/all/rt/workqueue-use-locallock.patch
 features/all/rt/work-queue-work-around-irqsafe-timer-optimization.patch
 features/all/rt/workqueue-distangle-from-rq-lock.patch
 
-# IDA
-features/all/rt/percpu_ida-use-locklocks.patch
-
 # DEBUGOBJECTS
 features/all/rt/debugobjects-rt.patch
 
-# JUMPLABEL
-features/all/rt/jump-label-rt.patch
-
 # SEQLOCKS
 features/all/rt/seqlock-prevent-rt-starvation.patch
 
@@ -512,6 +440,7 @@ features/all/rt/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch
 features/all/rt/skbufhead-raw-lock.patch
 features/all/rt/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
 features/all/rt/net-move-xmit_recursion-to-per-task-variable-on-RT.patch
+features/all/rt/net-use-task_struct-instead-of-CPU-number-as-the-que.patch
 features/all/rt/net-provide-a-way-to-delegate-processing-a-softirq-t.patch
 features/all/rt/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch
 features/all/rt/net-Qdisc-use-a-seqlock-instead-seqcount.patch
@@ -525,29 +454,19 @@ features/all/rt/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch
 features/all/rt/irqwork-push_most_work_into_softirq_context.patch
 features/all/rt/irqwork-Move-irq-safe-work-to-irq-context.patch
 
-# Sound
-features/all/rt/snd-pcm-fix-snd_pcm_stream_lock-irqs_disabled-splats.patch
-
 # CONSOLE. NEEDS more thought !!!
 features/all/rt/printk-rt-aware.patch
 features/all/rt/kernel-printk-Don-t-try-to-print-from-IRQ-NMI-region.patch
 features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch
 
 # POWERC
-features/all/rt/power-use-generic-rwsem-on-rt.patch
-features/all/rt/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch
 features/all/rt/powerpc-ps3-device-init.c-adapt-to-completions-using.patch
 
 # ARM
-features/all/rt/arm-at91-tclib-default-to-tclib-timer-for-rt.patch
-features/all/rt/arm-unwind-use_raw_lock.patch
 features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch
 features/all/rt/genirq-update-irq_set_irqchip_state-documentation.patch
 features/all/rt/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch
 
-# ARM64
-features/all/rt/arm64-xen--Make-XEN-depend-on-non-rt.patch
-
 # KGDB
 features/all/rt/kgb-serial-hackaround.patch
 
@@ -555,47 +474,30 @@ features/all/rt/kgb-serial-hackaround.patch
 # SYSFS - RT indicator
 features/all/rt/sysfs-realtime-entry.patch
 
 # KMAP/HIGHMEM
-features/all/rt/power-disable-highmem-on-rt.patch
-features/all/rt/mips-disable-highmem-on-rt.patch
 features/all/rt/mm-rt-kmap-atomic-scheduling.patch
 features/all/rt/mm--rt--Fix-generic-kmap_atomic-for-RT.patch
 features/all/rt/x86-highmem-add-a-already-used-pte-check.patch
 features/all/rt/arm-highmem-flush-tlb-on-unmap.patch
 features/all/rt/arm-enable-highmem-for-rt.patch
 
-# SYSRQ
-
 # SCSI/FCOE
 features/all/rt/scsi-fcoe-rt-aware.patch
-features/all/rt/sas-ata-isci-dont-t-disable-interrupts-in-qc_issue-h.patch
 
 # crypto drivers
 features/all/rt/x86-crypto-reduce-preempt-disabled-regions.patch
 features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch
 features/all/rt/crypto-limit-more-FPU-enabled-sections.patch
-features/all/rt/arm-disable-NEON-in-kernel-mode.patch
-
-# Device mapper
-features/all/rt/dm-make-rt-aware.patch
-
-# ACPI
-features/all/rt/acpi-rt-Convert-acpi_gbl_hardware-lock-back-to-a-raw.patch
-
-# CPUMASK OFFSTACK
-features/all/rt/cpumask-disable-offstack-on-rt.patch
 
 # RANDOM
+features/all/rt/panic-disable-random-on-rt.patch
+features/all/rt/x86-stackprot-no-random-on-rt.patch
 features/all/rt/random-make-it-work-on-rt.patch
 features/all/rt/random-avoid-preempt_disable-ed-section.patch
-features/all/rt/char-random-don-t-print-that-the-init-is-done.patch
 
 # HOTPLUG
 features/all/rt/cpu-hotplug--Implement-CPU-pinning.patch
 features/all/rt/hotplug-duct-tape-RT-rwlock-usage-for-non-RT.patch
 
-# SCSCI QLA2xxx
-features/all/rt/scsi-qla2xxx-fix-bug-sleeping-function-called-from-invalid-context.patch
-
 # NET
 features/all/rt/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch
 features/all/rt/net-another-local-irq-disable-alloc-atomic-headache.patch
@@ -604,22 +506,16 @@ features/all/rt/net-fix-iptable-xt-write-recseq-begin-rt-fallout.patch
 features/all/rt/net-make-devnet_rename_seq-a-mutex.patch
 
 # CRYPTO
-# XXX
 features/all/rt/peterz-srcu-crypto-chain.patch
 
 # LOCKDEP
 features/all/rt/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch
 features/all/rt/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch
 
-# PERF
-
 # RCU
 features/all/rt/srcu-use-cpu_online-instead-custom-check.patch
-features/all/rt/srcu-Prohibit-call_srcu-use-under-raw-spinlocks.patch
 features/all/rt/srcu-replace-local_irqsave-with-a-locallock.patch
-features/all/rt/rcu-disable-rcu-fast-no-hz-on-rt.patch
 features/all/rt/rcu-Eliminate-softirq-processing-from-rcutree.patch
-features/all/rt/rcu-make-RCU_BOOST-default-on-RT.patch
 features/all/rt/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch
 
 # PREEMPT LAZY
@@ -630,12 +526,7 @@ features/all/rt/arm-preempt-lazy-support.patch
 features/all/rt/powerpc-preempt-lazy-support.patch
 features/all/rt/arch-arm64-Add-lazy-preempt-support.patch
 
-# LEDS
-features/all/rt/leds-trigger-disable-CPU-trigger-on-RT.patch
-
 # DRIVERS
-features/all/rt/mmci-remove-bogus-irq-save.patch
-features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch
 features/all/rt/connector-cn_proc-Protect-send_msg-with-a-local-lock.patch
 features/all/rt/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch
 features/all/rt/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch
@@ -649,22 +540,16 @@ features/all/rt/drmi915_Use_local_lockunlock_irq()_in_intel_pipe_update_startend
 
 # CGROUPS
 features/all/rt/cgroups-use-simple-wait-in-css_release.patch
-features/all/rt/cgroups-scheduling-while-atomic-in-cgroup-code.patch
-features/all/rt/Revert-memcontrol-Prevent-scheduling-while-atomic-in.patch
 features/all/rt/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
 
-# New stuff
-# Revisit: We need this in other places as well
-features/all/rt/move_sched_delayed_work_to_helper.patch
-
-# MD
-features/all/rt/md-disable-bcache.patch
-
 # Security
 features/all/rt/apparmor-use-a-locallock-instead-preempt_disable.patch
 
 # WORKQUEUE SIGH
 features/all/rt/workqueue-prevent-deadlock-stall.patch
 
+# Nice to have
+features/all/rt/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
+
 # Add RT to version
 features/all/rt/localversion.patch