Update to 4.16.7-rt1 and reenable

Author: Salvatore Bonaccorso
Date: 2018-05-12 09:06:27 +02:00
parent 1173c652e9
commit 5f1b90fa69
373 changed files with 23418 additions and 2111 deletions

debian/changelog

@@ -254,6 +254,9 @@ linux (4.16.8-1) UNRELEASED; urgency=medium
[ Ben Hutchings ]
* kbuild: use -fmacro-prefix-map to make __FILE__ a relative path
+ [ Salvatore Bonaccorso ]
+ * [rt] Update to 4.16.7-rt1 and reenable
-- Vagrant Cascadian <vagrant@debian.org> Mon, 30 Apr 2018 11:23:15 -0700
linux (4.16.5-1) unstable; urgency=medium


@@ -113,7 +113,7 @@ debug-info: true
signed-code: false
[featureset-rt_base]
-enabled: false
+enabled: true
[description]
part-long-up: This kernel is not suitable for SMP (multi-processor,


@@ -0,0 +1,236 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:38 +0200
Subject: [PATCH 1/6] ARM: at91: add TCB registers definitions
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add register and bit definitions for the timer counter blocks found on
Atmel ARM SoCs.
Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/soc/at91/atmel_tcb.h | 216 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 216 insertions(+)
create mode 100644 include/soc/at91/atmel_tcb.h
--- /dev/null
+++ b/include/soc/at91/atmel_tcb.h
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018 Microchip */
+
+#ifndef __SOC_ATMEL_TCB_H
+#define __SOC_ATMEL_TCB_H
+
+/* Channel registers */
+#define ATMEL_TC_COFFS(c) ((c) * 0x40)
+#define ATMEL_TC_CCR(c) ATMEL_TC_COFFS(c)
+#define ATMEL_TC_CMR(c) (ATMEL_TC_COFFS(c) + 0x4)
+#define ATMEL_TC_SMMR(c) (ATMEL_TC_COFFS(c) + 0x8)
+#define ATMEL_TC_RAB(c) (ATMEL_TC_COFFS(c) + 0xc)
+#define ATMEL_TC_CV(c) (ATMEL_TC_COFFS(c) + 0x10)
+#define ATMEL_TC_RA(c) (ATMEL_TC_COFFS(c) + 0x14)
+#define ATMEL_TC_RB(c) (ATMEL_TC_COFFS(c) + 0x18)
+#define ATMEL_TC_RC(c) (ATMEL_TC_COFFS(c) + 0x1c)
+#define ATMEL_TC_SR(c) (ATMEL_TC_COFFS(c) + 0x20)
+#define ATMEL_TC_IER(c) (ATMEL_TC_COFFS(c) + 0x24)
+#define ATMEL_TC_IDR(c) (ATMEL_TC_COFFS(c) + 0x28)
+#define ATMEL_TC_IMR(c) (ATMEL_TC_COFFS(c) + 0x2c)
+#define ATMEL_TC_EMR(c) (ATMEL_TC_COFFS(c) + 0x30)
+
+/* Block registers */
+#define ATMEL_TC_BCR 0xc0
+#define ATMEL_TC_BMR 0xc4
+#define ATMEL_TC_QIER 0xc8
+#define ATMEL_TC_QIDR 0xcc
+#define ATMEL_TC_QIMR 0xd0
+#define ATMEL_TC_QISR 0xd4
+#define ATMEL_TC_FMR 0xd8
+#define ATMEL_TC_WPMR 0xe4
+
+/* CCR fields */
+#define ATMEL_TC_CCR_CLKEN BIT(0)
+#define ATMEL_TC_CCR_CLKDIS BIT(1)
+#define ATMEL_TC_CCR_SWTRG BIT(2)
+
+/* Common CMR fields */
+#define ATMEL_TC_CMR_TCLKS_MSK GENMASK(2, 0)
+#define ATMEL_TC_CMR_TCLK(x) (x)
+#define ATMEL_TC_CMR_XC(x) ((x) + 5)
+#define ATMEL_TC_CMR_CLKI BIT(3)
+#define ATMEL_TC_CMR_BURST_MSK GENMASK(5, 4)
+#define ATMEL_TC_CMR_BURST_XC(x) (((x) + 1) << 4)
+#define ATMEL_TC_CMR_WAVE BIT(15)
+
+/* Capture mode CMR fields */
+#define ATMEL_TC_CMR_LDBSTOP BIT(6)
+#define ATMEL_TC_CMR_LDBDIS BIT(7)
+#define ATMEL_TC_CMR_ETRGEDG_MSK GENMASK(9, 8)
+#define ATMEL_TC_CMR_ETRGEDG_NONE (0 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_RISING (1 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_FALLING (2 << 8)
+#define ATMEL_TC_CMR_ETRGEDG_BOTH (3 << 8)
+#define ATMEL_TC_CMR_ABETRG BIT(10)
+#define ATMEL_TC_CMR_CPCTRG BIT(14)
+#define ATMEL_TC_CMR_LDRA_MSK GENMASK(17, 16)
+#define ATMEL_TC_CMR_LDRA_NONE (0 << 16)
+#define ATMEL_TC_CMR_LDRA_RISING (1 << 16)
+#define ATMEL_TC_CMR_LDRA_FALLING (2 << 16)
+#define ATMEL_TC_CMR_LDRA_BOTH (3 << 16)
+#define ATMEL_TC_CMR_LDRB_MSK GENMASK(19, 18)
+#define ATMEL_TC_CMR_LDRB_NONE (0 << 18)
+#define ATMEL_TC_CMR_LDRB_RISING (1 << 18)
+#define ATMEL_TC_CMR_LDRB_FALLING (2 << 18)
+#define ATMEL_TC_CMR_LDRB_BOTH (3 << 18)
+#define ATMEL_TC_CMR_SBSMPLR_MSK GENMASK(22, 20)
+#define ATMEL_TC_CMR_SBSMPLR(x) ((x) << 20)
+
+/* Waveform mode CMR fields */
+#define ATMEL_TC_CMR_CPCSTOP BIT(6)
+#define ATMEL_TC_CMR_CPCDIS BIT(7)
+#define ATMEL_TC_CMR_EEVTEDG_MSK GENMASK(9, 8)
+#define ATMEL_TC_CMR_EEVTEDG_NONE (0 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_RISING (1 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_FALLING (2 << 8)
+#define ATMEL_TC_CMR_EEVTEDG_BOTH (3 << 8)
+#define ATMEL_TC_CMR_EEVT_MSK GENMASK(11, 10)
+#define ATMEL_TC_CMR_EEVT_XC(x) (((x) + 1) << 10)
+#define ATMEL_TC_CMR_ENETRG BIT(12)
+#define ATMEL_TC_CMR_WAVESEL_MSK GENMASK(14, 13)
+#define ATMEL_TC_CMR_WAVESEL_UP (0 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPDOWN (1 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPRC (2 << 13)
+#define ATMEL_TC_CMR_WAVESEL_UPDOWNRC (3 << 13)
+#define ATMEL_TC_CMR_ACPA_MSK GENMASK(17, 16)
+#define ATMEL_TC_CMR_ACPA(a) (ATMEL_TC_CMR_ACTION_##a << 16)
+#define ATMEL_TC_CMR_ACPC_MSK GENMASK(19, 18)
+#define ATMEL_TC_CMR_ACPC(a) (ATMEL_TC_CMR_ACTION_##a << 18)
+#define ATMEL_TC_CMR_AEEVT_MSK GENMASK(21, 20)
+#define ATMEL_TC_CMR_AEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 20)
+#define ATMEL_TC_CMR_ASWTRG_MSK GENMASK(23, 22)
+#define ATMEL_TC_CMR_ASWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 22)
+#define ATMEL_TC_CMR_BCPB_MSK GENMASK(25, 24)
+#define ATMEL_TC_CMR_BCPB(a) (ATMEL_TC_CMR_ACTION_##a << 24)
+#define ATMEL_TC_CMR_BCPC_MSK GENMASK(27, 26)
+#define ATMEL_TC_CMR_BCPC(a) (ATMEL_TC_CMR_ACTION_##a << 26)
+#define ATMEL_TC_CMR_BEEVT_MSK GENMASK(29, 28)
+#define ATMEL_TC_CMR_BEEVT(a) (ATMEL_TC_CMR_ACTION_##a << 28)
+#define ATMEL_TC_CMR_BSWTRG_MSK GENMASK(31, 30)
+#define ATMEL_TC_CMR_BSWTRG(a) (ATMEL_TC_CMR_ACTION_##a << 30)
+#define ATMEL_TC_CMR_ACTION_NONE 0
+#define ATMEL_TC_CMR_ACTION_SET 1
+#define ATMEL_TC_CMR_ACTION_CLEAR 2
+#define ATMEL_TC_CMR_ACTION_TOGGLE 3
+
+/* SMMR fields */
+#define ATMEL_TC_SMMR_GCEN BIT(0)
+#define ATMEL_TC_SMMR_DOWN BIT(1)
+
+/* SR/IER/IDR/IMR fields */
+#define ATMEL_TC_COVFS BIT(0)
+#define ATMEL_TC_LOVRS BIT(1)
+#define ATMEL_TC_CPAS BIT(2)
+#define ATMEL_TC_CPBS BIT(3)
+#define ATMEL_TC_CPCS BIT(4)
+#define ATMEL_TC_LDRAS BIT(5)
+#define ATMEL_TC_LDRBS BIT(6)
+#define ATMEL_TC_ETRGS BIT(7)
+#define ATMEL_TC_CLKSTA BIT(16)
+#define ATMEL_TC_MTIOA BIT(17)
+#define ATMEL_TC_MTIOB BIT(18)
+
+/* EMR fields */
+#define ATMEL_TC_EMR_TRIGSRCA_MSK GENMASK(1, 0)
+#define ATMEL_TC_EMR_TRIGSRCA_TIOA 0
+#define ATMEL_TC_EMR_TRIGSRCA_PWMX 1
+#define ATMEL_TC_EMR_TRIGSRCB_MSK GENMASK(5, 4)
+#define ATMEL_TC_EMR_TRIGSRCB_TIOB (0 << 4)
+#define ATMEL_TC_EMR_TRIGSRCB_PWM (1 << 4)
+#define ATMEL_TC_EMR_NOCLKDIV BIT(8)
+
+/* BCR fields */
+#define ATMEL_TC_BCR_SYNC BIT(0)
+
+/* BMR fields */
+#define ATMEL_TC_BMR_TCXC_MSK(c) GENMASK(((c) * 2) + 1, (c) * 2)
+#define ATMEL_TC_BMR_TCXC(x, c) ((x) << (2 * (c)))
+#define ATMEL_TC_BMR_QDEN BIT(8)
+#define ATMEL_TC_BMR_POSEN BIT(9)
+#define ATMEL_TC_BMR_SPEEDEN BIT(10)
+#define ATMEL_TC_BMR_QDTRANS BIT(11)
+#define ATMEL_TC_BMR_EDGPHA BIT(12)
+#define ATMEL_TC_BMR_INVA BIT(13)
+#define ATMEL_TC_BMR_INVB BIT(14)
+#define ATMEL_TC_BMR_INVIDX BIT(15)
+#define ATMEL_TC_BMR_SWAP BIT(16)
+#define ATMEL_TC_BMR_IDXPHB BIT(17)
+#define ATMEL_TC_BMR_AUTOC BIT(18)
+#define ATMEL_TC_MAXFILT_MSK GENMASK(25, 20)
+#define ATMEL_TC_MAXFILT(x) (((x) - 1) << 20)
+#define ATMEL_TC_MAXCMP_MSK GENMASK(29, 26)
+#define ATMEL_TC_MAXCMP(x) ((x) << 26)
+
+/* QEDC fields */
+#define ATMEL_TC_QEDC_IDX BIT(0)
+#define ATMEL_TC_QEDC_DIRCHG BIT(1)
+#define ATMEL_TC_QEDC_QERR BIT(2)
+#define ATMEL_TC_QEDC_MPE BIT(3)
+#define ATMEL_TC_QEDC_DIR BIT(8)
+
+/* FMR fields */
+#define ATMEL_TC_FMR_ENCF(x) BIT(x)
+
+/* WPMR fields */
+#define ATMEL_TC_WPMR_WPKEY (0x54494d << 8)
+#define ATMEL_TC_WPMR_WPEN BIT(0)
+
+static inline struct clk *tcb_clk_get(struct device_node *node, int channel)
+{
+ struct clk *clk;
+ char clk_name[] = "t0_clk";
+
+ clk_name[1] += channel;
+ clk = of_clk_get_by_name(node->parent, clk_name);
+ if (!IS_ERR(clk))
+ return clk;
+
+ return of_clk_get_by_name(node->parent, "t0_clk");
+}
+
+static inline int tcb_irq_get(struct device_node *node, int channel)
+{
+ int irq;
+
+ irq = of_irq_get(node->parent, channel);
+ if (irq > 0)
+ return irq;
+
+ return of_irq_get(node->parent, 0);
+}
+
+static const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+
+struct atmel_tcb_info {
+ int bits;
+};
+
+static const struct atmel_tcb_info atmel_tcb_infos[] = {
+ { .bits = 16 },
+ { .bits = 32 },
+};
+
+static const struct of_device_id atmel_tcb_dt_ids[] = {
+ {
+ .compatible = "atmel,at91rm9200-tcb",
+ .data = &atmel_tcb_infos[0],
+ }, {
+ .compatible = "atmel,at91sam9x5-tcb",
+ .data = &atmel_tcb_infos[1],
+ }, {
+ /* sentinel */
+ }
+};
+
+#endif /* __SOC_ATMEL_TCB_H */
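
As a rough illustration of how the action macros above compose into a single
mode register value (a standalone sketch with the relevant definitions copied
out of the header; not part of the patch):

#include <stdio.h>
#include <stdint.h>

/* Copied from the header so this compiles outside the kernel. */
#define BIT(n)                      (1U << (n))
#define ATMEL_TC_CMR_TCLK(x)        (x)
#define ATMEL_TC_CMR_WAVE           BIT(15)
#define ATMEL_TC_CMR_WAVESEL_UPRC   (2 << 13)
#define ATMEL_TC_CMR_ACTION_SET     1
#define ATMEL_TC_CMR_ACTION_CLEAR   2
#define ATMEL_TC_CMR_ACPA(a)        (ATMEL_TC_CMR_ACTION_##a << 16)
#define ATMEL_TC_CMR_ACPC(a)        (ATMEL_TC_CMR_ACTION_##a << 18)

int main(void)
{
	/* Waveform mode, slow-clock input, count up to RC, raise TIOA
	 * on RA compare and clear it on RC compare. */
	uint32_t cmr = ATMEL_TC_CMR_TCLK(4)
		     | ATMEL_TC_CMR_WAVE
		     | ATMEL_TC_CMR_WAVESEL_UPRC
		     | ATMEL_TC_CMR_ACPA(SET)
		     | ATMEL_TC_CMR_ACPC(CLEAR);

	printf("CMR = 0x%08x\n", (unsigned int)cmr);	/* 0x0009c004 */
	return 0;
}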


@@ -0,0 +1,153 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 21:54:18 -0500
Subject: [PATCH 01/17] get rid of trylock loop in locking dentries on shrink
list
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 3b3f09f48ba78c0634e929849860a6447d057eed
In case of trylock failure don't re-add to the list - drop the locks
and carefully get them in the right order. For shrink_dentry_list(),
somebody having grabbed a reference to dentry means that we can
kick it off-list, so if we find dentry being modified under us we
don't need to play silly buggers with retries anyway - off the list
it is.
The locking logic is taken out into a helper of its own; lock_parent()
is no longer used for dentries that can be killed under us.
[fix from Eric Biggers folded]
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 106 ++++++++++++++++++++++++++++++++++++++----------------------
1 file changed, 68 insertions(+), 38 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -974,56 +974,86 @@ void d_prune_aliases(struct inode *inode
}
EXPORT_SYMBOL(d_prune_aliases);
-static void shrink_dentry_list(struct list_head *list)
+/*
+ * Lock a dentry from shrink list.
+ * Note that dentry is *not* protected from concurrent dentry_kill(),
+ * d_delete(), etc. It is protected from freeing (by the fact of
+ * being on a shrink list), but everything else is fair game.
+ * Return false if dentry has been disrupted or grabbed, leaving
+ * the caller to kick it off-list. Otherwise, return true and have
+ * that dentry's inode and parent both locked.
+ */
+static bool shrink_lock_dentry(struct dentry *dentry)
{
- struct dentry *dentry, *parent;
+ struct inode *inode;
+ struct dentry *parent;
+
+ if (dentry->d_lockref.count)
+ return false;
+ inode = dentry->d_inode;
+ if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
+ rcu_read_lock(); /* to protect inode */
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
+ if (unlikely(dentry->d_lockref.count))
+ goto out;
+ /* changed inode means that somebody had grabbed it */
+ if (unlikely(inode != dentry->d_inode))
+ goto out;
+ rcu_read_unlock();
+ }
+
+ parent = dentry->d_parent;
+ if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
+ return true;
+
+ rcu_read_lock(); /* to protect parent */
+ spin_unlock(&dentry->d_lock);
+ parent = READ_ONCE(dentry->d_parent);
+ spin_lock(&parent->d_lock);
+ if (unlikely(parent != dentry->d_parent)) {
+ spin_unlock(&parent->d_lock);
+ spin_lock(&dentry->d_lock);
+ goto out;
+ }
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (likely(!dentry->d_lockref.count)) {
+ rcu_read_unlock();
+ return true;
+ }
+ spin_unlock(&parent->d_lock);
+out:
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ return false;
+}
+
+static void shrink_dentry_list(struct list_head *list)
+{
while (!list_empty(list)) {
+ struct dentry *dentry, *parent;
struct inode *inode;
+
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
- parent = lock_parent(dentry);
-
- /*
- * The dispose list is isolated and dentries are not accounted
- * to the LRU here, so we can simply remove it from the list
- * here regardless of whether it is referenced or not.
- */
- d_shrink_del(dentry);
-
- /*
- * We found an inuse dentry which was not removed from
- * the LRU because of laziness during lookup. Do not free it.
- */
- if (dentry->d_lockref.count > 0) {
+ if (!shrink_lock_dentry(dentry)) {
+ bool can_free = false;
+ d_shrink_del(dentry);
+ if (dentry->d_lockref.count < 0)
+ can_free = dentry->d_flags & DCACHE_MAY_FREE;
spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- continue;
- }
-
-
- if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
- bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
if (can_free)
dentry_free(dentry);
continue;
}
-
- inode = dentry->d_inode;
- if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
- d_shrink_add(dentry, list);
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- continue;
- }
-
+ d_shrink_del(dentry);
+ parent = dentry->d_parent;
__dentry_kill(dentry);
-
+ if (parent == dentry)
+ continue;
/*
* We need to prune ancestors too. This is necessary to prevent
* quadratic behavior of shrink_dcache_parent(), but is also
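
For illustration, the retry-free locking pattern above in miniature (a
hypothetical pthread sketch, not kernel code; the kernel additionally relies
on RCU to keep the parent from being freed while no lock is held):

#include <pthread.h>
#include <stdbool.h>

struct obj {
	pthread_mutex_t lock;
	struct obj *parent;	/* protected by ->lock */
};

/*
 * Take obj->lock and its parent's lock. Fast path: a single trylock.
 * Slow path: drop everything, lock the parent first, relock the child
 * and revalidate, since ->parent may have changed while nothing was
 * held. (Assumes objects are never freed; RCU covers that in dcache.)
 */
static bool lock_obj_and_parent(struct obj *o)
{
	struct obj *p;

	pthread_mutex_lock(&o->lock);
	p = o->parent;
	if (pthread_mutex_trylock(&p->lock) == 0)
		return true;

	pthread_mutex_unlock(&o->lock);
	pthread_mutex_lock(&p->lock);
	pthread_mutex_lock(&o->lock);
	if (o->parent != p) {	/* raced with a reparent: give up */
		pthread_mutex_unlock(&p->lock);
		pthread_mutex_unlock(&o->lock);
		return false;
	}
	return true;
}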


@@ -0,0 +1,33 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:33 +0100
Subject: [PATCH 01/10] iommu/amd: Take into account that alloc_dev_data() may
return NULL
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 39ffe39545cd5cb5b8cee9f0469165cf24dc62c2
find_dev_data() does not check whether the return value of alloc_dev_data()
is NULL. This used to be okay because the pointer was simply returned as-is.
Since commit df3f7a6e8e85 ("iommu/amd: Use is_attach_deferred
call-back") the pointer may be used within find_dev_data() so a NULL
check is required.
Cc: Baoquan He <bhe@redhat.com>
Fixes: df3f7a6e8e85 ("iommu/amd: Use is_attach_deferred call-back")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 2 ++
1 file changed, 2 insertions(+)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -310,6 +310,8 @@ static struct iommu_dev_data *find_dev_d
if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
+ if (!dev_data)
+ return NULL;
if (translation_pre_enabled(iommu))
dev_data->defer_attach = true;


@@ -0,0 +1,171 @@
From: Scott Wood <swood@redhat.com>
Date: Sun, 21 Jan 2018 03:28:54 -0600
Subject: [PATCH 1/3] iommu/amd: Use raw locks on atomic context paths
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 27790398c2aed917828dc3c6f81240d57f1584c9
Several functions in this driver are called from atomic context,
and thus raw locks must be used in order to be safe on PREEMPT_RT.
This includes paths that must wait for command completion, which is
a potential PREEMPT_RT latency concern but not easily avoidable.
Signed-off-by: Scott Wood <swood@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 30 +++++++++++++++---------------
drivers/iommu/amd_iommu_init.c | 2 +-
drivers/iommu/amd_iommu_types.h | 4 ++--
3 files changed, 18 insertions(+), 18 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1056,9 +1056,9 @@ static int iommu_queue_command_sync(stru
unsigned long flags;
int ret;
- spin_lock_irqsave(&iommu->lock, flags);
+ raw_spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command_sync(iommu, cmd, sync);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
@@ -1084,7 +1084,7 @@ static int iommu_completion_wait(struct
build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
- spin_lock_irqsave(&iommu->lock, flags);
+ raw_spin_lock_irqsave(&iommu->lock, flags);
iommu->cmd_sem = 0;
@@ -1095,7 +1095,7 @@ static int iommu_completion_wait(struct
ret = wait_on_sem(&iommu->cmd_sem);
out_unlock:
- spin_unlock_irqrestore(&iommu->lock, flags);
+ raw_spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
@@ -3627,7 +3627,7 @@ static struct irq_remap_table *get_irq_t
goto out_unlock;
/* Initialize table spin-lock */
- spin_lock_init(&table->lock);
+ raw_spin_lock_init(&table->lock);
if (ioapic)
/* Keep the first 32 indexes free for IOAPIC interrupts */
@@ -3689,7 +3689,7 @@ static int alloc_irq_index(u16 devid, in
if (align)
alignment = roundup_pow_of_two(count);
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
/* Scan table for free entries */
for (index = ALIGN(table->min_index, alignment), c = 0;
@@ -3716,7 +3716,7 @@ static int alloc_irq_index(u16 devid, in
index = -ENOSPC;
out:
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
return index;
}
@@ -3737,7 +3737,7 @@ static int modify_irte_ga(u16 devid, int
if (!table)
return -ENOMEM;
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
entry = (struct irte_ga *)table->table;
entry = &entry[index];
@@ -3748,7 +3748,7 @@ static int modify_irte_ga(u16 devid, int
if (data)
data->ref = entry;
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -3770,9 +3770,9 @@ static int modify_irte(u16 devid, int in
if (!table)
return -ENOMEM;
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
table->table[index] = irte->val;
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -3794,9 +3794,9 @@ static void free_irte(u16 devid, int ind
if (!table)
return;
- spin_lock_irqsave(&table->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
iommu->irte_ops->clear_allocated(table, index);
- spin_unlock_irqrestore(&table->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -4397,7 +4397,7 @@ int amd_iommu_update_ga(int cpu, bool is
if (!irt)
return -ENODEV;
- spin_lock_irqsave(&irt->lock, flags);
+ raw_spin_lock_irqsave(&irt->lock, flags);
if (ref->lo.fields_vapic.guest_mode) {
if (cpu >= 0)
@@ -4406,7 +4406,7 @@ int amd_iommu_update_ga(int cpu, bool is
barrier();
}
- spin_unlock_irqrestore(&irt->lock, flags);
+ raw_spin_unlock_irqrestore(&irt->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1474,7 +1474,7 @@ static int __init init_iommu_one(struct
{
int ret;
- spin_lock_init(&iommu->lock);
+ raw_spin_lock_init(&iommu->lock);
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -408,7 +408,7 @@ extern bool amd_iommu_iotlb_sup;
#define IRQ_TABLE_ALIGNMENT 128
struct irq_remap_table {
- spinlock_t lock;
+ raw_spinlock_t lock;
unsigned min_index;
u32 *table;
};
@@ -490,7 +490,7 @@ struct amd_iommu {
int index;
/* locks the accesses to the hardware */
- spinlock_t lock;
+ raw_spinlock_t lock;
/* Pointer to PCI device of this IOMMU */
struct pci_dev *dev;
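
The distinction this conversion exploits, in a minimal kernel-style sketch
(illustrative only): on PREEMPT_RT a plain spinlock_t becomes a sleeping
rt_mutex, so a lock taken with interrupts disabled or from an IRQ handler
must be a raw_spinlock_t.

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(hw_lock);	/* protects a hardware queue */

static void queue_hw_command(u32 cmd)
{
	unsigned long flags;

	/*
	 * raw_spin_lock_irqsave() never sleeps, even on PREEMPT_RT, so
	 * this is safe in atomic context; a plain spin_lock_irqsave()
	 * would try to sleep there.
	 */
	raw_spin_lock_irqsave(&hw_lock, flags);
	/* ... write cmd to the hardware command ring ... */
	raw_spin_unlock_irqrestore(&hw_lock, flags);
}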


@@ -0,0 +1,672 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:39 +0200
Subject: [PATCH 2/6] clocksource/drivers: Add a new driver for the Atmel ARM
TC blocks
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add a driver for the Atmel Timer Counter Blocks. This driver provides a
clocksource and two clockevent devices.
One of the clockevent device is linked to the clocksource counter and so it
will run at the same frequency. This will be used when there is only on TCB
channel available for timers.
The other clockevent device runs on a separate TCB channel when available.
This driver uses regmap and syscon to be able to probe early in the boot
and avoid having to switch on the TCB clocksource later. Using regmap also
means that unused TCB channels may be used by other drivers (PWM for
example). read/writel are still used to access channel specific registers
to avoid the performance impact of regmap (mainly locking).
Tested-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Andras Szemzo <szemzo.andras@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 8
drivers/clocksource/Makefile | 3
drivers/clocksource/timer-atmel-tcb.c | 608 ++++++++++++++++++++++++++++++++++
3 files changed, 618 insertions(+), 1 deletion(-)
create mode 100644 drivers/clocksource/timer-atmel-tcb.c
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -392,6 +392,14 @@ config ATMEL_ST
help
Support for the Atmel ST timer.
+config ATMEL_ARM_TCB_CLKSRC
+ bool "Microchip ARM TC Block" if COMPILE_TEST
+ select REGMAP_MMIO
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated Timer Counter Blocks in Microchip ARM SoCs.
+
config CLKSRC_METAG_GENERIC
def_bool y if METAG
help
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -3,7 +3,8 @@ obj-$(CONFIG_TIMER_OF) += timer-of.o
obj-$(CONFIG_TIMER_PROBE) += timer-probe.o
obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o
obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o
-obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o
+obj-$(CONFIG_ATMEL_ARM_TCB_CLKSRC) += timer-atmel-tcb.o
obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o
obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o
--- /dev/null
+++ b/drivers/clocksource/timer-atmel-tcb.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+#include <linux/sched_clock.h>
+#include <soc/at91/atmel_tcb.h>
+
+static struct atmel_tcb_clksrc {
+ struct clocksource clksrc;
+ struct clock_event_device clkevt;
+ struct regmap *regmap;
+ void __iomem *base;
+ struct clk *clk[2];
+ char name[20];
+ int channels[2];
+ int bits;
+ int irq;
+ struct {
+ u32 cmr;
+ u32 imr;
+ u32 rc;
+ bool clken;
+ } cache[2];
+ u32 bmr_cache;
+ bool registered;
+} tc = {
+ .clksrc = {
+ .rating = 200,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+ .clkevt = {
+ .features = CLOCK_EVT_FEAT_ONESHOT,
+ /* Should be lower than at91rm9200's system timer */
+ .rating = 125,
+ },
+};
+
+static struct tc_clkevt_device {
+ struct clock_event_device clkevt;
+ struct regmap *regmap;
+ void __iomem *base;
+ struct clk *slow_clk;
+ struct clk *clk;
+ char name[20];
+ int channel;
+ int irq;
+ struct {
+ u32 cmr;
+ u32 imr;
+ u32 rc;
+ bool clken;
+ } cache;
+ bool registered;
+} tce = {
+ .clkevt = {
+ .features = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT,
+ /*
+ * Should be lower than at91rm9200's system timer
+ * but higher than tc.clkevt.rating
+ */
+ .rating = 140,
+ },
+};
+
+/*
+ * Clockevent device using its own channel
+ */
+static int tc_clkevt2_shutdown(struct clock_event_device *d)
+{
+ writel(0xff, tce.base + ATMEL_TC_IDR(tce.channel));
+ writel(ATMEL_TC_CCR_CLKDIS, tce.base + ATMEL_TC_CCR(tce.channel));
+ if (!clockevent_state_detached(d))
+ clk_disable(tce.clk);
+
+ return 0;
+}
+
+/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
+ * because using one of the divided clocks would usually mean the
+ * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
+ *
+ * A divided clock could be good for high resolution timers, since
+ * 30.5 usec resolution can seem "low".
+ */
+static int tc_clkevt2_set_oneshot(struct clock_event_device *d)
+{
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
+ tc_clkevt2_shutdown(d);
+
+ clk_enable(tce.clk);
+
+ /* slow clock, count up to RC, then irq and stop */
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_CPCSTOP |
+ ATMEL_TC_CMR_WAVE | ATMEL_TC_CMR_WAVESEL_UPRC,
+ tce.base + ATMEL_TC_CMR(tce.channel));
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel));
+
+ return 0;
+}
+
+static int tc_clkevt2_set_periodic(struct clock_event_device *d)
+{
+ if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
+ tc_clkevt2_shutdown(d);
+
+ /* By not making the gentime core emulate periodic mode on top
+ * of oneshot, we get lower overhead and improved accuracy.
+ */
+ clk_enable(tce.clk);
+
+ /* slow clock, count up to RC, then irq and restart */
+ writel(ATMEL_TC_CMR_TCLK(4) | ATMEL_TC_CMR_WAVE |
+ ATMEL_TC_CMR_WAVESEL_UPRC,
+ tce.base + ATMEL_TC_CMR(tce.channel));
+ writel((32768 + HZ / 2) / HZ, tce.base + ATMEL_TC_RC(tce.channel));
+
+ /* Enable clock and interrupts on RC compare */
+ writel(ATMEL_TC_CPCS, tce.base + ATMEL_TC_IER(tce.channel));
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tce.base + ATMEL_TC_CCR(tce.channel));
+
+ return 0;
+}
+
+static int tc_clkevt2_next_event(unsigned long delta,
+ struct clock_event_device *d)
+{
+ writel(delta, tce.base + ATMEL_TC_RC(tce.channel));
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tce.base + ATMEL_TC_CCR(tce.channel));
+
+ return 0;
+}
+
+static irqreturn_t tc_clkevt2_irq(int irq, void *handle)
+{
+ unsigned int sr;
+
+ sr = readl(tce.base + ATMEL_TC_SR(tce.channel));
+ if (sr & ATMEL_TC_CPCS) {
+ tce.clkevt.event_handler(&tce.clkevt);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static void tc_clkevt2_suspend(struct clock_event_device *d)
+{
+ tce.cache.cmr = readl(tce.base + ATMEL_TC_CMR(tce.channel));
+ tce.cache.imr = readl(tce.base + ATMEL_TC_IMR(tce.channel));
+ tce.cache.rc = readl(tce.base + ATMEL_TC_RC(tce.channel));
+ tce.cache.clken = !!(readl(tce.base + ATMEL_TC_SR(tce.channel)) &
+ ATMEL_TC_CLKSTA);
+}
+
+static void tc_clkevt2_resume(struct clock_event_device *d)
+{
+ /* Restore registers for the channel, RA and RB are not used */
+ writel(tce.cache.cmr, tc.base + ATMEL_TC_CMR(tce.channel));
+ writel(tce.cache.rc, tc.base + ATMEL_TC_RC(tce.channel));
+ writel(0, tc.base + ATMEL_TC_RA(tce.channel));
+ writel(0, tc.base + ATMEL_TC_RB(tce.channel));
+ /* Disable all the interrupts */
+ writel(0xff, tc.base + ATMEL_TC_IDR(tce.channel));
+ /* Reenable interrupts that were enabled before suspending */
+ writel(tce.cache.imr, tc.base + ATMEL_TC_IER(tce.channel));
+
+ /* Start the clock if it was used */
+ if (tce.cache.clken)
+ writel(ATMEL_TC_CCR_CLKEN | ATMEL_TC_CCR_SWTRG,
+ tc.base + ATMEL_TC_CCR(tce.channel));
+}
+
+static int __init tc_clkevt_register(struct device_node *node,
+ struct regmap *regmap, void __iomem *base,
+ int channel, int irq, int bits)
+{
+ int ret;
+
+ tce.regmap = regmap;
+ tce.base = base;
+ tce.channel = channel;
+ tce.irq = irq;
+
+ tce.slow_clk = of_clk_get_by_name(node->parent, "slow_clk");
+ if (IS_ERR(tce.slow_clk))
+ return PTR_ERR(tce.slow_clk);
+
+ ret = clk_prepare_enable(tce.slow_clk);
+ if (ret)
+ return ret;
+
+ tce.clk = tcb_clk_get(node, tce.channel);
+ if (IS_ERR(tce.clk)) {
+ ret = PTR_ERR(tce.clk);
+ goto err_slow;
+ }
+
+ snprintf(tce.name, sizeof(tce.name), "%s:%d",
+ kbasename(node->parent->full_name), channel);
+ tce.clkevt.cpumask = cpumask_of(0);
+ tce.clkevt.name = tce.name;
+ tce.clkevt.set_next_event = tc_clkevt2_next_event,
+ tce.clkevt.set_state_shutdown = tc_clkevt2_shutdown,
+ tce.clkevt.set_state_periodic = tc_clkevt2_set_periodic,
+ tce.clkevt.set_state_oneshot = tc_clkevt2_set_oneshot,
+ tce.clkevt.suspend = tc_clkevt2_suspend,
+ tce.clkevt.resume = tc_clkevt2_resume,
+
+ /* try to enable clk to avoid future errors in mode change */
+ ret = clk_prepare_enable(tce.clk);
+ if (ret)
+ goto err_slow;
+ clk_disable(tce.clk);
+
+ clockevents_config_and_register(&tce.clkevt, 32768, 1, BIT(bits) - 1);
+
+ ret = request_irq(tce.irq, tc_clkevt2_irq, IRQF_TIMER | IRQF_SHARED,
+ tce.clkevt.name, &tce);
+ if (ret)
+ goto err_clk;
+
+ tce.registered = true;
+
+ return 0;
+
+err_clk:
+ clk_unprepare(tce.clk);
+err_slow:
+ clk_disable_unprepare(tce.slow_clk);
+
+ return ret;
+}
+
+/*
+ * Clocksource and clockevent using the same channel(s)
+ */
+static u64 tc_get_cycles(struct clocksource *cs)
+{
+ u32 lower, upper;
+
+ do {
+ upper = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1]));
+ lower = readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
+ } while (upper != readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[1])));
+
+ return (upper << 16) | lower;
+}
+
+static u64 tc_get_cycles32(struct clocksource *cs)
+{
+ return readl_relaxed(tc.base + ATMEL_TC_CV(tc.channels[0]));
+}
+
+static u64 notrace tc_sched_clock_read(void)
+{
+ return tc_get_cycles(&tc.clksrc);
+}
+
+static u64 notrace tc_sched_clock_read32(void)
+{
+ return tc_get_cycles32(&tc.clksrc);
+}
+
+static int tcb_clkevt_next_event(unsigned long delta,
+ struct clock_event_device *d)
+{
+ u32 old, next, cur;
+
+
+ old = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
+ next = old + delta;
+ writel(next, tc.base + ATMEL_TC_RC(tc.channels[0]));
+ cur = readl(tc.base + ATMEL_TC_CV(tc.channels[0]));
+
+ /* check whether the delta elapsed while setting the register */
+ if ((next < old && cur < old && cur > next) ||
+ (next > old && (cur < old || cur > next))) {
+ /*
+ * Clear the CPCS bit in the status register to avoid
+ * generating a spurious interrupt next time a valid
+ * timer event is configured.
+ */
+ old = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
+ return -ETIME;
+ }
+
+ writel(ATMEL_TC_CPCS, tc.base + ATMEL_TC_IER(tc.channels[0]));
+
+ return 0;
+}
+
+static irqreturn_t tc_clkevt_irq(int irq, void *handle)
+{
+ unsigned int sr;
+
+ sr = readl(tc.base + ATMEL_TC_SR(tc.channels[0]));
+ if (sr & ATMEL_TC_CPCS) {
+ tc.clkevt.event_handler(&tc.clkevt);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+static int tcb_clkevt_oneshot(struct clock_event_device *dev)
+{
+ if (clockevent_state_oneshot(dev))
+ return 0;
+
+ /*
+ * Because both clockevent devices may share the same IRQ, we don't want
+ * the less likely one to stay requested
+ */
+ return request_irq(tc.irq, tc_clkevt_irq, IRQF_TIMER | IRQF_SHARED,
+ tc.name, &tc);
+}
+
+static int tcb_clkevt_shutdown(struct clock_event_device *dev)
+{
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[0]));
+ if (tc.bits == 16)
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[1]));
+
+ if (!clockevent_state_detached(dev))
+ free_irq(tc.irq, &tc);
+
+ return 0;
+}
+
+static void __init tcb_setup_dual_chan(struct atmel_tcb_clksrc *tc,
+ int mck_divisor_idx)
+{
+ /* first channel: waveform mode, input mclk/8, clock TIOA on overflow */
+ writel(mck_divisor_idx /* likely divide-by-8 */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP /* free-run */
+ | ATMEL_TC_CMR_ACPA(SET) /* TIOA rises at 0 */
+ | ATMEL_TC_CMR_ACPC(CLEAR), /* (duty cycle 50%) */
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
+ writel(0x0000, tc->base + ATMEL_TC_RA(tc->channels[0]));
+ writel(0x8000, tc->base + ATMEL_TC_RC(tc->channels[0]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
+
+ /* second channel: waveform mode, input TIOA */
+ writel(ATMEL_TC_CMR_XC(tc->channels[1]) /* input: TIOA */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
+ tc->base + ATMEL_TC_CMR(tc->channels[1]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[1])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[1]));
+
+ /* chain both channel, we assume the previous channel */
+ regmap_write(tc->regmap, ATMEL_TC_BMR,
+ ATMEL_TC_BMR_TCXC(1 + tc->channels[1], tc->channels[1]));
+ /* then reset all the timers */
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static void __init tcb_setup_single_chan(struct atmel_tcb_clksrc *tc,
+ int mck_divisor_idx)
+{
+ /* channel 0: waveform mode, input mclk/8 */
+ writel(mck_divisor_idx /* likely divide-by-8 */
+ | ATMEL_TC_CMR_WAVE
+ | ATMEL_TC_CMR_WAVESEL_UP, /* free-run */
+ tc->base + ATMEL_TC_CMR(tc->channels[0]));
+ writel(0xff, tc->base + ATMEL_TC_IDR(tc->channels[0])); /* no irqs */
+ writel(ATMEL_TC_CCR_CLKEN, tc->base + ATMEL_TC_CCR(tc->channels[0]));
+
+ /* then reset all the timers */
+ regmap_write(tc->regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static void tc_clksrc_suspend(struct clocksource *cs)
+{
+ int i;
+
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
+ tc.cache[i].cmr = readl(tc.base + ATMEL_TC_CMR(tc.channels[i]));
+ tc.cache[i].imr = readl(tc.base + ATMEL_TC_IMR(tc.channels[i]));
+ tc.cache[i].rc = readl(tc.base + ATMEL_TC_RC(tc.channels[i]));
+ tc.cache[i].clken = !!(readl(tc.base +
+ ATMEL_TC_SR(tc.channels[i])) &
+ ATMEL_TC_CLKSTA);
+ }
+
+ if (tc.bits == 16)
+ regmap_read(tc.regmap, ATMEL_TC_BMR, &tc.bmr_cache);
+}
+
+static void tc_clksrc_resume(struct clocksource *cs)
+{
+ int i;
+
+ for (i = 0; i < 1 + (tc.bits == 16); i++) {
+ /* Restore registers for the channel, RA and RB are not used */
+ writel(tc.cache[i].cmr, tc.base + ATMEL_TC_CMR(tc.channels[i]));
+ writel(tc.cache[i].rc, tc.base + ATMEL_TC_RC(tc.channels[i]));
+ writel(0, tc.base + ATMEL_TC_RA(tc.channels[i]));
+ writel(0, tc.base + ATMEL_TC_RB(tc.channels[i]));
+ /* Disable all the interrupts */
+ writel(0xff, tc.base + ATMEL_TC_IDR(tc.channels[i]));
+ /* Reenable interrupts that were enabled before suspending */
+ writel(tc.cache[i].imr, tc.base + ATMEL_TC_IER(tc.channels[i]));
+
+ /* Start the clock if it was used */
+ if (tc.cache[i].clken)
+ writel(ATMEL_TC_CCR_CLKEN, tc.base +
+ ATMEL_TC_CCR(tc.channels[i]));
+ }
+
+ /* in case of dual channel, chain channels */
+ if (tc.bits == 16)
+ regmap_write(tc.regmap, ATMEL_TC_BMR, tc.bmr_cache);
+ /* Finally, trigger all the channels*/
+ regmap_write(tc.regmap, ATMEL_TC_BCR, ATMEL_TC_BCR_SYNC);
+}
+
+static int __init tcb_clksrc_register(struct device_node *node,
+ struct regmap *regmap, void __iomem *base,
+ int channel, int channel1, int irq,
+ int bits)
+{
+ u32 rate, divided_rate = 0;
+ int best_divisor_idx = -1;
+ int i, err = -1;
+ u64 (*tc_sched_clock)(void);
+
+ tc.regmap = regmap;
+ tc.base = base;
+ tc.channels[0] = channel;
+ tc.channels[1] = channel1;
+ tc.irq = irq;
+ tc.bits = bits;
+
+ tc.clk[0] = tcb_clk_get(node, tc.channels[0]);
+ if (IS_ERR(tc.clk[0]))
+ return PTR_ERR(tc.clk[0]);
+ err = clk_prepare_enable(tc.clk[0]);
+ if (err) {
+ pr_debug("can't enable T0 clk\n");
+ goto err_clk;
+ }
+
+ /* How fast will we be counting? Pick something over 5 MHz. */
+ rate = (u32)clk_get_rate(tc.clk[0]);
+ for (i = 0; i < 5; i++) {
+ unsigned int divisor = atmel_tc_divisors[i];
+ unsigned int tmp;
+
+ if (!divisor)
+ continue;
+
+ tmp = rate / divisor;
+ pr_debug("TC: %u / %-3u [%d] --> %u\n", rate, divisor, i, tmp);
+ if (best_divisor_idx > 0) {
+ if (tmp < 5 * 1000 * 1000)
+ continue;
+ }
+ divided_rate = tmp;
+ best_divisor_idx = i;
+ }
+
+ if (tc.bits == 32) {
+ tc.clksrc.read = tc_get_cycles32;
+ tcb_setup_single_chan(&tc, best_divisor_idx);
+ tc_sched_clock = tc_sched_clock_read32;
+ snprintf(tc.name, sizeof(tc.name), "%s:%d",
+ kbasename(node->parent->full_name), tc.channels[0]);
+ } else {
+ tc.clk[1] = tcb_clk_get(node, tc.channels[1]);
+ if (IS_ERR(tc.clk[1]))
+ goto err_disable_t0;
+
+ err = clk_prepare_enable(tc.clk[1]);
+ if (err) {
+ pr_debug("can't enable T1 clk\n");
+ goto err_clk1;
+ }
+ tc.clksrc.read = tc_get_cycles,
+ tcb_setup_dual_chan(&tc, best_divisor_idx);
+ tc_sched_clock = tc_sched_clock_read;
+ snprintf(tc.name, sizeof(tc.name), "%s:%d,%d",
+ kbasename(node->parent->full_name), tc.channels[0],
+ tc.channels[1]);
+ }
+
+ pr_debug("%s at %d.%03d MHz\n", tc.name,
+ divided_rate / 1000000,
+ ((divided_rate + 500000) % 1000000) / 1000);
+
+ tc.clksrc.name = tc.name;
+ tc.clksrc.suspend = tc_clksrc_suspend;
+ tc.clksrc.resume = tc_clksrc_resume;
+
+ err = clocksource_register_hz(&tc.clksrc, divided_rate);
+ if (err)
+ goto err_disable_t1;
+
+ sched_clock_register(tc_sched_clock, 32, divided_rate);
+
+ tc.registered = true;
+
+ /* Set up and register clockevents */
+ tc.clkevt.name = tc.name;
+ tc.clkevt.cpumask = cpumask_of(0);
+ tc.clkevt.set_next_event = tcb_clkevt_next_event;
+ tc.clkevt.set_state_oneshot = tcb_clkevt_oneshot;
+ tc.clkevt.set_state_shutdown = tcb_clkevt_shutdown;
+ clockevents_config_and_register(&tc.clkevt, divided_rate, 1,
+ BIT(tc.bits) - 1);
+
+ return 0;
+
+err_disable_t1:
+ if (tc.bits == 16)
+ clk_disable_unprepare(tc.clk[1]);
+
+err_clk1:
+ if (tc.bits == 16)
+ clk_put(tc.clk[1]);
+
+err_disable_t0:
+ clk_disable_unprepare(tc.clk[0]);
+
+err_clk:
+ clk_put(tc.clk[0]);
+
+ pr_err("%s: unable to register clocksource/clockevent\n",
+ tc.clksrc.name);
+
+ return err;
+}
+
+static int __init tcb_clksrc_init(struct device_node *node)
+{
+ const struct of_device_id *match;
+ const struct atmel_tcb_info *tcb_info;
+ struct regmap *regmap;
+ void __iomem *tcb_base;
+ u32 channel;
+ int bits, irq, err, chan1 = -1;
+
+ if (tc.registered && tce.registered)
+ return -ENODEV;
+
+ /*
+ * The regmap has to be used to access registers that are shared
+ * between channels on the same TCB but we keep direct IO access for
+ * the counters to avoid the impact on performance
+ */
+ regmap = syscon_node_to_regmap(node->parent);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ tcb_base = of_iomap(node->parent, 0);
+ if (!tcb_base) {
+ pr_err("%s +%d %s\n", __FILE__, __LINE__, __func__);
+ return -ENXIO;
+ }
+
+ match = of_match_node(atmel_tcb_dt_ids, node->parent);
+ tcb_info = match->data;
+ bits = tcb_info->bits;
+
+ err = of_property_read_u32_index(node, "reg", 0, &channel);
+ if (err)
+ return err;
+
+ irq = tcb_irq_get(node, channel);
+ if (irq < 0)
+ return irq;
+
+ if (tc.registered)
+ return tc_clkevt_register(node, regmap, tcb_base, channel, irq,
+ bits);
+
+ if (bits == 16) {
+ of_property_read_u32_index(node, "reg", 1, &chan1);
+ if (chan1 == -1) {
+ if (tce.registered) {
+ pr_err("%s: clocksource needs two channels\n",
+ node->parent->full_name);
+ return -EINVAL;
+ } else {
+ return tc_clkevt_register(node, regmap,
+ tcb_base, channel,
+ irq, bits);
+ }
+ }
+ }
+
+ return tcb_clksrc_register(node, regmap, tcb_base, channel, chan1, irq,
+ bits);
+}
+CLOCKSOURCE_OF_DECLARE(atmel_tcb_clksrc, "atmel,tcb-timer",
+ tcb_clksrc_init);
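
The key trick in tc_get_cycles() above is the stability loop over two
chained 16-bit channels: re-read the upper half until it is unchanged, so a
carry between the two reads cannot produce a torn value. A generic
user-space sketch of the same idea (hypothetical MMIO-backed variables):

#include <stdint.h>

/* Hypothetical 16-bit counter halves, e.g. memory-mapped registers. */
extern volatile uint32_t counter_hi, counter_lo;

static uint32_t read_chained_counter(void)
{
	uint32_t upper, lower;

	do {
		upper = counter_hi;		/* sample high half */
		lower = counter_lo;		/* sample low half */
	} while (upper != counter_hi);		/* redo if a carry hit */

	return (upper << 16) | (lower & 0xffff);
}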


@@ -0,0 +1,32 @@
From: Scott Wood <swood@redhat.com>
Date: Sun, 28 Jan 2018 14:22:19 -0600
Subject: [PATCH 2/3] iommu/amd: Don't use dev_data in irte_ga_set_affinity()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 01ee04badefd296eb7a4430497373be9b7b16783
search_dev_data() acquires a non-raw lock, which can't be done
from atomic context on PREEMPT_RT. There is no need to look at
dev_data because guest_mode should never be set if use_vapic is
not set.
Signed-off-by: Scott Wood <swood@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3877,10 +3877,8 @@ static void irte_ga_set_affinity(void *e
u8 vector, u32 dest_apicid)
{
struct irte_ga *irte = (struct irte_ga *) entry;
- struct iommu_dev_data *dev_data = search_dev_data(devid);
- if (!dev_data || !dev_data->use_vapic ||
- !irte->lo.fields_remap.guest_mode) {
+ if (!irte->lo.fields_remap.guest_mode) {
irte->hi.fields.vector = vector;
irte->lo.fields_remap.destination = dest_apicid;
modify_irte_ga(devid, index, irte, NULL);


@@ -0,0 +1,98 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:34 +0100
Subject: [PATCH 02/10] iommu/amd: Turn dev_data_list into a lock less list
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 779da73273fc4c4c6f41579a95e4fb7880a1720e
alloc_dev_data() adds new items to dev_data_list and search_dev_data()
is searching for items in this list. Both protect the access to the list
with a spinlock.
There is no need to navigate back and forth within the list and there is
also no deleting of a specific item. This qualifies the list to become a
lockless list and, as part of this, the spinlock can be removed.
With this change the ordering of those items within the list is changed:
before the change new items were added to the end of the list, now they
are added to the front. I don't think it matters but wanted to mention
it.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 28 ++++++++++------------------
drivers/iommu/amd_iommu_types.h | 2 +-
2 files changed, 11 insertions(+), 19 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -83,8 +83,7 @@
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
/* List of all available dev_data structures */
-static LIST_HEAD(dev_data_list);
-static DEFINE_SPINLOCK(dev_data_list_lock);
+static LLIST_HEAD(dev_data_list);
LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
@@ -203,40 +202,33 @@ static struct dma_ops_domain* to_dma_ops
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
- unsigned long flags;
dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
if (!dev_data)
return NULL;
dev_data->devid = devid;
-
- spin_lock_irqsave(&dev_data_list_lock, flags);
- list_add_tail(&dev_data->dev_data_list, &dev_data_list);
- spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
ratelimit_default_init(&dev_data->rs);
+ llist_add(&dev_data->dev_data_list, &dev_data_list);
return dev_data;
}
static struct iommu_dev_data *search_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
- unsigned long flags;
+ struct llist_node *node;
+
+ if (llist_empty(&dev_data_list))
+ return NULL;
- spin_lock_irqsave(&dev_data_list_lock, flags);
- list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
+ node = dev_data_list.first;
+ llist_for_each_entry(dev_data, node, dev_data_list) {
if (dev_data->devid == devid)
- goto out_unlock;
+ return dev_data;
}
- dev_data = NULL;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
- return dev_data;
+ return NULL;
}
static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -627,7 +627,7 @@ struct devid_map {
*/
struct iommu_dev_data {
struct list_head list; /* For domain->dev_list */
- struct list_head dev_data_list; /* For global dev_data_list */
+ struct llist_node dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
u16 devid; /* PCI Device ID */
u16 alias; /* Alias Device ID */
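
A kernel-style sketch of the llist pattern adopted here (illustrative names,
not the driver code): additions become a single atomic push to the front,
and lookups walk the list without a lock because entries are never removed
individually.

#include <linux/llist.h>
#include <linux/slab.h>

struct item {
	int id;
	struct llist_node node;
};

static LLIST_HEAD(items);

static struct item *item_add(int id)
{
	struct item *it = kzalloc(sizeof(*it), GFP_KERNEL);

	if (!it)
		return NULL;
	it->id = id;
	llist_add(&it->node, &items);	/* lockless cmpxchg push */
	return it;
}

static struct item *item_find(int id)
{
	struct item *it;

	llist_for_each_entry(it, items.first, node)
		if (it->id == id)
			return it;
	return NULL;
}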


@@ -0,0 +1,46 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 22:07:35 -0500
Subject: [PATCH 02/17] now lock_parent() can't run into killed dentry
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 65d8eb5a8f5480756105173de147ef5d60163e2f
all remaining callers hold either a reference or ->i_lock
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -593,8 +593,6 @@ static inline struct dentry *lock_parent
struct dentry *parent = dentry->d_parent;
if (IS_ROOT(dentry))
return NULL;
- if (unlikely(dentry->d_lockref.count < 0))
- return NULL;
if (likely(spin_trylock(&parent->d_lock)))
return parent;
rcu_read_lock();
@@ -614,16 +612,11 @@ static inline struct dentry *lock_parent
spin_unlock(&parent->d_lock);
goto again;
}
- if (parent != dentry) {
+ rcu_read_unlock();
+ if (parent != dentry)
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (unlikely(dentry->d_lockref.count < 0)) {
- spin_unlock(&parent->d_lock);
- parent = NULL;
- }
- } else {
+ else
parent = NULL;
- }
- rcu_read_unlock();
return parent;
}


@@ -0,0 +1,30 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:40 +0200
Subject: [PATCH 3/6] clocksource/drivers: atmel-pit: make option silent
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
To conform with the other options, make the ATMEL_PIT option silent so it
can be selected from the platform.
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/Kconfig | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -381,8 +381,11 @@ config ARMV7M_SYSTICK
This options enables support for the ARMv7M system timer unit
config ATMEL_PIT
+ bool "Microchip ARM Periodic Interval Timer (PIT)" if COMPILE_TEST
select TIMER_OF if OF
- def_bool SOC_AT91SAM9 || SOC_SAMA5
+ help
+ This enables build of clocksource and clockevent driver for
+ the integrated PIT in Microchip ARM SoCs.
config ATMEL_ST
bool "Atmel ST timer support" if COMPILE_TEST


@@ -0,0 +1,117 @@
From: Scott Wood <swood@redhat.com>
Date: Wed, 14 Feb 2018 17:36:28 -0600
Subject: [PATCH 3/3] iommu/amd: Avoid locking get_irq_table() from atomic
context
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit df42a04b15f19a842393dc98a84cbc52b1f8ed49
get_irq_table() previously acquired amd_iommu_devtable_lock which is not
a raw lock, and thus cannot be acquired from atomic context on
PREEMPT_RT. Many calls to modify_irte*() come from atomic context due to
the IRQ desc->lock, as does amd_iommu_update_ga() due to the preemption
disabling in vcpu_load/put().
The only difference between calling get_irq_table() and reading from
irq_lookup_table[] directly, other than the lock acquisition and
amd_iommu_rlookup_table[] check, is if the table entry is unpopulated,
which should never happen when looking up a devid that came from an
irq_2_irte struct, as get_irq_table() would have already been called on
that devid during irq_remapping_alloc().
The lock acquisition is not needed in these cases because entries in
irq_lookup_table[] never change once non-NULL -- nor would the
amd_iommu_devtable_lock usage in get_irq_table() provide meaningful
protection if they did, since it's released before using the looked up
table in the get_irq_table() caller.
Rename the old get_irq_table() to alloc_irq_table(), and create a new
lockless get_irq_table() to be used in non-allocating contexts that WARNs
if it doesn't find what it's looking for.
Signed-off-by: Scott Wood <swood@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3595,7 +3595,22 @@ static void set_dte_irq_entry(u16 devid,
amd_iommu_dev_table[devid].data[2] = dte;
}
-static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
+static struct irq_remap_table *get_irq_table(u16 devid)
+{
+ struct irq_remap_table *table;
+
+ if (WARN_ONCE(!amd_iommu_rlookup_table[devid],
+ "%s: no iommu for devid %x\n", __func__, devid))
+ return NULL;
+
+ table = irq_lookup_table[devid];
+ if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid))
+ return NULL;
+
+ return table;
+}
+
+static struct irq_remap_table *alloc_irq_table(u16 devid, bool ioapic)
{
struct irq_remap_table *table = NULL;
struct amd_iommu *iommu;
@@ -3682,7 +3697,7 @@ static int alloc_irq_index(u16 devid, in
if (!iommu)
return -ENODEV;
- table = get_irq_table(devid, false);
+ table = alloc_irq_table(devid, false);
if (!table)
return -ENODEV;
@@ -3733,7 +3748,7 @@ static int modify_irte_ga(u16 devid, int
if (iommu == NULL)
return -EINVAL;
- table = get_irq_table(devid, false);
+ table = get_irq_table(devid);
if (!table)
return -ENOMEM;
@@ -3766,7 +3781,7 @@ static int modify_irte(u16 devid, int in
if (iommu == NULL)
return -EINVAL;
- table = get_irq_table(devid, false);
+ table = get_irq_table(devid);
if (!table)
return -ENOMEM;
@@ -3790,7 +3805,7 @@ static void free_irte(u16 devid, int ind
if (iommu == NULL)
return;
- table = get_irq_table(devid, false);
+ table = get_irq_table(devid);
if (!table)
return;
@@ -4108,7 +4123,7 @@ static int irq_remapping_alloc(struct ir
return ret;
if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
- if (get_irq_table(devid, true))
+ if (alloc_irq_table(devid, true))
index = info->ioapic_pin;
else
ret = -ENOMEM;
@@ -4391,7 +4406,7 @@ int amd_iommu_update_ga(int cpu, bool is
if (!iommu)
return -ENODEV;
- irt = get_irq_table(devid, false);
+ irt = get_irq_table(devid);
if (!irt)
return -ENODEV;
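
The shape of that split, reduced to a sketch (hypothetical kernel-style
code, not the driver itself): a lockless getter for atomic paths that only
WARNs on misuse, next to a locked allocator kept on the setup path.

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct table { int dummy; };

#define NENTRIES 16
static struct table *entries[NENTRIES];	/* written once, then stable */
static DEFINE_SPINLOCK(alloc_lock);

/* Atomic-safe lookup: no lock, entries never change once non-NULL. */
static struct table *entry_get(unsigned int i)
{
	struct table *t = READ_ONCE(entries[i]);

	WARN_ONCE(!t, "entry %u used before allocation\n", i);
	return t;
}

/* Allocation path: may contend, so it takes the lock. */
static struct table *entry_alloc(unsigned int i)
{
	struct table *t;

	spin_lock(&alloc_lock);
	t = entries[i];
	if (!t) {
		t = kzalloc(sizeof(*t), GFP_ATOMIC);
		if (t)
			WRITE_ONCE(entries[i], t);
	}
	spin_unlock(&alloc_lock);
	return t;
}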


@@ -0,0 +1,63 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:35 +0100
Subject: [PATCH 03/10] iommu/amd: Split domain id out of
amd_iommu_devtable_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 2bc00180890427dcc092b2f2b0d03c904bcade29
domain_id_alloc() and domain_id_free() are used for id management. Those
two functions share a bitmap (amd_iommu_pd_alloc_bitmap) and set/clear
bits based on id allocation. There is no need to share this with
amd_iommu_devtable_lock; it can use its own lock for this operation.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -81,6 +81,7 @@
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
static LLIST_HEAD(dev_data_list);
@@ -1599,29 +1600,26 @@ static void del_domain_from_list(struct
static u16 domain_id_alloc(void)
{
- unsigned long flags;
int id;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock(&pd_bitmap_lock);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock(&pd_bitmap_lock);
return id;
}
static void domain_id_free(int id)
{
- unsigned long flags;
-
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock(&pd_bitmap_lock);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock(&pd_bitmap_lock);
}
#define DEFINE_FREE_PT_FN(LVL, FN) \
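
The allocator that pd_bitmap_lock now protects is a plain find-and-set
bitmap; in sketch form (kernel-style, illustrative only):

#include <linux/bitmap.h>
#include <linux/spinlock.h>

#define MAX_IDS 128
static DECLARE_BITMAP(id_bitmap, MAX_IDS) = { 1 };	/* id 0 reserved */
static DEFINE_SPINLOCK(id_lock);

static int id_alloc(void)
{
	int id;

	spin_lock(&id_lock);
	id = find_first_zero_bit(id_bitmap, MAX_IDS);
	if (id < MAX_IDS)
		__set_bit(id, id_bitmap);
	else
		id = -1;	/* bitmap exhausted */
	spin_unlock(&id_lock);
	return id;
}

static void id_free(int id)
{
	spin_lock(&id_lock);
	if (id > 0 && id < MAX_IDS)
		__clear_bit(id, id_bitmap);
	spin_unlock(&id_lock);
}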


@@ -0,0 +1,50 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 22:11:34 -0500
Subject: [PATCH 03/17] split the slow part of lock_parent() off
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 8b987a46a1e0e93d4cb4babea06ea274e2e2b658
Turn the "trylock failed" part into uninlined __lock_parent().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -588,13 +588,9 @@ static void __dentry_kill(struct dentry
dentry_free(dentry);
}
-static inline struct dentry *lock_parent(struct dentry *dentry)
+static struct dentry *__lock_parent(struct dentry *dentry)
{
- struct dentry *parent = dentry->d_parent;
- if (IS_ROOT(dentry))
- return NULL;
- if (likely(spin_trylock(&parent->d_lock)))
- return parent;
+ struct dentry *parent;
rcu_read_lock();
spin_unlock(&dentry->d_lock);
again:
@@ -620,6 +616,16 @@ static inline struct dentry *lock_parent
return parent;
}
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+ struct dentry *parent = dentry->d_parent;
+ if (IS_ROOT(dentry))
+ return NULL;
+ if (likely(spin_trylock(&parent->d_lock)))
+ return parent;
+ return __lock_parent(dentry);
+}
+
/*
* Finish off a dentry we've decided to kill.
* dentry->d_lock must be held, returns with it unlocked.
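
The shape of that change, as a generic sketch (pthread-based, hypothetical
names): the uncontended trylock stays in a small inline wrapper while the
contended case moves out of line, keeping the inlined fast path tiny in
every caller.

#include <pthread.h>

static pthread_mutex_t biglock = PTHREAD_MUTEX_INITIALIZER;

/* Out of line: the rare, contended case. */
static __attribute__((noinline)) void lock_slow(void)
{
	pthread_mutex_lock(&biglock);
}

/* Inlined into every caller: one trylock in the common case. */
static inline void lock_fast(void)
{
	if (pthread_mutex_trylock(&biglock) == 0)
		return;
	lock_slow();
}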


@@ -0,0 +1,49 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:41 +0200
Subject: [PATCH 4/6] ARM: at91: Implement clocksource selection
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Allow selecting and unselecting the PIT clocksource driver so it doesn't
have to be compiled when unused.
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mach-at91/Kconfig | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -106,6 +106,31 @@ config SOC_AT91SAM9
AT91SAM9X35
AT91SAM9XE
+comment "Clocksource driver selection"
+
+config ATMEL_CLOCKSOURCE_PIT
+ bool "Periodic Interval Timer (PIT) support"
+ depends on SOC_AT91SAM9 || SOC_SAMA5
+ default SOC_AT91SAM9 || SOC_SAMA5
+ select ATMEL_PIT
+ help
+ Select this to get a clocksource based on the Atmel Periodic Interval
+ Timer. It has a relatively low resolution and the TC Block clocksource
+ should be preferred.
+
+config ATMEL_CLOCKSOURCE_TCB
+ bool "Timer Counter Blocks (TCB) support"
+ depends on SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5 || COMPILE_TEST
+ default SOC_AT91RM9200 || SOC_AT91SAM9 || SOC_SAMA5
+ depends on !ATMEL_TCLIB
+ select ATMEL_ARM_TCB_CLKSRC
+ help
+ Select this to get a high precision clocksource based on a
+ TC block with a 5+ MHz base clock rate.
+ On platforms with 16-bit counters, two timer channels are combined
+ to make a single 32-bit timer.
+ It can also be used as a clock event device supporting oneshot mode.
+
config HAVE_AT91_UTMI
bool


@@ -0,0 +1,74 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 21:07:24 -0500
Subject: [PATCH 04/17] dput(): consolidate the "do we need to retain it?" into
an inlined helper
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit a338579f2f3d6a15c78f1dc7de4c248b4183fcea
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -626,6 +626,24 @@ static inline struct dentry *lock_parent
return __lock_parent(dentry);
}
+static inline bool retain_dentry(struct dentry *dentry)
+{
+ WARN_ON(d_in_lookup(dentry));
+
+ /* Unreachable? Get rid of it */
+ if (unlikely(d_unhashed(dentry)))
+ return false;
+
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+ return false;
+
+ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
+ if (dentry->d_op->d_delete(dentry))
+ return false;
+ }
+ return true;
+}
+
/*
* Finish off a dentry we've decided to kill.
* dentry->d_lock must be held, returns with it unlocked.
@@ -804,27 +822,13 @@ void dput(struct dentry *dentry)
/* Slow case: now with the dentry lock held */
rcu_read_unlock();
- WARN_ON(d_in_lookup(dentry));
-
- /* Unreachable? Get rid of it */
- if (unlikely(d_unhashed(dentry)))
- goto kill_it;
-
- if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
- goto kill_it;
-
- if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
- if (dentry->d_op->d_delete(dentry))
- goto kill_it;
+ if (likely(retain_dentry(dentry))) {
+ dentry_lru_add(dentry);
+ dentry->d_lockref.count--;
+ spin_unlock(&dentry->d_lock);
+ return;
}
- dentry_lru_add(dentry);
-
- dentry->d_lockref.count--;
- spin_unlock(&dentry->d_lock);
- return;
-
-kill_it:
dentry = dentry_kill(dentry);
if (dentry) {
cond_resched();


@@ -0,0 +1,51 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:36 +0100
Subject: [PATCH 04/10] iommu/amd: Split irq_lookup_table out of the
amd_iommu_devtable_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit ea6166f4b83e9cfba1c18f46a764d50045682fe5
The function get_irq_table() reads/writes irq_lookup_table while holding
the amd_iommu_devtable_lock. It also modifies
amd_iommu_dev_table[].data[2].
set_dte_entry() uses amd_iommu_dev_table[].data[0|1] (under the
domain->lock), so that part should be okay. Access to the iommu itself is
serialized by the iommu's own lock.
So move get_irq_table() out from under amd_iommu_devtable_lock and give
the lookup table its own lock.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -82,6 +82,7 @@
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
+static DEFINE_SPINLOCK(iommu_table_lock);
/* List of all available dev_data structures */
static LLIST_HEAD(dev_data_list);
@@ -3609,7 +3610,7 @@ static struct irq_remap_table *alloc_irq
unsigned long flags;
u16 alias;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&iommu_table_lock, flags);
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
@@ -3674,7 +3675,7 @@ static struct irq_remap_table *alloc_irq
iommu_completion_wait(iommu);
out_unlock:
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&iommu_table_lock, flags);
return table;
}

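The net effect of the patch is that irq_lookup_table gets a dedicated lock
instead of sharing the device-table rwlock. Reduced to its essentials, the
pattern looks like this sketch (the table size, entry type and
create_entry() are hypothetical placeholders, not the driver's names):

#include <linux/spinlock.h>

#define NR_DEVIDS 65536

struct remap_entry;

static DEFINE_SPINLOCK(table_lock);	/* guards lookup_table[] only */
static struct remap_entry *lookup_table[NR_DEVIDS];

static struct remap_entry *get_or_create(u16 devid)
{
	struct remap_entry *e;
	unsigned long flags;

	spin_lock_irqsave(&table_lock, flags);
	e = lookup_table[devid];
	if (!e) {
		e = create_entry(devid);	/* hypothetical allocator */
		lookup_table[devid] = e;
	}
	spin_unlock_irqrestore(&table_lock, flags);
	return e;
}

A narrower lock documents what it actually protects and keeps unrelated
device-table updates from serializing against interrupt-table lookups.
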
@@ -0,0 +1,35 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:42 +0200
Subject: [PATCH 5/6] ARM: configs: at91: use new TCB timer driver
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Unselecting ATMEL_TCLIB switches the TCB timer driver from tcb_clksrc to
timer-atmel-tcb.
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 -
arch/arm/configs/sama5_defconfig | 1 -
2 files changed, 2 deletions(-)
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -64,7 +64,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -75,7 +75,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=4
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_ATMEL_TCLIB=y
CONFIG_ATMEL_SSC=y
CONFIG_EEPROM_AT24=y
CONFIG_SCSI=y

@@ -0,0 +1,55 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 6 Mar 2018 21:37:31 -0500
Subject: [PATCH 05/17] handle move to LRU in retain_dentry()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 62d9956cefe6ecc4b43a7fae37af78ba7adaceaa
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -440,17 +440,6 @@ static void d_lru_shrink_move(struct lis
list_lru_isolate_move(lru, &dentry->d_lru, list);
}
-/*
- * dentry_lru_(add|del)_list) must be called with d_lock held.
- */
-static void dentry_lru_add(struct dentry *dentry)
-{
- if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
- d_lru_add(dentry);
- else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
- dentry->d_flags |= DCACHE_REFERENCED;
-}
-
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
@@ -641,6 +630,12 @@ static inline bool retain_dentry(struct
if (dentry->d_op->d_delete(dentry))
return false;
}
+ /* retain; LRU fodder */
+ dentry->d_lockref.count--;
+ if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
+ d_lru_add(dentry);
+ else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
+ dentry->d_flags |= DCACHE_REFERENCED;
return true;
}
@@ -823,8 +818,6 @@ void dput(struct dentry *dentry)
rcu_read_unlock();
if (likely(retain_dentry(dentry))) {
- dentry_lru_add(dentry);
- dentry->d_lockref.count--;
spin_unlock(&dentry->d_lock);
return;
}

@@ -0,0 +1,95 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:37 +0100
Subject: [PATCH 05/10] iommu/amd: Remove the special case from
alloc_irq_table()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit fde65dd3d3096e8f6ecc7bbe544eb91f4220772c
alloc_irq_table() has a special ioapic argument. If set, it
pre-allocates / reserves the first 32 indexes. The argument is true at
only one call site, and alloc_irq_table() becomes a little simpler if we
extract the special bits to the caller.
The caller of irq_remapping_alloc() is holding irq_domain_mutex, so the
initialization via iommu->irte_ops->set_allocated() should not race
against other users.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 34 ++++++++++++++++++++--------------
1 file changed, 20 insertions(+), 14 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3603,7 +3603,7 @@ static struct irq_remap_table *get_irq_t
return table;
}
-static struct irq_remap_table *alloc_irq_table(u16 devid, bool ioapic)
+static struct irq_remap_table *alloc_irq_table(u16 devid)
{
struct irq_remap_table *table = NULL;
struct amd_iommu *iommu;
@@ -3637,10 +3637,6 @@ static struct irq_remap_table *alloc_irq
/* Initialize table spin-lock */
raw_spin_lock_init(&table->lock);
- if (ioapic)
- /* Keep the first 32 indexes free for IOAPIC interrupts */
- table->min_index = 32;
-
table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC);
if (!table->table) {
kfree(table);
@@ -3655,12 +3651,6 @@ static struct irq_remap_table *alloc_irq
memset(table->table, 0,
(MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
- if (ioapic) {
- int i;
-
- for (i = 0; i < 32; ++i)
- iommu->irte_ops->set_allocated(table, i);
- }
irq_lookup_table[devid] = table;
set_dte_irq_entry(devid, table);
@@ -3690,7 +3680,7 @@ static int alloc_irq_index(u16 devid, in
if (!iommu)
return -ENODEV;
- table = alloc_irq_table(devid, false);
+ table = alloc_irq_table(devid);
if (!table)
return -ENODEV;
@@ -4116,10 +4106,26 @@ static int irq_remapping_alloc(struct ir
return ret;
if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
- if (alloc_irq_table(devid, true))
+ struct irq_remap_table *table;
+ struct amd_iommu *iommu;
+
+ table = alloc_irq_table(devid);
+ if (table) {
+ if (!table->min_index) {
+ /*
+ * Keep the first 32 indexes free for IOAPIC
+ * interrupts.
+ */
+ table->min_index = 32;
+ iommu = amd_iommu_rlookup_table[devid];
+ for (i = 0; i < 32; ++i)
+ iommu->irte_ops->set_allocated(table, i);
+ }
+ WARN_ON(table->min_index != 32);
index = info->ioapic_pin;
- else
+ } else {
ret = -ENOMEM;
+ }
} else {
bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);

@@ -0,0 +1,36 @@
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 18 Apr 2018 12:51:43 +0200
Subject: [PATCH 6/6] ARM: configs: at91: unselect PIT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The PIT is no longer required for a successful boot and can actually be
harmful when preempt-rt is used, because the PIT interrupt is shared.
Disable it so that the TCB clocksource is used.
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/configs/at91_dt_defconfig | 1 +
arch/arm/configs/sama5_defconfig | 1 +
2 files changed, 2 insertions(+)
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -19,6 +19,7 @@ CONFIG_ARCH_MULTI_V5=y
CONFIG_ARCH_AT91=y
CONFIG_SOC_AT91RM9200=y
CONFIG_SOC_AT91SAM9=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -20,6 +20,7 @@ CONFIG_ARCH_AT91=y
CONFIG_SOC_SAMA5D2=y
CONFIG_SOC_SAMA5D3=y
CONFIG_SOC_SAMA5D4=y
+# CONFIG_ATMEL_CLOCKSOURCE_PIT is not set
CONFIG_AEABI=y
CONFIG_UACCESS_WITH_MEMCPY=y
CONFIG_ZBOOT_ROM_TEXT=0x0

@@ -0,0 +1,73 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Feb 2018 21:25:42 -0500
Subject: [PATCH 06/17] get rid of trylock loop around dentry_kill()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit f657a666fd1b1b9fe59963943c74c245ae66f4cc
In the case when the trylock in there fails, deal with it directly in
dentry_kill(). Note that whenever we drop and retake
->d_lock, we need to recheck whether to retain the dentry.
Another thing is that dropping/retaking ->d_lock might have
ended up with a negative dentry turning into a positive one; that,
of course, can happen only once...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 34 +++++++++++++++++++++++++++-------
1 file changed, 27 insertions(+), 7 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -651,23 +651,43 @@ static struct dentry *dentry_kill(struct
struct dentry *parent = NULL;
if (inode && unlikely(!spin_trylock(&inode->i_lock)))
- goto failed;
+ goto slow_positive;
if (!IS_ROOT(dentry)) {
parent = dentry->d_parent;
if (unlikely(!spin_trylock(&parent->d_lock))) {
- if (inode)
- spin_unlock(&inode->i_lock);
- goto failed;
+ parent = __lock_parent(dentry);
+ if (likely(inode || !dentry->d_inode))
+ goto got_locks;
+ /* negative that became positive */
+ if (parent)
+ spin_unlock(&parent->d_lock);
+ inode = dentry->d_inode;
+ goto slow_positive;
}
}
-
__dentry_kill(dentry);
return parent;
-failed:
+slow_positive:
+ spin_unlock(&dentry->d_lock);
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
+ parent = lock_parent(dentry);
+got_locks:
+ if (unlikely(dentry->d_lockref.count != 1)) {
+ dentry->d_lockref.count--;
+ } else if (likely(!retain_dentry(dentry))) {
+ __dentry_kill(dentry);
+ return parent;
+ }
+ /* we are keeping it, after all */
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ if (parent)
+ spin_unlock(&parent->d_lock);
spin_unlock(&dentry->d_lock);
- return dentry; /* try again with same dentry */
+ return NULL;
}
/*

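The rework is the standard fallback when a trylock against the documented
lock order fails: drop what you hold, take the locks in the correct order,
then revalidate whatever may have changed in the unlocked window (here the
refcount and the retain decision). A generic sketch of that shape, with a
hypothetical object and helpers:

/* Required order: *a before *b, but we arrive holding only *b. */
static void kill_object(struct obj *o, spinlock_t *a, spinlock_t *b)
{
	if (!spin_trylock(a)) {
		spin_unlock(b);
		spin_lock(a);			/* may block; fine here */
		spin_lock(b);
		if (obj_state_changed(o)) {	/* hypothetical recheck */
			spin_unlock(b);
			spin_unlock(a);
			return;			/* another path took over */
		}
	}
	do_kill(o);				/* hypothetical */
	spin_unlock(b);
	spin_unlock(a);
}
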
@@ -0,0 +1,53 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:38 +0100
Subject: [PATCH 06/10] iommu/amd: Use `table' instead `irt' as variable name
in amd_iommu_update_ga()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 4fde541c9dc114c5b448ad34b0286fe8b7c550f1
The variable of type struct irq_remap_table is always named `table',
except in amd_iommu_update_ga(), where it is called `irt'. Make it
consistent and name it `table' there as well.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4391,7 +4391,7 @@ int amd_iommu_update_ga(int cpu, bool is
{
unsigned long flags;
struct amd_iommu *iommu;
- struct irq_remap_table *irt;
+ struct irq_remap_table *table;
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
int devid = ir_data->irq_2_irte.devid;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
@@ -4405,11 +4405,11 @@ int amd_iommu_update_ga(int cpu, bool is
if (!iommu)
return -ENODEV;
- irt = get_irq_table(devid);
- if (!irt)
+ table = get_irq_table(devid);
+ if (!table)
return -ENODEV;
- raw_spin_lock_irqsave(&irt->lock, flags);
+ raw_spin_lock_irqsave(&table->lock, flags);
if (ref->lo.fields_vapic.guest_mode) {
if (cpu >= 0)
@@ -4418,7 +4418,7 @@ int amd_iommu_update_ga(int cpu, bool is
barrier();
}
- raw_spin_unlock_irqrestore(&irt->lock, flags);
+ raw_spin_unlock_irqrestore(&table->lock, flags);
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);

@@ -0,0 +1,150 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Fri, 23 Feb 2018 00:50:24 +0100
Subject: [PATCH 07/17] fs/dcache: Avoid a try_lock loop in
shrink_dentry_list()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 8f04da2adbdffed8dc4b2feb00ec3b3d84683885
shrink_dentry_list() holds dentry->d_lock and needs to acquire
dentry->d_inode->i_lock. This cannot be done with a spin_lock()
operation because it's the reverse of the regular lock order.
To avoid ABBA deadlocks it is done with a trylock loop.
Trylock loops are problematic in two scenarios:
1) PREEMPT_RT converts spinlocks to 'sleeping' spinlocks, which are
preemptible. As a consequence the i_lock holder can be preempted
by a higher priority task. If that task executes the trylock loop
it will do so forever and live lock.
2) In virtual machines trylock loops are problematic as well. The
VCPU on which the i_lock holder runs can be scheduled out and a
task on a different VCPU can loop for a whole time slice. In the
worst case this can lead to starvation. Commits 47be61845c77
("fs/dcache.c: avoid soft-lockup in dput()") and 046b961b45f9
("shrink_dentry_list(): take parent's d_lock earlier") are
addressing exactly those symptoms.
Avoid the trylock loop by using dentry_kill(). When pruning ancestors,
the same code applies that is used to kill a dentry in dput(). This
also has the benefit that the locking order is now the same. First
the inode is locked, then the parent.
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 41 ++++++++++-------------------------------
1 file changed, 10 insertions(+), 31 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -992,9 +992,11 @@ EXPORT_SYMBOL(d_prune_aliases);
/*
* Lock a dentry from shrink list.
+ * Called under rcu_read_lock() and dentry->d_lock; the former
+ * guarantees that nothing we access will be freed under us.
* Note that dentry is *not* protected from concurrent dentry_kill(),
- * d_delete(), etc. It is protected from freeing (by the fact of
- * being on a shrink list), but everything else is fair game.
+ * d_delete(), etc.
+ *
* Return false if dentry has been disrupted or grabbed, leaving
* the caller to kick it off-list. Otherwise, return true and have
* that dentry's inode and parent both locked.
@@ -1009,7 +1011,6 @@ static bool shrink_lock_dentry(struct de
inode = dentry->d_inode;
if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
- rcu_read_lock(); /* to protect inode */
spin_unlock(&dentry->d_lock);
spin_lock(&inode->i_lock);
spin_lock(&dentry->d_lock);
@@ -1018,16 +1019,13 @@ static bool shrink_lock_dentry(struct de
/* changed inode means that somebody had grabbed it */
if (unlikely(inode != dentry->d_inode))
goto out;
- rcu_read_unlock();
}
parent = dentry->d_parent;
if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
return true;
- rcu_read_lock(); /* to protect parent */
spin_unlock(&dentry->d_lock);
- parent = READ_ONCE(dentry->d_parent);
spin_lock(&parent->d_lock);
if (unlikely(parent != dentry->d_parent)) {
spin_unlock(&parent->d_lock);
@@ -1035,15 +1033,12 @@ static bool shrink_lock_dentry(struct de
goto out;
}
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (likely(!dentry->d_lockref.count)) {
- rcu_read_unlock();
+ if (likely(!dentry->d_lockref.count))
return true;
- }
spin_unlock(&parent->d_lock);
out:
if (inode)
spin_unlock(&inode->i_lock);
- rcu_read_unlock();
return false;
}
@@ -1051,12 +1046,13 @@ static void shrink_dentry_list(struct li
{
while (!list_empty(list)) {
struct dentry *dentry, *parent;
- struct inode *inode;
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
+ rcu_read_lock();
if (!shrink_lock_dentry(dentry)) {
bool can_free = false;
+ rcu_read_unlock();
d_shrink_del(dentry);
if (dentry->d_lockref.count < 0)
can_free = dentry->d_flags & DCACHE_MAY_FREE;
@@ -1065,6 +1061,7 @@ static void shrink_dentry_list(struct li
dentry_free(dentry);
continue;
}
+ rcu_read_unlock();
d_shrink_del(dentry);
parent = dentry->d_parent;
__dentry_kill(dentry);
@@ -1077,26 +1074,8 @@ static void shrink_dentry_list(struct li
* fragmentation.
*/
dentry = parent;
- while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
- parent = lock_parent(dentry);
- if (dentry->d_lockref.count != 1) {
- dentry->d_lockref.count--;
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- break;
- }
- inode = dentry->d_inode; /* can't be NULL */
- if (unlikely(!spin_trylock(&inode->i_lock))) {
- spin_unlock(&dentry->d_lock);
- if (parent)
- spin_unlock(&parent->d_lock);
- cpu_relax();
- continue;
- }
- __dentry_kill(dentry);
- dentry = parent;
- }
+ while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
+ dentry = dentry_kill(dentry);
}
}

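Both failure modes described in the changelog come from the same shape of
code: spinning on a trylock while holding another lock. A sketch of the
anti-pattern this patch removes, next to the blocking fallback that
replaces it (hypothetical lock names):

/* Anti-pattern: spins forever if the inode_lock holder is preempted. */
for (;;) {
	spin_lock(&dentry_lock);
	if (spin_trylock(&inode_lock))
		break;			/* got both */
	spin_unlock(&dentry_lock);
	cpu_relax();			/* busy-waits; livelocks on PREEMPT_RT */
}

/* Replacement: acquire in lock order and revalidate afterwards. */
spin_unlock(&dentry_lock);
spin_lock(&inode_lock);			/* blocks, so the owner can run */
spin_lock(&dentry_lock);
/* recheck that the object is still the one we meant to lock */

Blocking instead of spinning lets a preempted lock holder make progress,
which is exactly what the PREEMPT_RT and VCPU scenarios above require.
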
@@ -0,0 +1,67 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:39 +0100
Subject: [PATCH 07/10] iommu/amd: Factor out setting the remap table for a
devid
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 2fcc1e8ac4a8514c64f946178fc36c2e30e56a41
Setting the IRQ remap table for a specific devid (or its alias devid)
takes three steps, and those steps are repeated each time this is done.
Introduce a new helper function, move those steps there, and use that
function instead. The compiler can still decide whether it is worth
inlining.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3603,6 +3603,14 @@ static struct irq_remap_table *get_irq_t
return table;
}
+static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
+ struct irq_remap_table *table)
+{
+ irq_lookup_table[devid] = table;
+ set_dte_irq_entry(devid, table);
+ iommu_flush_dte(iommu, devid);
+}
+
static struct irq_remap_table *alloc_irq_table(u16 devid)
{
struct irq_remap_table *table = NULL;
@@ -3623,9 +3631,7 @@ static struct irq_remap_table *alloc_irq
alias = amd_iommu_alias_table[devid];
table = irq_lookup_table[alias];
if (table) {
- irq_lookup_table[devid] = table;
- set_dte_irq_entry(devid, table);
- iommu_flush_dte(iommu, devid);
+ set_remap_table_entry(iommu, devid, table);
goto out;
}
@@ -3652,14 +3658,9 @@ static struct irq_remap_table *alloc_irq
(MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
- irq_lookup_table[devid] = table;
- set_dte_irq_entry(devid, table);
- iommu_flush_dte(iommu, devid);
- if (devid != alias) {
- irq_lookup_table[alias] = table;
- set_dte_irq_entry(alias, table);
- iommu_flush_dte(iommu, alias);
- }
+ set_remap_table_entry(iommu, devid, table);
+ if (devid != alias)
+ set_remap_table_entry(iommu, alias, table);
out:
iommu_completion_wait(iommu);

@@ -0,0 +1,33 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Feb 2018 02:47:29 -0500
Subject: [PATCH 08/17] dcache.c: trim includes
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 43986d63b60fd0152d9038ee3f0f9294efa8c983
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 3 ---
1 file changed, 3 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -25,17 +25,14 @@
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/mount.h>
-#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
-#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/prefetch.h>
-#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include "internal.h"
#include "mount.h"

@@ -0,0 +1,132 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:40 +0100
Subject: [PATCH 08/10] iommu/amd: Drop the lock while allocating new irq remap
table
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 993ca6e063a69a0c65ca42ed449b6bc1b3844151
The irq_remap_table is allocated while the iommu_table_lock is held with
interrupts disabled.
>From looking at the call sites, all callers are in the early device
initialisation (apic_bsp_setup(), pci_enable_device(),
pci_enable_msi()) so make sense to drop the lock which also enables
interrupts and try to allocate that memory with GFP_KERNEL instead
GFP_ATOMIC.
Since during the allocation the iommu_table_lock is dropped, we need to
recheck if table exists after the lock has been reacquired. I *think*
that it is impossible that the "devid" entry appears in irq_lookup_table
while the lock is dropped since the same device can only be probed once.
However I check for both cases, just to be sure.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 63 ++++++++++++++++++++++++++++++++--------------
1 file changed, 45 insertions(+), 18 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3603,6 +3603,30 @@ static struct irq_remap_table *get_irq_t
return table;
}
+static struct irq_remap_table *__alloc_irq_table(void)
+{
+ struct irq_remap_table *table;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
+ if (!table->table) {
+ kfree(table);
+ return NULL;
+ }
+ raw_spin_lock_init(&table->lock);
+
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ memset(table->table, 0,
+ MAX_IRQS_PER_TABLE * sizeof(u32));
+ else
+ memset(table->table, 0,
+ (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
+ return table;
+}
+
static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
@@ -3614,6 +3638,7 @@ static void set_remap_table_entry(struct
static struct irq_remap_table *alloc_irq_table(u16 devid)
{
struct irq_remap_table *table = NULL;
+ struct irq_remap_table *new_table = NULL;
struct amd_iommu *iommu;
unsigned long flags;
u16 alias;
@@ -3632,42 +3657,44 @@ static struct irq_remap_table *alloc_irq
table = irq_lookup_table[alias];
if (table) {
set_remap_table_entry(iommu, devid, table);
- goto out;
+ goto out_wait;
}
+ spin_unlock_irqrestore(&iommu_table_lock, flags);
/* Nothing there yet, allocate new irq remapping table */
- table = kzalloc(sizeof(*table), GFP_ATOMIC);
- if (!table)
- goto out_unlock;
+ new_table = __alloc_irq_table();
+ if (!new_table)
+ return NULL;
- /* Initialize table spin-lock */
- raw_spin_lock_init(&table->lock);
+ spin_lock_irqsave(&iommu_table_lock, flags);
- table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC);
- if (!table->table) {
- kfree(table);
- table = NULL;
+ table = irq_lookup_table[devid];
+ if (table)
goto out_unlock;
- }
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- memset(table->table, 0,
- MAX_IRQS_PER_TABLE * sizeof(u32));
- else
- memset(table->table, 0,
- (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
+ table = irq_lookup_table[alias];
+ if (table) {
+ set_remap_table_entry(iommu, devid, table);
+ goto out_wait;
+ }
+ table = new_table;
+ new_table = NULL;
set_remap_table_entry(iommu, devid, table);
if (devid != alias)
set_remap_table_entry(iommu, alias, table);
-out:
+out_wait:
iommu_completion_wait(iommu);
out_unlock:
spin_unlock_irqrestore(&iommu_table_lock, flags);
+ if (new_table) {
+ kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+ kfree(new_table);
+ }
return table;
}

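The allocation dance above is a common kernel pattern worth spelling out:
drop the lock, allocate with GFP_KERNEL, retake the lock, recheck whether
somebody else installed an entry in the meantime, and free the spare copy
if we lost the race. A condensed sketch with hypothetical
lookup/alloc/install/free helpers:

	spin_lock_irqsave(&lock, flags);
	entry = lookup(id);
	if (entry)
		goto out_unlock;		/* fast path: already there */
	spin_unlock_irqrestore(&lock, flags);

	new_entry = alloc_entry();		/* GFP_KERNEL, may sleep */
	if (!new_entry)
		return NULL;

	spin_lock_irqsave(&lock, flags);
	entry = lookup(id);
	if (!entry) {				/* still absent: install ours */
		entry = new_entry;
		new_entry = NULL;
		install(id, entry);
	}
out_unlock:
	spin_unlock_irqrestore(&lock, flags);
	if (new_entry)
		free_entry(new_entry);		/* we lost the race */
	return entry;
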
@@ -0,0 +1,74 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:41 +0100
Subject: [PATCH 09/10] iommu/amd: Make amd_iommu_devtable_lock a spin_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 2cd1083d79a0a8c223af430ca97884c28a1e2fc0
Before commit 0bb6e243d7fb ("iommu/amd: Support IOMMU_DOMAIN_DMA type
allocation") amd_iommu_devtable_lock had a read_lock() user, but now
there are none. In fact, since the mentioned commit there have been only
write_lock() users of the lock. Since there is no reason to keep it as a
writer lock, change its type to a spin_lock.
I *think* we might even be able to remove the lock entirely, because all
its current users seem to have their own protection.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -80,7 +80,7 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
static DEFINE_SPINLOCK(iommu_table_lock);
@@ -2096,9 +2096,9 @@ static int attach_device(struct device *
}
skip_ats_check:
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
ret = __attach_device(dev_data, domain);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
/*
* We might boot into a crash-kernel here. The crashed kernel
@@ -2148,9 +2148,9 @@ static void detach_device(struct device
domain = dev_data->domain;
/* lock device table */
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(dev_data);
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
if (!dev_is_pci(dev))
return;
@@ -2813,7 +2813,7 @@ static void cleanup_domain(struct protec
struct iommu_dev_data *entry;
unsigned long flags;
- write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
@@ -2821,7 +2821,7 @@ static void cleanup_domain(struct protec
__detach_device(entry);
}
- write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
static void protection_domain_free(struct protection_domain *domain)

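The reasoning generalizes: an rwlock_t only pays for itself when
read_lock() sections actually run concurrently; with writers only, every
acquisition is exclusive anyway, and a plain spinlock is the cheaper,
simpler primitive. A minimal sketch of the writer-only case (the table
layout is a hypothetical stand-in):

#include <linux/spinlock.h>
#include <linux/types.h>

struct dev_table {
	u64 entries[65536];	/* hypothetical layout */
};

static DEFINE_SPINLOCK(devtable_lock);	/* was: DEFINE_RWLOCK */

static void update_entry(struct dev_table *t, u16 devid, u64 val)
{
	unsigned long flags;

	/* every user is a writer, so exclusive locking loses nothing */
	spin_lock_irqsave(&devtable_lock, flags);
	t->entries[devid] = val;
	spin_unlock_irqrestore(&devtable_lock, flags);
}
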
@@ -0,0 +1,997 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 5 Mar 2018 19:15:50 -0500
Subject: [PATCH 09/17] split d_path() and friends into a separate file
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 7a5cf791a747640adb2a1b5e3838321b26953a23
Those parts of fs/dcache.c are pretty much self-contained.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/Makefile | 2
fs/d_path.c | 470 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/dcache.c | 467 -----------------------------------------------------------
3 files changed, 472 insertions(+), 467 deletions(-)
create mode 100644 fs/d_path.c
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.
ioctl.o readdir.o select.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
- pnode.o splice.o sync.o utimes.o \
+ pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
ifeq ($(CONFIG_BLOCK),y)
--- /dev/null
+++ b/fs/d_path.c
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/fs_struct.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
+#include "mount.h"
+
+static int prepend(char **buffer, int *buflen, const char *str, int namelen)
+{
+ *buflen -= namelen;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ *buffer -= namelen;
+ memcpy(*buffer, str, namelen);
+ return 0;
+}
+
+/**
+ * prepend_name - prepend a pathname in front of current buffer pointer
+ * @buffer: buffer pointer
+ * @buflen: allocated length of the buffer
+ * @name: name string and length qstr structure
+ *
+ * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
+ * make sure that either the old or the new name pointer and length are
+ * fetched. However, there may be mismatch between length and pointer.
+ * The length cannot be trusted, we need to copy it byte-by-byte until
+ * the length is reached or a null byte is found. It also prepends "/" at
+ * the beginning of the name. The sequence number check at the caller will
+ * retry it again when a d_move() does happen. So any garbage in the buffer
+ * due to mismatched pointer and length will be discarded.
+ *
+ * Load acquire is needed to make sure that we see that terminating NUL.
+ */
+static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
+{
+ const char *dname = smp_load_acquire(&name->name); /* ^^^ */
+ u32 dlen = READ_ONCE(name->len);
+ char *p;
+
+ *buflen -= dlen + 1;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
+ p = *buffer -= dlen + 1;
+ *p++ = '/';
+ while (dlen--) {
+ char c = *dname++;
+ if (!c)
+ break;
+ *p++ = c;
+ }
+ return 0;
+}
+
+/**
+ * prepend_path - Prepend path string to a buffer
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buffer: pointer to the end of the buffer
+ * @buflen: pointer to buffer length
+ *
+ * The function will first try to write out the pathname without taking any
+ * lock other than the RCU read lock to make sure that dentries won't go away.
+ * It only checks the sequence number of the global rename_lock as any change
+ * in the dentry's d_seq will be preceded by changes in the rename_lock
+ * sequence number. If the sequence number had been changed, it will restart
+ * the whole pathname back-tracing sequence again by taking the rename_lock.
+ * In this case, there is no need to take the RCU read lock as the recursive
+ * parent pointer references will keep the dentry chain alive as long as no
+ * rename operation is performed.
+ */
+static int prepend_path(const struct path *path,
+ const struct path *root,
+ char **buffer, int *buflen)
+{
+ struct dentry *dentry;
+ struct vfsmount *vfsmnt;
+ struct mount *mnt;
+ int error = 0;
+ unsigned seq, m_seq = 0;
+ char *bptr;
+ int blen;
+
+ rcu_read_lock();
+restart_mnt:
+ read_seqbegin_or_lock(&mount_lock, &m_seq);
+ seq = 0;
+ rcu_read_lock();
+restart:
+ bptr = *buffer;
+ blen = *buflen;
+ error = 0;
+ dentry = path->dentry;
+ vfsmnt = path->mnt;
+ mnt = real_mount(vfsmnt);
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (dentry != root->dentry || vfsmnt != root->mnt) {
+ struct dentry * parent;
+
+ if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+ struct mount *parent = READ_ONCE(mnt->mnt_parent);
+ /* Escaped? */
+ if (dentry != vfsmnt->mnt_root) {
+ bptr = *buffer;
+ blen = *buflen;
+ error = 3;
+ break;
+ }
+ /* Global root? */
+ if (mnt != parent) {
+ dentry = READ_ONCE(mnt->mnt_mountpoint);
+ mnt = parent;
+ vfsmnt = &mnt->mnt;
+ continue;
+ }
+ if (!error)
+ error = is_mounted(vfsmnt) ? 1 : 2;
+ break;
+ }
+ parent = dentry->d_parent;
+ prefetch(parent);
+ error = prepend_name(&bptr, &blen, &dentry->d_name);
+ if (error)
+ break;
+
+ dentry = parent;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+
+ if (!(m_seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&mount_lock, m_seq)) {
+ m_seq = 1;
+ goto restart_mnt;
+ }
+ done_seqretry(&mount_lock, m_seq);
+
+ if (error >= 0 && bptr == *buffer) {
+ if (--blen < 0)
+ error = -ENAMETOOLONG;
+ else
+ *--bptr = '/';
+ }
+ *buffer = bptr;
+ *buflen = blen;
+ return error;
+}
+
+/**
+ * __d_path - return the path of a dentry
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name.
+ *
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
+ *
+ * "buflen" should be positive.
+ *
+ * If the path is not reachable from the supplied root, return %NULL.
+ */
+char *__d_path(const struct path *path,
+ const struct path *root,
+ char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ int error;
+
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, root, &res, &buflen);
+
+ if (error < 0)
+ return ERR_PTR(error);
+ if (error > 0)
+ return NULL;
+ return res;
+}
+
+char *d_absolute_path(const struct path *path,
+ char *buf, int buflen)
+{
+ struct path root = {};
+ char *res = buf + buflen;
+ int error;
+
+ prepend(&res, &buflen, "\0", 1);
+ error = prepend_path(path, &root, &res, &buflen);
+
+ if (error > 1)
+ error = -EINVAL;
+ if (error < 0)
+ return ERR_PTR(error);
+ return res;
+}
+
+/*
+ * same as __d_path but appends "(deleted)" for unlinked files.
+ */
+static int path_with_deleted(const struct path *path,
+ const struct path *root,
+ char **buf, int *buflen)
+{
+ prepend(buf, buflen, "\0", 1);
+ if (d_unlinked(path->dentry)) {
+ int error = prepend(buf, buflen, " (deleted)", 10);
+ if (error)
+ return error;
+ }
+
+ return prepend_path(path, root, buf, buflen);
+}
+
+static int prepend_unreachable(char **buffer, int *buflen)
+{
+ return prepend(buffer, buflen, "(unreachable)", 13);
+}
+
+static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/**
+ * d_path - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * the string " (deleted)" is appended. Note that this is ambiguous.
+ *
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: Callers should use the returned pointer, not the passed
+ * in buffer, to use the name! The implementation often starts at an offset
+ * into the buffer, and may leave 0 bytes at the start.
+ *
+ * "buflen" should be positive.
+ */
+char *d_path(const struct path *path, char *buf, int buflen)
+{
+ char *res = buf + buflen;
+ struct path root;
+ int error;
+
+ /*
+ * We have various synthetic filesystems that never get mounted. On
+ * these filesystems dentries are never used for lookup purposes, and
+ * thus don't need to be hashed. They also don't need a name until a
+ * user wants to identify the object in /proc/pid/fd/. The little hack
+ * below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
+ */
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
+ return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+
+ rcu_read_lock();
+ get_fs_root_rcu(current->fs, &root);
+ error = path_with_deleted(path, &root, &res, &buflen);
+ rcu_read_unlock();
+
+ if (error < 0)
+ res = ERR_PTR(error);
+ return res;
+}
+EXPORT_SYMBOL(d_path);
+
+/*
+ * Helper function for dentry_operations.d_dname() members
+ */
+char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
+ const char *fmt, ...)
+{
+ va_list args;
+ char temp[64];
+ int sz;
+
+ va_start(args, fmt);
+ sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
+ va_end(args);
+
+ if (sz > sizeof(temp) || sz > buflen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ buffer += buflen - sz;
+ return memcpy(buffer, temp, sz);
+}
+
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ char *end = buffer + buflen;
+ /* these dentries are never renamed, so d_lock is not needed */
+ if (prepend(&end, &buflen, " (deleted)", 11) ||
+ prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
+ prepend(&end, &buflen, "/", 1))
+ end = ERR_PTR(-ENAMETOOLONG);
+ return end;
+}
+EXPORT_SYMBOL(simple_dname);
+
+/*
+ * Write full pathname from the root of the filesystem into the buffer.
+ */
+static char *__dentry_path(struct dentry *d, char *buf, int buflen)
+{
+ struct dentry *dentry;
+ char *end, *retval;
+ int len, seq = 0;
+ int error = 0;
+
+ if (buflen < 2)
+ goto Elong;
+
+ rcu_read_lock();
+restart:
+ dentry = d;
+ end = buf + buflen;
+ len = buflen;
+ prepend(&end, &len, "\0", 1);
+ /* Get '/' right */
+ retval = end-1;
+ *retval = '/';
+ read_seqbegin_or_lock(&rename_lock, &seq);
+ while (!IS_ROOT(dentry)) {
+ struct dentry *parent = dentry->d_parent;
+
+ prefetch(parent);
+ error = prepend_name(&end, &len, &dentry->d_name);
+ if (error)
+ break;
+
+ retval = end;
+ dentry = parent;
+ }
+ if (!(seq & 1))
+ rcu_read_unlock();
+ if (need_seqretry(&rename_lock, seq)) {
+ seq = 1;
+ goto restart;
+ }
+ done_seqretry(&rename_lock, seq);
+ if (error)
+ goto Elong;
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+{
+ return __dentry_path(dentry, buf, buflen);
+}
+EXPORT_SYMBOL(dentry_path_raw);
+
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+ char *p = NULL;
+ char *retval;
+
+ if (d_unlinked(dentry)) {
+ p = buf + buflen;
+ if (prepend(&p, &buflen, "//deleted", 10) != 0)
+ goto Elong;
+ buflen++;
+ }
+ retval = __dentry_path(dentry, buf, buflen);
+ if (!IS_ERR(retval) && p)
+ *p = '/'; /* restore '/' overriden with '\0' */
+ return retval;
+Elong:
+ return ERR_PTR(-ENAMETOOLONG);
+}
+
+static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
+ struct path *pwd)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ *root = fs->root;
+ *pwd = fs->pwd;
+ } while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/*
+ * NOTE! The user-level library version returns a
+ * character pointer. The kernel system call just
+ * returns the length of the buffer filled (which
+ * includes the ending '\0' character), or a negative
+ * error value. So libc would do something like
+ *
+ * char *getcwd(char * buf, size_t size)
+ * {
+ * int retval;
+ *
+ * retval = sys_getcwd(buf, size);
+ * if (retval >= 0)
+ * return buf;
+ * errno = -retval;
+ * return NULL;
+ * }
+ */
+SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
+{
+ int error;
+ struct path pwd, root;
+ char *page = __getname();
+
+ if (!page)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
+
+ error = -ENOENT;
+ if (!d_unlinked(pwd.dentry)) {
+ unsigned long len;
+ char *cwd = page + PATH_MAX;
+ int buflen = PATH_MAX;
+
+ prepend(&cwd, &buflen, "\0", 1);
+ error = prepend_path(&pwd, &root, &cwd, &buflen);
+ rcu_read_unlock();
+
+ if (error < 0)
+ goto out;
+
+ /* Unreachable from current root */
+ if (error > 0) {
+ error = prepend_unreachable(&cwd, &buflen);
+ if (error)
+ goto out;
+ }
+
+ error = -ERANGE;
+ len = PATH_MAX + page - cwd;
+ if (len <= size) {
+ error = len;
+ if (copy_to_user(buf, cwd, len))
+ error = -EFAULT;
+ }
+ } else {
+ rcu_read_unlock();
+ }
+
+out:
+ __putname(page);
+ return error;
+}
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -14,7 +14,7 @@
* the dcache entry is deleted or garbage collected.
*/
-#include <linux/syscalls.h>
+#include <linux/ratelimit.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
@@ -24,15 +24,11 @@
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/export.h>
-#include <linux/mount.h>
-#include <linux/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/bootmem.h>
-#include <linux/fs_struct.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
-#include <linux/prefetch.h>
#include <linux/list_lru.h>
#include "internal.h"
#include "mount.h"
@@ -3072,467 +3068,6 @@ struct dentry *d_splice_alias(struct ino
}
EXPORT_SYMBOL(d_splice_alias);
-static int prepend(char **buffer, int *buflen, const char *str, int namelen)
-{
- *buflen -= namelen;
- if (*buflen < 0)
- return -ENAMETOOLONG;
- *buffer -= namelen;
- memcpy(*buffer, str, namelen);
- return 0;
-}
-
-/**
- * prepend_name - prepend a pathname in front of current buffer pointer
- * @buffer: buffer pointer
- * @buflen: allocated length of the buffer
- * @name: name string and length qstr structure
- *
- * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
- * make sure that either the old or the new name pointer and length are
- * fetched. However, there may be mismatch between length and pointer.
- * The length cannot be trusted, we need to copy it byte-by-byte until
- * the length is reached or a null byte is found. It also prepends "/" at
- * the beginning of the name. The sequence number check at the caller will
- * retry it again when a d_move() does happen. So any garbage in the buffer
- * due to mismatched pointer and length will be discarded.
- *
- * Load acquire is needed to make sure that we see that terminating NUL.
- */
-static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
-{
- const char *dname = smp_load_acquire(&name->name); /* ^^^ */
- u32 dlen = READ_ONCE(name->len);
- char *p;
-
- *buflen -= dlen + 1;
- if (*buflen < 0)
- return -ENAMETOOLONG;
- p = *buffer -= dlen + 1;
- *p++ = '/';
- while (dlen--) {
- char c = *dname++;
- if (!c)
- break;
- *p++ = c;
- }
- return 0;
-}
-
-/**
- * prepend_path - Prepend path string to a buffer
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buffer: pointer to the end of the buffer
- * @buflen: pointer to buffer length
- *
- * The function will first try to write out the pathname without taking any
- * lock other than the RCU read lock to make sure that dentries won't go away.
- * It only checks the sequence number of the global rename_lock as any change
- * in the dentry's d_seq will be preceded by changes in the rename_lock
- * sequence number. If the sequence number had been changed, it will restart
- * the whole pathname back-tracing sequence again by taking the rename_lock.
- * In this case, there is no need to take the RCU read lock as the recursive
- * parent pointer references will keep the dentry chain alive as long as no
- * rename operation is performed.
- */
-static int prepend_path(const struct path *path,
- const struct path *root,
- char **buffer, int *buflen)
-{
- struct dentry *dentry;
- struct vfsmount *vfsmnt;
- struct mount *mnt;
- int error = 0;
- unsigned seq, m_seq = 0;
- char *bptr;
- int blen;
-
- rcu_read_lock();
-restart_mnt:
- read_seqbegin_or_lock(&mount_lock, &m_seq);
- seq = 0;
- rcu_read_lock();
-restart:
- bptr = *buffer;
- blen = *buflen;
- error = 0;
- dentry = path->dentry;
- vfsmnt = path->mnt;
- mnt = real_mount(vfsmnt);
- read_seqbegin_or_lock(&rename_lock, &seq);
- while (dentry != root->dentry || vfsmnt != root->mnt) {
- struct dentry * parent;
-
- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
- struct mount *parent = READ_ONCE(mnt->mnt_parent);
- /* Escaped? */
- if (dentry != vfsmnt->mnt_root) {
- bptr = *buffer;
- blen = *buflen;
- error = 3;
- break;
- }
- /* Global root? */
- if (mnt != parent) {
- dentry = READ_ONCE(mnt->mnt_mountpoint);
- mnt = parent;
- vfsmnt = &mnt->mnt;
- continue;
- }
- if (!error)
- error = is_mounted(vfsmnt) ? 1 : 2;
- break;
- }
- parent = dentry->d_parent;
- prefetch(parent);
- error = prepend_name(&bptr, &blen, &dentry->d_name);
- if (error)
- break;
-
- dentry = parent;
- }
- if (!(seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&rename_lock, seq)) {
- seq = 1;
- goto restart;
- }
- done_seqretry(&rename_lock, seq);
-
- if (!(m_seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&mount_lock, m_seq)) {
- m_seq = 1;
- goto restart_mnt;
- }
- done_seqretry(&mount_lock, m_seq);
-
- if (error >= 0 && bptr == *buffer) {
- if (--blen < 0)
- error = -ENAMETOOLONG;
- else
- *--bptr = '/';
- }
- *buffer = bptr;
- *buflen = blen;
- return error;
-}
-
-/**
- * __d_path - return the path of a dentry
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name.
- *
- * Returns a pointer into the buffer or an error code if the
- * path was too long.
- *
- * "buflen" should be positive.
- *
- * If the path is not reachable from the supplied root, return %NULL.
- */
-char *__d_path(const struct path *path,
- const struct path *root,
- char *buf, int buflen)
-{
- char *res = buf + buflen;
- int error;
-
- prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, root, &res, &buflen);
-
- if (error < 0)
- return ERR_PTR(error);
- if (error > 0)
- return NULL;
- return res;
-}
-
-char *d_absolute_path(const struct path *path,
- char *buf, int buflen)
-{
- struct path root = {};
- char *res = buf + buflen;
- int error;
-
- prepend(&res, &buflen, "\0", 1);
- error = prepend_path(path, &root, &res, &buflen);
-
- if (error > 1)
- error = -EINVAL;
- if (error < 0)
- return ERR_PTR(error);
- return res;
-}
-
-/*
- * same as __d_path but appends "(deleted)" for unlinked files.
- */
-static int path_with_deleted(const struct path *path,
- const struct path *root,
- char **buf, int *buflen)
-{
- prepend(buf, buflen, "\0", 1);
- if (d_unlinked(path->dentry)) {
- int error = prepend(buf, buflen, " (deleted)", 10);
- if (error)
- return error;
- }
-
- return prepend_path(path, root, buf, buflen);
-}
-
-static int prepend_unreachable(char **buffer, int *buflen)
-{
- return prepend(buffer, buflen, "(unreachable)", 13);
-}
-
-static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
-{
- unsigned seq;
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- *root = fs->root;
- } while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/**
- * d_path - return the path of a dentry
- * @path: path to report
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
- * the string " (deleted)" is appended. Note that this is ambiguous.
- *
- * Returns a pointer into the buffer or an error code if the path was
- * too long. Note: Callers should use the returned pointer, not the passed
- * in buffer, to use the name! The implementation often starts at an offset
- * into the buffer, and may leave 0 bytes at the start.
- *
- * "buflen" should be positive.
- */
-char *d_path(const struct path *path, char *buf, int buflen)
-{
- char *res = buf + buflen;
- struct path root;
- int error;
-
- /*
- * We have various synthetic filesystems that never get mounted. On
- * these filesystems dentries are never used for lookup purposes, and
- * thus don't need to be hashed. They also don't need a name until a
- * user wants to identify the object in /proc/pid/fd/. The little hack
- * below allows us to generate a name for these objects on demand:
- *
- * Some pseudo inodes are mountable. When they are mounted
- * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
- * and instead have d_path return the mounted path.
- */
- if (path->dentry->d_op && path->dentry->d_op->d_dname &&
- (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
- return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
-
- rcu_read_lock();
- get_fs_root_rcu(current->fs, &root);
- error = path_with_deleted(path, &root, &res, &buflen);
- rcu_read_unlock();
-
- if (error < 0)
- res = ERR_PTR(error);
- return res;
-}
-EXPORT_SYMBOL(d_path);
-
-/*
- * Helper function for dentry_operations.d_dname() members
- */
-char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
- const char *fmt, ...)
-{
- va_list args;
- char temp[64];
- int sz;
-
- va_start(args, fmt);
- sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
- va_end(args);
-
- if (sz > sizeof(temp) || sz > buflen)
- return ERR_PTR(-ENAMETOOLONG);
-
- buffer += buflen - sz;
- return memcpy(buffer, temp, sz);
-}
-
-char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
-{
- char *end = buffer + buflen;
- /* these dentries are never renamed, so d_lock is not needed */
- if (prepend(&end, &buflen, " (deleted)", 11) ||
- prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
- prepend(&end, &buflen, "/", 1))
- end = ERR_PTR(-ENAMETOOLONG);
- return end;
-}
-EXPORT_SYMBOL(simple_dname);
-
-/*
- * Write full pathname from the root of the filesystem into the buffer.
- */
-static char *__dentry_path(struct dentry *d, char *buf, int buflen)
-{
- struct dentry *dentry;
- char *end, *retval;
- int len, seq = 0;
- int error = 0;
-
- if (buflen < 2)
- goto Elong;
-
- rcu_read_lock();
-restart:
- dentry = d;
- end = buf + buflen;
- len = buflen;
- prepend(&end, &len, "\0", 1);
- /* Get '/' right */
- retval = end-1;
- *retval = '/';
- read_seqbegin_or_lock(&rename_lock, &seq);
- while (!IS_ROOT(dentry)) {
- struct dentry *parent = dentry->d_parent;
-
- prefetch(parent);
- error = prepend_name(&end, &len, &dentry->d_name);
- if (error)
- break;
-
- retval = end;
- dentry = parent;
- }
- if (!(seq & 1))
- rcu_read_unlock();
- if (need_seqretry(&rename_lock, seq)) {
- seq = 1;
- goto restart;
- }
- done_seqretry(&rename_lock, seq);
- if (error)
- goto Elong;
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
-}
-
-char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
-{
- return __dentry_path(dentry, buf, buflen);
-}
-EXPORT_SYMBOL(dentry_path_raw);
-
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
-{
- char *p = NULL;
- char *retval;
-
- if (d_unlinked(dentry)) {
- p = buf + buflen;
- if (prepend(&p, &buflen, "//deleted", 10) != 0)
- goto Elong;
- buflen++;
- }
- retval = __dentry_path(dentry, buf, buflen);
- if (!IS_ERR(retval) && p)
- *p = '/'; /* restore '/' overriden with '\0' */
- return retval;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
-}
-
-static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
- struct path *pwd)
-{
- unsigned seq;
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- *root = fs->root;
- *pwd = fs->pwd;
- } while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/*
- * NOTE! The user-level library version returns a
- * character pointer. The kernel system call just
- * returns the length of the buffer filled (which
- * includes the ending '\0' character), or a negative
- * error value. So libc would do something like
- *
- * char *getcwd(char * buf, size_t size)
- * {
- * int retval;
- *
- * retval = sys_getcwd(buf, size);
- * if (retval >= 0)
- * return buf;
- * errno = -retval;
- * return NULL;
- * }
- */
-SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
-{
- int error;
- struct path pwd, root;
- char *page = __getname();
-
- if (!page)
- return -ENOMEM;
-
- rcu_read_lock();
- get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
-
- error = -ENOENT;
- if (!d_unlinked(pwd.dentry)) {
- unsigned long len;
- char *cwd = page + PATH_MAX;
- int buflen = PATH_MAX;
-
- prepend(&cwd, &buflen, "\0", 1);
- error = prepend_path(&pwd, &root, &cwd, &buflen);
- rcu_read_unlock();
-
- if (error < 0)
- goto out;
-
- /* Unreachable from current root */
- if (error > 0) {
- error = prepend_unreachable(&cwd, &buflen);
- if (error)
- goto out;
- }
-
- error = -ERANGE;
- len = PATH_MAX + page - cwd;
- if (len <= size) {
- error = len;
- if (copy_to_user(buf, cwd, len))
- error = -EFAULT;
- }
- } else {
- rcu_read_unlock();
- }
-
-out:
- __putname(page);
- return error;
-}
-
/*
* Test whether new_dentry is a subdirectory of old_dentry.
*

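One point from the moved d_path() kerneldoc is worth a usage reminder: the
path is assembled from the end of the buffer, so callers must print from
the returned pointer, never from the buffer start. A minimal, hypothetical
caller:

	char *buf, *p;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	p = d_path(&file->f_path, buf, PATH_MAX);
	if (IS_ERR(p)) {
		kfree(buf);
		return PTR_ERR(p);
	}
	pr_info("open file: %s\n", p);	/* use p, not buf */
	kfree(buf);
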
File diff suppressed because it is too large.

@@ -0,0 +1,41 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Mar 2018 16:22:42 +0100
Subject: [PATCH 10/10] iommu/amd: Return proper error code in
irq_remapping_alloc()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 29d049be9438278c47253a74cf8d0ddf36bd5d68
In the unlikely case that alloc_irq_table() is not able to return a
remap table, "ret" is assigned an error code. Later, the code checks
`index', and if it is negative (which it is, because it is initialized
to `-1') the function properly aborts, but it returns `-1' instead of
the intended `-ENOMEM'.
To correct this, assign -ENOMEM to index.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4110,7 +4110,7 @@ static int irq_remapping_alloc(struct ir
struct amd_ir_data *data = NULL;
struct irq_cfg *cfg;
int i, ret, devid;
- int index = -1;
+ int index;
if (!info)
return -EINVAL;
@@ -4152,7 +4152,7 @@ static int irq_remapping_alloc(struct ir
WARN_ON(table->min_index != 32);
index = info->ioapic_pin;
} else {
- ret = -ENOMEM;
+ index = -ENOMEM;
}
} else {
bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);

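The one-line fix encodes a useful idiom: when a single variable doubles as
"slot number or failure", keep the real -errno in the variable that is
actually checked, rather than a bare -1 sentinel plus a separate ret. A
minimal sketch of the two shapes, with a hypothetical allocator:

	/* Fragile: the reason lives in a different variable than the test. */
	int index = -1;
	int ret = 0;

	if (!table)
		ret = -ENOMEM;		/* recorded here ... */
	if (index < 0)
		return index;		/* ... but -1 is what gets returned */

	/* Robust: assign the -errno directly to the checked variable. */
	int index;

	index = table ? alloc_slot(table) : -ENOMEM;	/* hypothetical */
	if (index < 0)
		return index;		/* caller sees the real reason */
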
@@ -0,0 +1,25 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Mar 2018 12:47:04 -0500
Subject: [PATCH 10/17] take out orphan externs (empty_string/slash_string)
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 903ddaf49329076862d65f7284d825759ff67bd6
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/dcache.h | 2 --
1 file changed, 2 deletions(-)
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -56,9 +56,7 @@ struct qstr {
#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
-extern const char empty_string[];
extern const struct qstr empty_name;
-extern const char slash_string[];
extern const struct qstr slash_name;
struct dentry_stat_t {

@@ -0,0 +1,28 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:36 -0600
Subject: [PATCH 10/48] tracing: Add Documentation for log2 modifier
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add a line for the log2 modifier, to keep it aligned with
tracing/README.
Link: http://lkml.kernel.org/r/a419028bccab155749a4b8702d5b97af75f1578f.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit fcb5b95a2bb931f8e72e2dbd2def67382dd99d42)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 1 +
1 file changed, 1 insertion(+)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -73,6 +73,7 @@
.sym-offset display an address as a symbol and offset
.syscall display a syscall id as a system call name
.execname display a common_pid as a program name
+ .log2 display log2 value rather than raw number
Note that in general the semantics of a given field aren't
interpreted when applying a modifier to it, but there are some

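To see the modifier in action: a histogram trigger is just a string written
to an event's trigger file. The sketch below assumes tracefs is mounted at
the usual debugfs location and uses the kmem/kmalloc event's bytes_req
field as an example; run as root, then read the event's "hist" file:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/events/kmem/kmalloc/trigger";
	const char *cmd = "hist:keys=bytes_req.log2";	/* log2 buckets */
	ssize_t len = (ssize_t)strlen(cmd);
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open trigger");
		return 1;
	}
	if (write(fd, cmd, len) != len)
		perror("write trigger");
	close(fd);
	return 0;
}
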
@@ -0,0 +1,76 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Mar 2018 11:00:45 -0500
Subject: [PATCH 11/17] fold lookup_real() into __lookup_hash()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit a03ece5ff2bd7a9abaa0e8ddfe5f79d79e5984c8
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/namei.c | 41 +++++++++++++++++------------------------
1 file changed, 17 insertions(+), 24 deletions(-)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1475,43 +1475,36 @@ static struct dentry *lookup_dcache(cons
}
/*
- * Call i_op->lookup on the dentry. The dentry must be negative and
- * unhashed.
- *
- * dir->d_inode->i_mutex must be held
+ * Parent directory has inode locked exclusive. This is one
+ * and only case when ->lookup() gets called on non in-lookup
+ * dentries - as the matter of fact, this only gets called
+ * when directory is guaranteed to have no in-lookup children
+ * at all.
*/
-static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
-{
- struct dentry *old;
-
- /* Don't create child dentry for a dead directory. */
- if (unlikely(IS_DEADDIR(dir))) {
- dput(dentry);
- return ERR_PTR(-ENOENT);
- }
-
- old = dir->i_op->lookup(dir, dentry, flags);
- if (unlikely(old)) {
- dput(dentry);
- dentry = old;
- }
- return dentry;
-}
-
static struct dentry *__lookup_hash(const struct qstr *name,
struct dentry *base, unsigned int flags)
{
struct dentry *dentry = lookup_dcache(name, base, flags);
+ struct dentry *old;
+ struct inode *dir = base->d_inode;
if (dentry)
return dentry;
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(dir)))
+ return ERR_PTR(-ENOENT);
+
dentry = d_alloc(base, name);
if (unlikely(!dentry))
return ERR_PTR(-ENOMEM);
- return lookup_real(base->d_inode, dentry, flags);
+ old = dir->i_op->lookup(dir, dentry, flags);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
+ }
+ return dentry;
}
static int lookup_fast(struct nameidata *nd,

View File

@@ -0,0 +1,119 @@
From: Vedang Patel <vedang.patel@intel.com>
Date: Mon, 15 Jan 2018 20:51:37 -0600
Subject: [PATCH 11/48] tracing: Add support to detect and avoid duplicates
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
A duplicate in the tracing_map hash table is when two different
entries have the same key and, as a result, the same key_hash. This is
possible due to a race condition that is inherent to the algorithm and
not a bug. This was fine because, until now, we were
only interested in the sum of all the values related to a particular
key (the duplicates are dealt with in tracing_map_sort_entries()). But
with the inclusion of variables[1], we are interested in individual
values, so it is no longer clear which value to choose when there are
duplicates. The duplicates therefore need to be removed.
The duplicates can occur in the code in the following scenarios:
- A thread is in the process of adding a new element. It has
successfully executed cmpxchg() and inserted the key. But it is still
not done acquiring the tracing_map_elt struct, populating it and
storing the pointer to the struct in the value field of the
tracing_map hash table. If another thread comes in at this time and
wants to add an element with the same key, it will not see the current
element and will add a new one.
- There are multiple threads trying to execute cmpxchg() at the same
time; one of the threads will succeed and the others will fail. The
ones which fail will go ahead and increment 'idx' and add a new
element there, creating a duplicate.
This patch detects and avoids the first condition by asking the thread
which detects the duplicate to loop one more time. There is also the
possibility of an infinite loop if the thread which is mid-insert goes
to sleep indefinitely while the thread which detected the duplicate
keeps retrying. This is why the retrying thread only loops for
map_size iterations before returning NULL.
The second scenario is avoided by preventing the threads which failed
cmpxchg() from incrementing idx. This way, they will loop
around and check if the thread which succeeded in executing cmpxchg()
had the same key.
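For illustration only, here is a minimal user-space sketch (C11
atomics) of the two-step insert and the bounded dup_try retry
described above. All names are hypothetical; the kernel code
additionally pre-allocates its elements and counts drops:

  #include <stdatomic.h>
  #include <stdio.h>

  #define MAP_SIZE 8

  struct entry {
          _Atomic unsigned int key;  /* key hash, 0 means empty      */
          _Atomic(void *) val;       /* element, published after key */
  };

  static struct entry map[MAP_SIZE];

  static void *map_insert(unsigned int key_hash, void *elt)
  {
          unsigned int idx = key_hash % MAP_SIZE;
          int dup_try = 0;

          for (;;) {
                  struct entry *e = &map[idx];
                  unsigned int test_key = atomic_load(&e->key);

                  if (test_key == key_hash) {
                          void *val = atomic_load(&e->val);
                          if (val)
                                  return val;  /* existing element wins */
                          /* Key is set but val is not yet published:
                           * another thread is mid-insert.  Retry a
                           * bounded number of times rather than
                           * spinning forever. */
                          if (++dup_try > MAP_SIZE)
                                  return NULL; /* counted as a drop */
                          continue;
                  }
                  if (!test_key) {
                          unsigned int zero = 0;
                          if (atomic_compare_exchange_strong(&e->key,
                                                             &zero,
                                                             key_hash)) {
                                  atomic_store(&e->val, elt);
                                  return elt;
                          }
                          /* cmpxchg lost: do NOT advance idx; loop
                           * around to see which key the winner wrote */
                          dup_try++;
                          continue;
                  }
                  idx = (idx + 1) % MAP_SIZE;  /* other key: probe on */
          }
  }

  int main(void)
  {
          int v = 42;
          printf("inserted: %p\n", map_insert(5u, (void *)&v));
          return 0;
  }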
[1] http://lkml.kernel.org/r/cover.1498510759.git.tom.zanussi@linux.intel.com
Link: http://lkml.kernel.org/r/e178e89ec399240331d383bd5913d649713110f4.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit bd0a7ab135d0d0872296c3ae3c4f816a9a4c3dee)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/tracing_map.c | 41 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 36 insertions(+), 5 deletions(-)
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -414,7 +414,9 @@ static inline struct tracing_map_elt *
__tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
{
u32 idx, key_hash, test_key;
+ int dup_try = 0;
struct tracing_map_entry *entry;
+ struct tracing_map_elt *val;
key_hash = jhash(key, map->key_size, 0);
if (key_hash == 0)
@@ -426,11 +428,33 @@ static inline struct tracing_map_elt *
entry = TRACING_MAP_ENTRY(map->map, idx);
test_key = entry->key;
- if (test_key && test_key == key_hash && entry->val &&
- keys_match(key, entry->val->key, map->key_size)) {
- if (!lookup_only)
- atomic64_inc(&map->hits);
- return entry->val;
+ if (test_key && test_key == key_hash) {
+ val = READ_ONCE(entry->val);
+ if (val &&
+ keys_match(key, val->key, map->key_size)) {
+ if (!lookup_only)
+ atomic64_inc(&map->hits);
+ return val;
+ } else if (unlikely(!val)) {
+ /*
+ * The key is present. But, val (pointer to elt
+ * struct) is still NULL. which means some other
+ * thread is in the process of inserting an
+ * element.
+ *
+ * On top of that, it's key_hash is same as the
+ * one being inserted right now. So, it's
+ * possible that the element has the same
+ * key as well.
+ */
+
+ dup_try++;
+ if (dup_try > map->map_size) {
+ atomic64_inc(&map->drops);
+ break;
+ }
+ continue;
+ }
}
if (!test_key) {
@@ -452,6 +476,13 @@ static inline struct tracing_map_elt *
atomic64_inc(&map->hits);
return entry->val;
+ } else {
+ /*
+ * cmpxchg() failed. Loop around once
+ * more to check what key was inserted.
+ */
+ dup_try++;
+ continue;
}
}

View File

@@ -0,0 +1,27 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 8 Mar 2018 11:01:22 -0500
Subject: [PATCH 12/17] debugfs_lookup(): switch to lookup_one_len_unlocked()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit cd1c0c9321999737073dcfc3364e194e02604bce
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/debugfs/inode.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -270,10 +270,7 @@ struct dentry *debugfs_lookup(const char
if (!parent)
parent = debugfs_mount->mnt_root;
- inode_lock(d_inode(parent));
- dentry = lookup_one_len(name, parent, strlen(name));
- inode_unlock(d_inode(parent));
-
+ dentry = lookup_one_len_unlocked(name, parent, strlen(name));
if (IS_ERR(dentry))
return NULL;
if (!d_really_is_positive(dentry)) {

View File

@@ -0,0 +1,193 @@
From: Vedang Patel <vedang.patel@intel.com>
Date: Mon, 15 Jan 2018 20:51:38 -0600
Subject: [PATCH 12/48] tracing: Remove code which merges duplicates
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
We now have the logic to detect and remove duplicates in the
tracing_map hash table. The code which merges duplicates in the
histogram is redundant now. So, modify this code just to detect
duplicates. The duplication detection code is still kept to ensure
that any rare race condition which might cause duplicates does not go
unnoticed.
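As a rough user-space illustration of the detection-only approach -
sort the entries, then compare adjacent keys - with hypothetical names
(the kernel version WARNs once instead of printing):

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  /* A byte-wise comparator: any total order works here, it only has
   * to place equal keys next to each other. */
  static int cmp_keys(const void *a, const void *b)
  {
          return memcmp(a, b, sizeof(unsigned int));
  }

  int main(void)
  {
          unsigned int keys[] = { 3, 1, 3, 2 };
          int i, n = 4, total_dups = 0;

          qsort(keys, n, sizeof(keys[0]), cmp_keys);
          for (i = 1; i < n; i++)
                  if (!memcmp(&keys[i - 1], &keys[i], sizeof(keys[0])))
                          total_dups++;

          if (total_dups)
                  fprintf(stderr, "Duplicates detected: %d\n",
                          total_dups);
          return 0;
  }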
Link: http://lkml.kernel.org/r/55215cf59e2674391bdaf772fdafc4c393352b03.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 3f7f4cc21fc62ff7da7d34b5ca95a69d73a1f764)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 11 -----
kernel/trace/tracing_map.c | 83 ++-------------------------------------
kernel/trace/tracing_map.h | 7 ---
3 files changed, 6 insertions(+), 95 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -340,16 +340,6 @@ static int hist_trigger_elt_comm_alloc(s
return 0;
}
-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to,
- struct tracing_map_elt *from)
-{
- char *comm_from = from->private_data;
- char *comm_to = to->private_data;
-
- if (comm_from)
- memcpy(comm_to, comm_from, TASK_COMM_LEN + 1);
-}
-
static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt)
{
char *comm = elt->private_data;
@@ -360,7 +350,6 @@ static void hist_trigger_elt_comm_init(s
static const struct tracing_map_ops hist_trigger_elt_comm_ops = {
.elt_alloc = hist_trigger_elt_comm_alloc,
- .elt_copy = hist_trigger_elt_comm_copy,
.elt_free = hist_trigger_elt_comm_free,
.elt_init = hist_trigger_elt_comm_init,
};
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -847,67 +847,15 @@ create_sort_entry(void *key, struct trac
return sort_entry;
}
-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt)
-{
- struct tracing_map_elt *dup_elt;
- unsigned int i;
-
- dup_elt = tracing_map_elt_alloc(elt->map);
- if (IS_ERR(dup_elt))
- return NULL;
-
- if (elt->map->ops && elt->map->ops->elt_copy)
- elt->map->ops->elt_copy(dup_elt, elt);
-
- dup_elt->private_data = elt->private_data;
- memcpy(dup_elt->key, elt->key, elt->map->key_size);
-
- for (i = 0; i < elt->map->n_fields; i++) {
- atomic64_set(&dup_elt->fields[i].sum,
- atomic64_read(&elt->fields[i].sum));
- dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn;
- }
-
- return dup_elt;
-}
-
-static int merge_dup(struct tracing_map_sort_entry **sort_entries,
- unsigned int target, unsigned int dup)
-{
- struct tracing_map_elt *target_elt, *elt;
- bool first_dup = (target - dup) == 1;
- int i;
-
- if (first_dup) {
- elt = sort_entries[target]->elt;
- target_elt = copy_elt(elt);
- if (!target_elt)
- return -ENOMEM;
- sort_entries[target]->elt = target_elt;
- sort_entries[target]->elt_copied = true;
- } else
- target_elt = sort_entries[target]->elt;
-
- elt = sort_entries[dup]->elt;
-
- for (i = 0; i < elt->map->n_fields; i++)
- atomic64_add(atomic64_read(&elt->fields[i].sum),
- &target_elt->fields[i].sum);
-
- sort_entries[dup]->dup = true;
-
- return 0;
-}
-
-static int merge_dups(struct tracing_map_sort_entry **sort_entries,
+static void detect_dups(struct tracing_map_sort_entry **sort_entries,
int n_entries, unsigned int key_size)
{
unsigned int dups = 0, total_dups = 0;
- int err, i, j;
+ int i;
void *key;
if (n_entries < 2)
- return total_dups;
+ return;
sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
(int (*)(const void *, const void *))cmp_entries_dup, NULL);
@@ -916,30 +864,14 @@ static int merge_dups(struct tracing_map
for (i = 1; i < n_entries; i++) {
if (!memcmp(sort_entries[i]->key, key, key_size)) {
dups++; total_dups++;
- err = merge_dup(sort_entries, i - dups, i);
- if (err)
- return err;
continue;
}
key = sort_entries[i]->key;
dups = 0;
}
- if (!total_dups)
- return total_dups;
-
- for (i = 0, j = 0; i < n_entries; i++) {
- if (!sort_entries[i]->dup) {
- sort_entries[j] = sort_entries[i];
- if (j++ != i)
- sort_entries[i] = NULL;
- } else {
- destroy_sort_entry(sort_entries[i]);
- sort_entries[i] = NULL;
- }
- }
-
- return total_dups;
+ WARN_ONCE(total_dups > 0,
+ "Duplicates detected: %d\n", total_dups);
}
static bool is_key(struct tracing_map *map, unsigned int field_idx)
@@ -1065,10 +997,7 @@ int tracing_map_sort_entries(struct trac
return 1;
}
- ret = merge_dups(entries, n_entries, map->key_size);
- if (ret < 0)
- goto free;
- n_entries -= ret;
+ detect_dups(entries, n_entries, map->key_size);
if (is_key(map, sort_keys[0].field_idx))
cmp_entries_fn = cmp_entries_key;
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -215,11 +215,6 @@ struct tracing_map {
* Element allocation occurs before tracing begins, when the
* tracing_map_init() call is made by client code.
*
- * @elt_copy: At certain points in the lifetime of an element, it may
- * need to be copied. The copy should include a copy of the
- * client-allocated data, which can be copied into the 'to'
- * element from the 'from' element.
- *
* @elt_free: When a tracing_map_elt is freed, this function is called
* and allows client-allocated per-element data to be freed.
*
@@ -233,8 +228,6 @@ struct tracing_map {
*/
struct tracing_map_ops {
int (*elt_alloc)(struct tracing_map_elt *elt);
- void (*elt_copy)(struct tracing_map_elt *to,
- struct tracing_map_elt *from);
void (*elt_free)(struct tracing_map_elt *elt);
void (*elt_clear)(struct tracing_map_elt *elt);
void (*elt_init)(struct tracing_map_elt *elt);

View File

@@ -0,0 +1,45 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 9 Mar 2018 18:06:03 -0500
Subject: [PATCH 13/17] lustre: get rid of pointless casts to struct dentry *
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 5bf1ddf7ee0e23598a620ef9ea2b0f00e804859d
... when feeding const struct dentry * to primitives taking
exactly that.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/staging/lustre/lustre/llite/dcache.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -90,7 +90,7 @@ static int ll_dcompare(const struct dent
d_count(dentry));
/* mountpoint is always valid */
- if (d_mountpoint((struct dentry *)dentry))
+ if (d_mountpoint(dentry))
return 0;
if (d_lustre_invalid(dentry))
@@ -111,7 +111,7 @@ static int ll_ddelete(const struct dentr
LASSERT(de);
CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
- d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping",
+ d_lustre_invalid(de) ? "deleting" : "keeping",
de, de, de->d_parent, d_inode(de),
d_unhashed(de) ? "" : "hashed,",
list_empty(&de->d_subdirs) ? "" : "subdirs");
@@ -119,7 +119,7 @@ static int ll_ddelete(const struct dentr
/* kernel >= 2.6.38 last refcount is decreased after this function. */
LASSERT(d_count(de) == 1);
- if (d_lustre_invalid((struct dentry *)de))
+ if (d_lustre_invalid(de))
return 1;
return 0;
}

View File

@@ -0,0 +1,133 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:39 -0600
Subject: [PATCH 13/48] ring-buffer: Add interface for setting absolute time
stamps
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Define a new function, tracing_set_time_stamp_abs(), which can be used
to enable or disable the use of absolute timestamps rather than time
deltas for a trace array.
Only the interface is added here; a subsequent patch will add the
underlying implementation.
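The enable/disable pairing is reference counted; the mode only clears
when the last user disables it. A minimal user-space model of that
logic (hypothetical code mirroring the patch's time_stamp_abs_ref
handling, not the kernel implementation):

  #include <stdio.h>

  static int abs_ref;   /* models tr->time_stamp_abs_ref */
  static int abs_mode;  /* models the ring buffer flag   */

  static int set_time_stamp_abs(int abs)
  {
          if (abs && abs_ref++)   /* already on: just take a ref */
                  return 0;
          if (!abs) {
                  if (abs_ref == 0)
                          return -1;  /* unbalanced disable */
                  if (--abs_ref)
                          return 0;   /* other users remain */
          }
          abs_mode = abs;  /* first enable or last disable */
          return 0;
  }

  int main(void)
  {
          set_time_stamp_abs(1);
          set_time_stamp_abs(1);
          set_time_stamp_abs(0);
          printf("still on: %d\n", abs_mode);  /* prints 1 */
          set_time_stamp_abs(0);
          printf("now off:  %d\n", abs_mode);  /* prints 0 */
          return 0;
  }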
Link: http://lkml.kernel.org/r/ce96119de44c7fe0ee44786d15254e9b493040d3.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 22753475c5232cd6f024746d6a6696a4dd2683ab)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ring_buffer.h | 2 ++
kernel/trace/ring_buffer.c | 11 +++++++++++
kernel/trace/trace.c | 33 ++++++++++++++++++++++++++++++++-
kernel/trace/trace.h | 3 +++
4 files changed, 48 insertions(+), 1 deletion(-)
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -178,6 +178,8 @@ void ring_buffer_normalize_time_stamp(st
int cpu, u64 *ts);
void ring_buffer_set_clock(struct ring_buffer *buffer,
u64 (*clock)(void));
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs);
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
size_t ring_buffer_page_len(void *page);
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -488,6 +488,7 @@ struct ring_buffer {
u64 (*clock)(void);
struct rb_irq_work irq_work;
+ bool time_stamp_abs;
};
struct ring_buffer_iter {
@@ -1387,6 +1388,16 @@ void ring_buffer_set_clock(struct ring_b
buffer->clock = clock;
}
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
+{
+ buffer->time_stamp_abs = abs;
+}
+
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
+{
+ return buffer->time_stamp_abs;
+}
+
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
static inline unsigned long rb_page_entries(struct buffer_page *bpage)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2269,7 +2269,7 @@ trace_event_buffer_lock_reserve(struct r
*current_rb = trace_file->tr->trace_buffer.buffer;
- if ((trace_file->flags &
+ if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
(EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
(entry = this_cpu_read(trace_buffered_event))) {
/* Try to use the per cpu buffer first */
@@ -6281,6 +6281,37 @@ static int tracing_clock_open(struct ino
return ret;
}
+
+int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+{
+ int ret = 0;
+
+ mutex_lock(&trace_types_lock);
+
+ if (abs && tr->time_stamp_abs_ref++)
+ goto out;
+
+ if (!abs) {
+ if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (--tr->time_stamp_abs_ref)
+ goto out;
+ }
+
+ ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (tr->max_buffer.buffer)
+ ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
+#endif
+ out:
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
struct ftrace_buffer_info {
struct trace_iterator iter;
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -273,6 +273,7 @@ struct trace_array {
/* function tracing enabled */
int function_enabled;
#endif
+ int time_stamp_abs_ref;
};
enum {
@@ -286,6 +287,8 @@ extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
extern void trace_array_put(struct trace_array *tr);
+extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.

View File

@@ -0,0 +1,27 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 Mar 2018 16:40:33 -0500
Subject: [PATCH 14/17] oprofilefs: don't oops on allocation failure
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit a7498968338da9b928f5d8054acc8be6ed2bc14c
... just short-circuit the creation of potential children
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/oprofile/oprofilefs.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -138,6 +138,9 @@ static int __oprofilefs_create_file(stru
struct dentry *dentry;
struct inode *inode;
+ if (!root)
+ return -ENOMEM;
+
inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry) {

View File

@@ -0,0 +1,323 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:40 -0600
Subject: [PATCH 14/48] ring-buffer: Redefine the unimplemented
RINGBUF_TYPE_TIME_STAMP
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
RINGBUF_TYPE_TIME_STAMP is defined but not used, and from what I can
gather was reserved for something like an absolute timestamp feature
for the ring buffer, if not a complete replacement of the current
time_delta scheme.
This code redefines RINGBUF_TYPE_TIME_STAMP to implement absolute time
stamps. Another way to look at it is that it essentially forces
extended time_deltas for all events.
The motivation for doing this is to enable time_deltas that aren't
dependent on previous events in the ring buffer, making it feasible to
use the ring_buffer_event timestamps in a more random-access way, for
purposes other than serial event printing.
To set/reset this mode, use tracing_set_time_stamp_abs() from the
previous interface patch.
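To make the encoding concrete, a small user-space round trip of the
split described above (TS_SHIFT is 27 in the ring buffer code; this
snippet is illustrative, not kernel code):

  #include <stdio.h>
  #include <stdint.h>

  #define TS_SHIFT 27
  #define TS_MASK  ((1ULL << TS_SHIFT) - 1)

  int main(void)
  {
          uint64_t ts = 0x123456789ULL;  /* absolute timestamp */
          uint32_t time_delta = ts & TS_MASK;       /* bottom 27 bits */
          uint32_t array0 = (uint32_t)(ts >> TS_SHIFT); /* top bits   */

          /* the decode side, as in ring_buffer_event_time_stamp() */
          uint64_t decoded = ((uint64_t)array0 << TS_SHIFT) + time_delta;

          printf("round trip ok: %d\n", decoded == ts);
          return 0;
  }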
Link: http://lkml.kernel.org/r/477b362dba1ce7fab9889a1a8e885a62c472f041.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 58c0bd803060b0c0c9de8751382a7af5f507d74d)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ring_buffer.h | 12 ++---
kernel/trace/ring_buffer.c | 104 ++++++++++++++++++++++++++++++++------------
2 files changed, 83 insertions(+), 33 deletions(-)
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -34,10 +34,12 @@ struct ring_buffer_event {
* array[0] = time delta (28 .. 59)
* size = 8 bytes
*
- * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
- * array[0] = tv_nsec
- * array[1..2] = tv_sec
- * size = 16 bytes
+ * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp
+ * Same format as TIME_EXTEND except that the
+ * value is an absolute timestamp, not a delta
+ * event.time_delta contains bottom 27 bits
+ * array[0] = top (28 .. 59) bits
+ * size = 8 bytes
*
* <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
* Data record
@@ -54,12 +56,12 @@ enum ring_buffer_type {
RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
RINGBUF_TYPE_PADDING,
RINGBUF_TYPE_TIME_EXTEND,
- /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
RINGBUF_TYPE_TIME_STAMP,
};
unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event);
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event);
/*
* ring_buffer_discard_commit will remove an event that has not
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -41,6 +41,8 @@ int ring_buffer_print_entry_header(struc
RINGBUF_TYPE_PADDING);
trace_seq_printf(s, "\ttime_extend : type == %d\n",
RINGBUF_TYPE_TIME_EXTEND);
+ trace_seq_printf(s, "\ttime_stamp : type == %d\n",
+ RINGBUF_TYPE_TIME_STAMP);
trace_seq_printf(s, "\tdata max type_len == %d\n",
RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
@@ -140,12 +142,15 @@ int ring_buffer_print_entry_header(struc
enum {
RB_LEN_TIME_EXTEND = 8,
- RB_LEN_TIME_STAMP = 16,
+ RB_LEN_TIME_STAMP = 8,
};
#define skip_time_extend(event) \
((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+#define extended_time(event) \
+ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
+
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -209,7 +214,7 @@ rb_event_ts_length(struct ring_buffer_ev
{
unsigned len = 0;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ if (extended_time(event)) {
/* time extends include the data event after it */
len = RB_LEN_TIME_EXTEND;
event = skip_time_extend(event);
@@ -231,7 +236,7 @@ unsigned ring_buffer_event_length(struct
{
unsigned length;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
length = rb_event_length(event);
@@ -248,7 +253,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_leng
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
@@ -275,6 +280,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
+/**
+ * ring_buffer_event_time_stamp - return the event's extended timestamp
+ * @event: the event to get the timestamp of
+ *
+ * Returns the extended timestamp associated with a data event.
+ * An extended time_stamp is a 64-bit timestamp represented
+ * internally in a special way that makes the best use of space
+ * contained within a ring buffer event. This function decodes
+ * it and maps it to a straight u64 value.
+ */
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+{
+ u64 ts;
+
+ ts = event->array[0];
+ ts <<= TS_SHIFT;
+ ts += event->time_delta;
+
+ return ts;
+}
+
/* Flag when events were overwritten */
#define RB_MISSED_EVENTS (1 << 31)
/* Missed count stored at end */
@@ -2222,12 +2248,15 @@ rb_move_tail(struct ring_buffer_per_cpu
/* Slow path, do not inline */
static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
{
- event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+ if (abs)
+ event->type_len = RINGBUF_TYPE_TIME_STAMP;
+ else
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
- /* Not the first event on the page? */
- if (rb_event_index(event)) {
+ /* Not the first event on the page, or not delta? */
+ if (abs || rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
@@ -2270,7 +2299,9 @@ rb_update_event(struct ring_buffer_per_c
* add it to the start of the resevered space.
*/
if (unlikely(info->add_timestamp)) {
- event = rb_add_time_stamp(event, delta);
+ bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+
+ event = rb_add_time_stamp(event, info->delta, abs);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
@@ -2458,7 +2489,7 @@ static __always_inline void rb_end_commi
static inline void rb_event_discard(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
/* array[0] holds the actual length for the discarded event */
@@ -2502,10 +2533,11 @@ rb_update_write_stamp(struct ring_buffer
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->write_stamp += delta;
+ } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->write_stamp = delta;
} else
cpu_buffer->write_stamp += event->time_delta;
}
@@ -2685,7 +2717,7 @@ static struct ring_buffer_event *
* If this is the first commit on the page, then it has the same
* timestamp as the page itself.
*/
- if (!tail)
+ if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
info->delta = 0;
/* See if we shot pass the end of this buffer page */
@@ -2762,8 +2794,11 @@ rb_reserve_next_event(struct ring_buffer
/* make sure this diff is calculated here */
barrier();
- /* Did the write stamp get updated already? */
- if (likely(info.ts >= cpu_buffer->write_stamp)) {
+ if (ring_buffer_time_stamp_abs(buffer)) {
+ info.delta = info.ts;
+ rb_handle_timestamp(cpu_buffer, &info);
+ } else /* Did the write stamp get updated already? */
+ if (likely(info.ts >= cpu_buffer->write_stamp)) {
info.delta = diff;
if (unlikely(test_time_stamp(info.delta)))
rb_handle_timestamp(cpu_buffer, &info);
@@ -3445,14 +3480,13 @@ rb_update_read_stamp(struct ring_buffer_
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
@@ -3476,14 +3510,13 @@ rb_update_iter_read_stamp(struct ring_bu
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
iter->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ iter->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
@@ -3707,6 +3740,8 @@ rb_buffer_peek(struct ring_buffer_per_cp
struct buffer_page *reader;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
again:
/*
* We repeat when a time extend is encountered.
@@ -3743,12 +3778,17 @@ rb_buffer_peek(struct ring_buffer_per_cp
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_reader(cpu_buffer);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = cpu_buffer->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
@@ -3773,6 +3813,9 @@ rb_iter_peek(struct ring_buffer_iter *it
struct ring_buffer_event *event;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
+
cpu_buffer = iter->cpu_buffer;
buffer = cpu_buffer->buffer;
@@ -3825,12 +3868,17 @@ rb_iter_peek(struct ring_buffer_iter *it
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_iter(iter);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = iter->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(buffer,
cpu_buffer->cpu, ts);

View File

@@ -0,0 +1,212 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 Mar 2018 23:15:52 -0500
Subject: [PATCH 15/17] make non-exchanging __d_move() copy ->d_parent rather
than swap them
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 076515fc926793e162fc6525bed1679ef2bbf269
Currently d_move(from, to) does the following:
* name/parent of from <- old name/parent of to, from hashed there
* to is unhashed
* name of to is preserved
* if from used to be detached, to gets detached
* if from used to be attached, parent of to <- old parent of from.
That's both user-visibly bogus and complicates reasoning a lot.
Much saner semantics would be
* name/parent of from <- name/parent of to, from hashed there.
* to is unhashed
* name/parent of to is unchanged.
The price, of course, is that old parent of from might lose a reference.
However,
* all potentially cross-directory callers of d_move() have both
parents pinned directly; typically, dentries themselves are grabbed
only after we have grabbed and locked both parents. IOW, the decrement
of old parent's refcount in case of d_move() won't reach zero.
* __d_move() from d_splice_alias() is done to detached alias.
No refcount decrements in that case
* __d_move() from __d_unalias() *can* get the refcount to zero.
So let's grab a reference to alias' old parent before calling __d_unalias()
and dput() it after we'd dropped rename_lock.
That does make d_splice_alias() potentially blocking. However, it has
no callers in non-sleepable contexts (and the case where we'd grown
that dget/dput pair is _very_ rare, so performance is not an issue).
Another thing that needs adjustment is the unlocking at the end of __d_move();
folded it in. And cleaned the remnants of bogus ordering from the
"lock them in the beginning" counterpart - it's never been right and
now (well, for 7 years now) we have that thing always serialized on
rename_lock anyway.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 93 +++++++++++++++++++-----------------------------------------
1 file changed, 30 insertions(+), 63 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,9 +67,7 @@
* dentry->d_lock
*
* If no ancestor relationship:
- * if (dentry1 < dentry2)
- * dentry1->d_lock
- * dentry2->d_lock
+ * arbitrary, since it's serialized on rename_lock
*/
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
@@ -2777,9 +2775,6 @@ static void copy_name(struct dentry *den
static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
- /*
- * XXXX: do we really need to take target->d_lock?
- */
if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
spin_lock(&target->d_parent->d_lock);
else {
@@ -2793,40 +2788,11 @@ static void dentry_lock_for_move(struct
DENTRY_D_LOCK_NESTED);
}
}
- if (target < dentry) {
- spin_lock_nested(&target->d_lock, 2);
- spin_lock_nested(&dentry->d_lock, 3);
- } else {
- spin_lock_nested(&dentry->d_lock, 2);
- spin_lock_nested(&target->d_lock, 3);
- }
-}
-
-static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
-{
- if (target->d_parent != dentry->d_parent)
- spin_unlock(&dentry->d_parent->d_lock);
- if (target->d_parent != target)
- spin_unlock(&target->d_parent->d_lock);
- spin_unlock(&target->d_lock);
- spin_unlock(&dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, 2);
+ spin_lock_nested(&target->d_lock, 3);
}
/*
- * When switching names, the actual string doesn't strictly have to
- * be preserved in the target - because we're dropping the target
- * anyway. As such, we can just do a simple memcpy() to copy over
- * the new name before we switch, unless we are going to rehash
- * it. Note that if we *do* unhash the target, we are not allowed
- * to rehash it without giving it a new name/hash key - whether
- * we swap or overwrite the names here, resulting name won't match
- * the reality in filesystem; it's only there for d_path() purposes.
- * Note that all of this is happening under rename_lock, so the
- * any hash lookup seeing it in the middle of manipulations will
- * be discarded anyway. So we do not care what happens to the hash
- * key in that case.
- */
-/*
* __d_move - move a dentry
* @dentry: entry to move
* @target: new dentry
@@ -2840,6 +2806,7 @@ static void dentry_unlock_for_move(struc
static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
{
+ struct dentry *old_parent;
struct inode *dir = NULL;
unsigned n;
if (!dentry->d_inode)
@@ -2858,49 +2825,47 @@ static void __d_move(struct dentry *dent
write_seqcount_begin(&dentry->d_seq);
write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
+ old_parent = dentry->d_parent;
+
/* unhash both */
if (!d_unhashed(dentry))
___d_drop(dentry);
if (!d_unhashed(target))
___d_drop(target);
- /* Switch the names.. */
- if (exchange)
- swap_names(dentry, target);
- else
+ /* ... and switch them in the tree */
+ dentry->d_parent = target->d_parent;
+ if (!exchange) {
copy_name(dentry, target);
-
- /* rehash in new place(s) */
- __d_rehash(dentry);
- if (exchange)
- __d_rehash(target);
- else
target->d_hash.pprev = NULL;
-
- /* ... and switch them in the tree */
- if (IS_ROOT(dentry)) {
- /* splicing a tree */
- dentry->d_flags |= DCACHE_RCUACCESS;
- dentry->d_parent = target->d_parent;
- target->d_parent = target;
- list_del_init(&target->d_child);
- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ dentry->d_parent->d_lockref.count++;
+ if (dentry == old_parent)
+ dentry->d_flags |= DCACHE_RCUACCESS;
+ else
+ WARN_ON(!--old_parent->d_lockref.count);
} else {
- /* swapping two dentries */
- swap(dentry->d_parent, target->d_parent);
+ target->d_parent = old_parent;
+ swap_names(dentry, target);
list_move(&target->d_child, &target->d_parent->d_subdirs);
- list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
- if (exchange)
- fsnotify_update_flags(target);
- fsnotify_update_flags(dentry);
+ __d_rehash(target);
+ fsnotify_update_flags(target);
}
+ list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ __d_rehash(dentry);
+ fsnotify_update_flags(dentry);
write_seqcount_end(&target->d_seq);
write_seqcount_end(&dentry->d_seq);
if (dir)
end_dir_add(dir, n);
- dentry_unlock_for_move(dentry, target);
+
+ if (dentry->d_parent != old_parent)
+ spin_unlock(&dentry->d_parent->d_lock);
+ if (dentry != old_parent)
+ spin_unlock(&old_parent->d_lock);
+ spin_unlock(&target->d_lock);
+ spin_unlock(&dentry->d_lock);
}
/*
@@ -3048,12 +3013,14 @@ struct dentry *d_splice_alias(struct ino
inode->i_sb->s_type->name,
inode->i_sb->s_id);
} else if (!IS_ROOT(new)) {
+ struct dentry *old_parent = dget(new->d_parent);
int err = __d_unalias(inode, dentry, new);
write_sequnlock(&rename_lock);
if (err) {
dput(new);
new = ERR_PTR(err);
}
+ dput(old_parent);
} else {
__d_move(new, dentry, false);
write_sequnlock(&rename_lock);

View File

@@ -0,0 +1,138 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:41 -0600
Subject: [PATCH 15/48] tracing: Add timestamp_mode trace file
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add a new option flag indicating whether or not the ring buffer is in
'absolute timestamp' mode.
Currently this is only set/unset by hist triggers that make use of a
common_timestamp. As such, there's no reason to make this writeable
for users - its purpose is only to allow users to determine
unequivocally whether or not the ring buffer is in that mode (although
absolute timestamps can coexist with the normal delta timestamps:
timestamps written while absolute mode is in effect take up more space
in the buffer and are not as efficient).
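A hedged shell example of observing the flip (paths and the exact
trigger are illustrative; common_timestamp support lands later in this
series):

  # cat /sys/kernel/debug/tracing/timestamp_mode
  [delta] absolute
  # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> \
        /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
  # cat /sys/kernel/debug/tracing/timestamp_mode
  delta [absolute]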
Link: http://lkml.kernel.org/r/e8aa7b1cde1cf15014e66545d06ac6ef2ebba456.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 0eba34f9bf5b66217355a6a66054b3194aca123d)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/ftrace.txt | 24 ++++++++++++++++++++
kernel/trace/trace.c | 47 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+)
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -539,6 +539,30 @@ After mounting tracefs you will have acc
See events.txt for more information.
+ timestamp_mode:
+
+ Certain tracers may change the timestamp mode used when
+ logging trace events into the event buffer. Events with
+ different modes can coexist within a buffer but the mode in
+ effect when an event is logged determines which timestamp mode
+ is used for that event. The default timestamp mode is
+ 'delta'.
+
+ Usual timestamp modes for tracing:
+
+ # cat timestamp_mode
+ [delta] absolute
+
+ The timestamp mode with the square brackets around it is the
+ one in effect.
+
+ delta: Default timestamp mode - timestamp is a delta against
+ a per-buffer timestamp.
+
+ absolute: The timestamp is a full timestamp, not a delta
+ against some other value. As such it takes up more
+ space and is less efficient.
+
hwlat_detector:
Directory for the Hardware Latency Detector.
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4515,6 +4515,9 @@ static const char readme_msg[] =
#ifdef CONFIG_X86_64
" x86-tsc: TSC cycle counter\n"
#endif
+ "\n timestamp_mode\t-view the mode used to timestamp events\n"
+ " delta: Delta difference against a buffer-wide timestamp\n"
+ " absolute: Absolute (standalone) timestamp\n"
"\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
"\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
" tracing_cpumask\t- Limit which CPUs to trace\n"
@@ -6282,6 +6285,40 @@ static int tracing_clock_open(struct ino
return ret;
}
+static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
+{
+ struct trace_array *tr = m->private;
+
+ mutex_lock(&trace_types_lock);
+
+ if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
+ seq_puts(m, "delta [absolute]\n");
+ else
+ seq_puts(m, "[delta] absolute\n");
+
+ mutex_unlock(&trace_types_lock);
+
+ return 0;
+}
+
+static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+ int ret;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (trace_array_get(tr))
+ return -ENODEV;
+
+ ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
+ if (ret < 0)
+ trace_array_put(tr);
+
+ return ret;
+}
+
int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
{
int ret = 0;
@@ -6560,6 +6597,13 @@ static const struct file_operations trac
.write = tracing_clock_write,
};
+static const struct file_operations trace_time_stamp_mode_fops = {
+ .open = tracing_time_stamp_mode_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_single_release_tr,
+};
+
#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
.open = tracing_snapshot_open,
@@ -7882,6 +7926,9 @@ init_tracer_tracefs(struct trace_array *
trace_create_file("tracing_on", 0644, d_tracer,
tr, &rb_simple_fops);
+ trace_create_file("timestamp_mode", 0444, d_tracer, tr,
+ &trace_time_stamp_mode_fops);
+
create_trace_options_dir(tr);
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)

View File

@@ -0,0 +1,91 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 11 Mar 2018 15:15:46 -0400
Subject: [PATCH 16/17] fold dentry_lock_for_move() into its sole caller and
clean it up
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit 42177007aa277af3e37bf2ae3efdfe795c81d700
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 49 +++++++++++++++++++++++--------------------------
1 file changed, 23 insertions(+), 26 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2773,25 +2773,6 @@ static void copy_name(struct dentry *den
kfree_rcu(old_name, u.head);
}
-static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
-{
- if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
- spin_lock(&target->d_parent->d_lock);
- else {
- if (d_ancestor(dentry->d_parent, target->d_parent)) {
- spin_lock(&dentry->d_parent->d_lock);
- spin_lock_nested(&target->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- } else {
- spin_lock(&target->d_parent->d_lock);
- spin_lock_nested(&dentry->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- }
- }
- spin_lock_nested(&dentry->d_lock, 2);
- spin_lock_nested(&target->d_lock, 3);
-}
-
/*
* __d_move - move a dentry
* @dentry: entry to move
@@ -2806,16 +2787,34 @@ static void dentry_lock_for_move(struct
static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
{
- struct dentry *old_parent;
+ struct dentry *old_parent, *p;
struct inode *dir = NULL;
unsigned n;
- if (!dentry->d_inode)
- printk(KERN_WARNING "VFS: moving negative dcache entry\n");
- BUG_ON(d_ancestor(dentry, target));
+ WARN_ON(!dentry->d_inode);
+ if (WARN_ON(dentry == target))
+ return;
+
BUG_ON(d_ancestor(target, dentry));
+ old_parent = dentry->d_parent;
+ p = d_ancestor(old_parent, target);
+ if (IS_ROOT(dentry)) {
+ BUG_ON(p);
+ spin_lock(&target->d_parent->d_lock);
+ } else if (!p) {
+ /* target is not a descendent of dentry->d_parent */
+ spin_lock(&target->d_parent->d_lock);
+ spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
+ } else {
+ BUG_ON(p == dentry);
+ spin_lock(&old_parent->d_lock);
+ if (p != target)
+ spin_lock_nested(&target->d_parent->d_lock,
+ DENTRY_D_LOCK_NESTED);
+ }
+ spin_lock_nested(&dentry->d_lock, 2);
+ spin_lock_nested(&target->d_lock, 3);
- dentry_lock_for_move(dentry, target);
if (unlikely(d_in_lookup(target))) {
dir = target->d_parent->d_inode;
n = start_dir_add(dir);
@@ -2825,8 +2824,6 @@ static void __d_move(struct dentry *dent
write_seqcount_begin(&dentry->d_seq);
write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
- old_parent = dentry->d_parent;
-
/* unhash both */
if (!d_unhashed(dentry))
___d_drop(dentry);

View File

@@ -0,0 +1,303 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:42 -0600
Subject: [PATCH 16/48] tracing: Give event triggers access to
ring_buffer_event
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The ring_buffer event can provide a timestamp that may be useful to
various triggers - pass it into the handlers for that purpose.
Link: http://lkml.kernel.org/r/6de592683b59fa70ffa5d43d0109896623fc1367.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 373514437a6f75b5cfe890742b590f2c12f6c335)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/trace_events.h | 14 ++++++----
kernel/trace/trace.h | 9 +++---
kernel/trace/trace_events_hist.c | 11 +++++---
kernel/trace/trace_events_trigger.c | 47 ++++++++++++++++++++++--------------
4 files changed, 49 insertions(+), 32 deletions(-)
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -430,11 +430,13 @@ enum event_trigger_type {
extern int filter_match_preds(struct event_filter *filter, void *rec);
-extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
- void *rec);
-extern void event_triggers_post_call(struct trace_event_file *file,
- enum event_trigger_type tt,
- void *rec);
+extern enum event_trigger_type
+event_triggers_call(struct trace_event_file *file, void *rec,
+ struct ring_buffer_event *event);
+extern void
+event_triggers_post_call(struct trace_event_file *file,
+ enum event_trigger_type tt,
+ void *rec, struct ring_buffer_event *event);
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
@@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace
if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
- event_triggers_call(file, NULL);
+ event_triggers_call(file, NULL, NULL);
if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
return true;
if (eflags & EVENT_FILE_FL_PID_FILTER)
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1294,7 +1294,7 @@ static inline bool
unsigned long eflags = file->flags;
if (eflags & EVENT_FILE_FL_TRIGGER_COND)
- *tt = event_triggers_call(file, entry);
+ *tt = event_triggers_call(file, entry, event);
if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
(unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
@@ -1331,7 +1331,7 @@ event_trigger_unlock_commit(struct trace
trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
if (tt)
- event_triggers_post_call(file, tt, entry);
+ event_triggers_post_call(file, tt, entry, event);
}
/**
@@ -1364,7 +1364,7 @@ event_trigger_unlock_commit_regs(struct
irq_flags, pc, regs);
if (tt)
- event_triggers_post_call(file, tt, entry);
+ event_triggers_post_call(file, tt, entry, event);
}
#define FILTER_PRED_INVALID ((unsigned short)-1)
@@ -1589,7 +1589,8 @@ extern int register_trigger_hist_enable_
*/
struct event_trigger_ops {
void (*func)(struct event_trigger_data *data,
- void *rec);
+ void *rec,
+ struct ring_buffer_event *rbe);
int (*init)(struct event_trigger_ops *ops,
struct event_trigger_data *data);
void (*free)(struct event_trigger_ops *ops,
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -909,7 +909,8 @@ static inline void add_to_key(char *comp
memcpy(compound_key + key_field->offset, key, size);
}
-static void event_hist_trigger(struct event_trigger_data *data, void *rec)
+static void event_hist_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct hist_trigger_data *hist_data = data->private_data;
bool use_compound_key = (hist_data->n_keys > 1);
@@ -1658,7 +1659,8 @@ static struct event_command trigger_hist
}
static void
-hist_enable_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
struct event_trigger_data *test;
@@ -1674,7 +1676,8 @@ hist_enable_trigger(struct event_trigger
}
static void
-hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1682,7 +1685,7 @@ hist_enable_count_trigger(struct event_t
if (data->count != -1)
(data->count)--;
- hist_enable_trigger(data, rec);
+ hist_enable_trigger(data, rec, event);
}
static struct event_trigger_ops hist_enable_trigger_ops = {
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trig
* any trigger that should be deferred, ETT_NONE if nothing to defer.
*/
enum event_trigger_type
-event_triggers_call(struct trace_event_file *file, void *rec)
+event_triggers_call(struct trace_event_file *file, void *rec,
+ struct ring_buffer_event *event)
{
struct event_trigger_data *data;
enum event_trigger_type tt = ETT_NONE;
@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_f
if (data->paused)
continue;
if (!rec) {
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
continue;
}
filter = rcu_dereference_sched(data->filter);
@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_f
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
}
return tt;
}
@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
void
event_triggers_post_call(struct trace_event_file *file,
enum event_trigger_type tt,
- void *rec)
+ void *rec, struct ring_buffer_event *event)
{
struct event_trigger_data *data;
@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_ev
if (data->paused)
continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->func(data, rec);
+ data->ops->func(data, rec, event);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -909,7 +910,8 @@ void set_named_trigger_data(struct event
}
static void
-traceon_trigger(struct event_trigger_data *data, void *rec)
+traceon_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (tracing_is_on())
return;
@@ -918,7 +920,8 @@ traceon_trigger(struct event_trigger_dat
}
static void
-traceon_count_trigger(struct event_trigger_data *data, void *rec)
+traceon_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (tracing_is_on())
return;
@@ -933,7 +936,8 @@ traceon_count_trigger(struct event_trigg
}
static void
-traceoff_trigger(struct event_trigger_data *data, void *rec)
+traceoff_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!tracing_is_on())
return;
@@ -942,7 +946,8 @@ traceoff_trigger(struct event_trigger_da
}
static void
-traceoff_count_trigger(struct event_trigger_data *data, void *rec)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!tracing_is_on())
return;
@@ -1039,13 +1044,15 @@ static struct event_command trigger_trac
#ifdef CONFIG_TRACER_SNAPSHOT
static void
-snapshot_trigger(struct event_trigger_data *data, void *rec)
+snapshot_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
tracing_snapshot();
}
static void
-snapshot_count_trigger(struct event_trigger_data *data, void *rec)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1053,7 +1060,7 @@ snapshot_count_trigger(struct event_trig
if (data->count != -1)
(data->count)--;
- snapshot_trigger(data, rec);
+ snapshot_trigger(data, rec, event);
}
static int
@@ -1141,13 +1148,15 @@ static __init int register_trigger_snaps
#endif
static void
-stacktrace_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
trace_dump_stack(STACK_SKIP);
}
static void
-stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
if (!data->count)
return;
@@ -1155,7 +1164,7 @@ stacktrace_count_trigger(struct event_tr
if (data->count != -1)
(data->count)--;
- stacktrace_trigger(data, rec);
+ stacktrace_trigger(data, rec, event);
}
static int
@@ -1217,7 +1226,8 @@ static __init void unregister_trigger_tr
}
static void
-event_enable_trigger(struct event_trigger_data *data, void *rec)
+event_enable_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1228,7 +1238,8 @@ event_enable_trigger(struct event_trigge
}
static void
-event_enable_count_trigger(struct event_trigger_data *data, void *rec)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec,
+ struct ring_buffer_event *event)
{
struct enable_trigger_data *enable_data = data->private_data;
@@ -1242,7 +1253,7 @@ event_enable_count_trigger(struct event_
if (data->count != -1)
(data->count)--;
- event_enable_trigger(data, rec);
+ event_enable_trigger(data, rec, event);
}
int event_enable_trigger_print(struct seq_file *m,

View File

@@ -0,0 +1,33 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 29 Mar 2018 15:08:21 -0400
Subject: [PATCH 17/17] d_genocide: move export to definition
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Upstream commit cbd4a5bcb25b5ed0c1c64bc969b893cad9b78acc
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3095,6 +3095,8 @@ void d_genocide(struct dentry *parent)
d_walk(parent, parent, d_genocide_kill, NULL);
}
+EXPORT_SYMBOL(d_genocide);
+
void d_tmpfile(struct dentry *dentry, struct inode *inode)
{
inode_dec_link_count(inode);
@@ -3174,8 +3176,6 @@ static void __init dcache_init(void)
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);
-EXPORT_SYMBOL(d_genocide);
-
void __init vfs_caches_init_early(void)
{
int i;

View File

@@ -0,0 +1,144 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:43 -0600
Subject: [PATCH 17/48] tracing: Add ring buffer event param to hist field
functions
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Some fields, such as timestamps, require access to the
ring_buffer_event struct; add a param so that hist field functions can
access it.
Link: http://lkml.kernel.org/r/2ff4af18e72b6002eb86b26b2a7f39cef7d1dfe4.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit df7253a730d0aaef760d45ea234dc087ba7cac88)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 39 ++++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 15 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -26,7 +26,8 @@
struct hist_field;
-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event);
+typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event,
+ struct ring_buffer_event *rbe);
#define HIST_FIELD_OPERANDS_MAX 2
@@ -40,24 +41,28 @@ struct hist_field {
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
};
-static u64 hist_field_none(struct hist_field *field, void *event)
+static u64 hist_field_none(struct hist_field *field, void *event,
+ struct ring_buffer_event *rbe)
{
return 0;
}
-static u64 hist_field_counter(struct hist_field *field, void *event)
+static u64 hist_field_counter(struct hist_field *field, void *event,
+ struct ring_buffer_event *rbe)
{
return 1;
}
-static u64 hist_field_string(struct hist_field *hist_field, void *event)
+static u64 hist_field_string(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
char *addr = (char *)(event + hist_field->field->offset);
return (u64)(unsigned long)addr;
}
-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_dynstring(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
u32 str_item = *(u32 *)(event + hist_field->field->offset);
int str_loc = str_item & 0xffff;
@@ -66,24 +71,28 @@ static u64 hist_field_dynstring(struct h
return (u64)(unsigned long)addr;
}
-static u64 hist_field_pstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_pstring(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
char **addr = (char **)(event + hist_field->field->offset);
return (u64)(unsigned long)*addr;
}
-static u64 hist_field_log2(struct hist_field *hist_field, void *event)
+static u64 hist_field_log2(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
{
struct hist_field *operand = hist_field->operands[0];
- u64 val = operand->fn(operand, event);
+ u64 val = operand->fn(operand, event, rbe);
return (u64) ilog2(roundup_pow_of_two(val));
}
#define DEFINE_HIST_FIELD_FN(type) \
-static u64 hist_field_##type(struct hist_field *hist_field, void *event)\
+ static u64 hist_field_##type(struct hist_field *hist_field, \
+ void *event, \
+ struct ring_buffer_event *rbe) \
{ \
type *addr = (type *)(event + hist_field->field->offset); \
\
@@ -871,8 +880,8 @@ create_hist_data(unsigned int map_bits,
}
static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
- struct tracing_map_elt *elt,
- void *rec)
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe)
{
struct hist_field *hist_field;
unsigned int i;
@@ -880,7 +889,7 @@ static void hist_trigger_elt_update(stru
for_each_hist_val_field(i, hist_data) {
hist_field = hist_data->fields[i];
- hist_val = hist_field->fn(hist_field, rec);
+ hist_val = hist_field->fn(hist_field, rec, rbe);
tracing_map_update_sum(elt, i, hist_val);
}
}
@@ -910,7 +919,7 @@ static inline void add_to_key(char *comp
}
static void event_hist_trigger(struct event_trigger_data *data, void *rec,
- struct ring_buffer_event *event)
+ struct ring_buffer_event *rbe)
{
struct hist_trigger_data *hist_data = data->private_data;
bool use_compound_key = (hist_data->n_keys > 1);
@@ -939,7 +948,7 @@ static void event_hist_trigger(struct ev
key = entries;
} else {
- field_contents = key_field->fn(key_field, rec);
+ field_contents = key_field->fn(key_field, rec, rbe);
if (key_field->flags & HIST_FIELD_FL_STRING) {
key = (void *)(unsigned long)field_contents;
use_compound_key = true;
@@ -956,7 +965,7 @@ static void event_hist_trigger(struct ev
elt = tracing_map_insert(hist_data->map, key);
if (elt)
- hist_trigger_elt_update(hist_data, elt, rec);
+ hist_trigger_elt_update(hist_data, elt, rec, rbe);
}
static void hist_trigger_stacktrace_print(struct seq_file *m,


@@ -0,0 +1,113 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:44 -0600
Subject: [PATCH 18/48] tracing: Break out hist trigger assignment parsing
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
This will make it easier to add variables, and makes the parsing code
cleaner regardless.
Link: http://lkml.kernel.org/r/e574b3291bbe15e35a4dfc87e5395aa715701c98.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Rajvi Jingar <rajvi.jingar@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 3c1e23def1291b21a2057f883ccc0456418dc5ad)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 72 +++++++++++++++++++++++++++------------
1 file changed, 51 insertions(+), 21 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -251,6 +251,51 @@ static void destroy_hist_trigger_attrs(s
kfree(attrs);
}
+static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
+{
+ int ret = 0;
+
+ if ((strncmp(str, "key=", strlen("key=")) == 0) ||
+ (strncmp(str, "keys=", strlen("keys=")) == 0)) {
+ attrs->keys_str = kstrdup(str, GFP_KERNEL);
+ if (!attrs->keys_str) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
+ (strncmp(str, "vals=", strlen("vals=")) == 0) ||
+ (strncmp(str, "values=", strlen("values=")) == 0)) {
+ attrs->vals_str = kstrdup(str, GFP_KERNEL);
+ if (!attrs->vals_str) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if (strncmp(str, "sort=", strlen("sort=")) == 0) {
+ attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
+ if (!attrs->sort_key_str) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if (strncmp(str, "name=", strlen("name=")) == 0) {
+ attrs->name = kstrdup(str, GFP_KERNEL);
+ if (!attrs->name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else if (strncmp(str, "size=", strlen("size=")) == 0) {
+ int map_bits = parse_map_size(str);
+
+ if (map_bits < 0) {
+ ret = map_bits;
+ goto out;
+ }
+ attrs->map_bits = map_bits;
+ } else
+ ret = -EINVAL;
+ out:
+ return ret;
+}
+
static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
{
struct hist_trigger_attrs *attrs;
@@ -263,33 +308,18 @@ static struct hist_trigger_attrs *parse_
while (trigger_str) {
char *str = strsep(&trigger_str, ":");
- if ((strncmp(str, "key=", strlen("key=")) == 0) ||
- (strncmp(str, "keys=", strlen("keys=")) == 0))
- attrs->keys_str = kstrdup(str, GFP_KERNEL);
- else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
- (strncmp(str, "vals=", strlen("vals=")) == 0) ||
- (strncmp(str, "values=", strlen("values=")) == 0))
- attrs->vals_str = kstrdup(str, GFP_KERNEL);
- else if (strncmp(str, "sort=", strlen("sort=")) == 0)
- attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
- else if (strncmp(str, "name=", strlen("name=")) == 0)
- attrs->name = kstrdup(str, GFP_KERNEL);
- else if (strcmp(str, "pause") == 0)
+ if (strchr(str, '=')) {
+ ret = parse_assignment(str, attrs);
+ if (ret)
+ goto free;
+ } else if (strcmp(str, "pause") == 0)
attrs->pause = true;
else if ((strcmp(str, "cont") == 0) ||
(strcmp(str, "continue") == 0))
attrs->cont = true;
else if (strcmp(str, "clear") == 0)
attrs->clear = true;
- else if (strncmp(str, "size=", strlen("size=")) == 0) {
- int map_bits = parse_map_size(str);
-
- if (map_bits < 0) {
- ret = map_bits;
- goto free;
- }
- attrs->map_bits = map_bits;
- } else {
+ else {
ret = -EINVAL;
goto free;
}


@@ -0,0 +1,247 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:45 -0600
Subject: [PATCH 19/48] tracing: Add hist trigger timestamp support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add support for a timestamp event field. This is actually a 'pseudo-'
event field in that it behaves like it's part of the event record, but
is really part of the corresponding ring buffer event.
To make use of the timestamp field, users can specify
"common_timestamp" as a field name for any histogram. Note that this
doesn't make much sense on its own as either a key or a value,
but needs to be supported even so, since follow-on patches will add
support for making use of this field in time deltas. The
common_timestamp 'field' is not a bona fide event field - so you won't
find it in the event description - but rather it's a synthetic field
that can be used like a real field.
Note that the use of this field requires the ring buffer be put into
'absolute timestamp' mode, which saves the complete timestamp for each
event rather than an offset. This mode will be enabled if and only if
a histogram makes use of the "common_timestamp" field.
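For instance (an illustrative trigger, following the examples used
elsewhere in this series), common_timestamp can be named like any other
field:

  # echo 'hist:keys=pid:vals=common_timestamp' ...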
Link: http://lkml.kernel.org/r/97afbd646ed146e26271f3458b4b33e16d7817c2.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
[kasan use-after-free fix]
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 5d9d58b00ff82078deac8557c91359cd13c8959d)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 94 +++++++++++++++++++++++++++++----------
1 file changed, 71 insertions(+), 23 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -89,6 +89,12 @@ static u64 hist_field_log2(struct hist_f
return (u64) ilog2(roundup_pow_of_two(val));
}
+static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
+{
+ return ring_buffer_event_time_stamp(rbe);
+}
+
#define DEFINE_HIST_FIELD_FN(type) \
static u64 hist_field_##type(struct hist_field *hist_field, \
void *event, \
@@ -135,6 +141,7 @@ enum hist_field_flags {
HIST_FIELD_FL_SYSCALL = 1 << 7,
HIST_FIELD_FL_STACKTRACE = 1 << 8,
HIST_FIELD_FL_LOG2 = 1 << 9,
+ HIST_FIELD_FL_TIMESTAMP = 1 << 10,
};
struct hist_trigger_attrs {
@@ -159,6 +166,7 @@ struct hist_trigger_data {
struct trace_event_file *event_file;
struct hist_trigger_attrs *attrs;
struct tracing_map *map;
+ bool enable_timestamps;
};
static const char *hist_field_name(struct hist_field *field,
@@ -173,6 +181,8 @@ static const char *hist_field_name(struc
field_name = field->field->name;
else if (field->flags & HIST_FIELD_FL_LOG2)
field_name = hist_field_name(field->operands[0], ++level);
+ else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
+ field_name = "common_timestamp";
if (field_name == NULL)
field_name = "";
@@ -440,6 +450,12 @@ static struct hist_field *create_hist_fi
goto out;
}
+ if (flags & HIST_FIELD_FL_TIMESTAMP) {
+ hist_field->fn = hist_field_timestamp;
+ hist_field->size = sizeof(u64);
+ goto out;
+ }
+
if (WARN_ON_ONCE(!field))
goto out;
@@ -517,10 +533,15 @@ static int create_val_field(struct hist_
}
}
- field = trace_find_event_field(file->event_call, field_name);
- if (!field || !field->size) {
- ret = -EINVAL;
- goto out;
+ if (strcmp(field_name, "common_timestamp") == 0) {
+ flags |= HIST_FIELD_FL_TIMESTAMP;
+ hist_data->enable_timestamps = true;
+ } else {
+ field = trace_find_event_field(file->event_call, field_name);
+ if (!field || !field->size) {
+ ret = -EINVAL;
+ goto out;
+ }
}
hist_data->fields[val_idx] = create_hist_field(field, flags);
@@ -615,16 +636,22 @@ static int create_key_field(struct hist_
}
}
- field = trace_find_event_field(file->event_call, field_name);
- if (!field || !field->size) {
- ret = -EINVAL;
- goto out;
- }
+ if (strcmp(field_name, "common_timestamp") == 0) {
+ flags |= HIST_FIELD_FL_TIMESTAMP;
+ hist_data->enable_timestamps = true;
+ key_size = sizeof(u64);
+ } else {
+ field = trace_find_event_field(file->event_call, field_name);
+ if (!field || !field->size) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (is_string_field(field))
- key_size = MAX_FILTER_STR_VAL;
- else
- key_size = field->size;
+ if (is_string_field(field))
+ key_size = MAX_FILTER_STR_VAL;
+ else
+ key_size = field->size;
+ }
}
hist_data->fields[key_idx] = create_hist_field(field, flags);
@@ -820,6 +847,9 @@ static int create_tracing_map_fields(str
if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
cmp_fn = tracing_map_cmp_none;
+ else if (!field)
+ cmp_fn = tracing_map_cmp_num(hist_field->size,
+ hist_field->is_signed);
else if (is_string_field(field))
cmp_fn = tracing_map_cmp_string;
else
@@ -1215,7 +1245,11 @@ static void hist_field_print(struct seq_
{
const char *field_name = hist_field_name(hist_field, 0);
- seq_printf(m, "%s", field_name);
+ if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
+ seq_puts(m, "common_timestamp");
+ else if (field_name)
+ seq_printf(m, "%s", field_name);
+
if (hist_field->flags) {
const char *flags_str = get_hist_field_flags(hist_field);
@@ -1266,27 +1300,25 @@ static int event_hist_trigger_print(stru
for (i = 0; i < hist_data->n_sort_keys; i++) {
struct tracing_map_sort_key *sort_key;
+ unsigned int idx;
sort_key = &hist_data->sort_keys[i];
+ idx = sort_key->field_idx;
+
+ if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
+ return -EINVAL;
if (i > 0)
seq_puts(m, ",");
- if (sort_key->field_idx == HITCOUNT_IDX)
+ if (idx == HITCOUNT_IDX)
seq_puts(m, "hitcount");
- else {
- unsigned int idx = sort_key->field_idx;
-
- if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
- return -EINVAL;
-
+ else
hist_field_print(m, hist_data->fields[idx]);
- }
if (sort_key->descending)
seq_puts(m, ".descending");
}
-
seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
if (data->filter_str)
@@ -1454,6 +1486,10 @@ static bool hist_trigger_match(struct ev
return false;
if (key_field->offset != key_field_test->offset)
return false;
+ if (key_field->size != key_field_test->size)
+ return false;
+ if (key_field->is_signed != key_field_test->is_signed)
+ return false;
}
for (i = 0; i < hist_data->n_sort_keys; i++) {
@@ -1536,6 +1572,9 @@ static int hist_register_trigger(char *g
update_cond_flag(file);
+ if (hist_data->enable_timestamps)
+ tracing_set_time_stamp_abs(file->tr, true);
+
if (trace_event_trigger_enable_disable(file, 1) < 0) {
list_del_rcu(&data->list);
update_cond_flag(file);
@@ -1570,17 +1609,26 @@ static void hist_unregister_trigger(char
if (unregistered && test->ops->free)
test->ops->free(test->ops, test);
+
+ if (hist_data->enable_timestamps) {
+ if (unregistered)
+ tracing_set_time_stamp_abs(file->tr, false);
+ }
}
static void hist_unreg_all(struct trace_event_file *file)
{
struct event_trigger_data *test, *n;
+ struct hist_trigger_data *hist_data;
list_for_each_entry_safe(test, n, &file->triggers, list) {
if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ hist_data = test->private_data;
list_del_rcu(&test->list);
trace_event_trigger_enable_disable(file, 0);
update_cond_flag(file);
+ if (hist_data->enable_timestamps)
+ tracing_set_time_stamp_abs(file->tr, false);
if (test->ops->free)
test->ops->free(test->ops, test);
}


@@ -0,0 +1,225 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:46 -0600
Subject: [PATCH 20/48] tracing: Add per-element variable support to
tracing_map
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
In order to allow information to be passed between trace events, add
support for per-element variables to tracing_map. This provides a
means for histograms to associate a value or values with an entry when
it's saved or updated, to be retrieved by a subsequent event occurrence.
Variables can be set using tracing_map_set_var() and read using
tracing_map_read_var(). tracing_map_var_set() returns true or false
depending on whether the variable has been set, which is
important for event-matching applications.
tracing_map_read_var_once() reads the variable and resets it to the
'unset' state, implementing read-once variables, which are also
important for event-matching uses.
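A rough usage sketch (map, elt, and the ts0/ts1/delta values are
assumptions, not part of the patch): a caller reserves a variable slot
once at map-setup time, then sets and reads it per element:

  int idx = tracing_map_add_var(map);        /* at setup; < 0 on error */

  tracing_map_set_var(elt, idx, ts0);        /* event A: save a value */

  if (tracing_map_var_set(elt, idx))         /* event B: anything saved? */
          delta = ts1 - tracing_map_read_var_once(elt, idx); /* read-once */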
Link: http://lkml.kernel.org/r/7fa001108252556f0c6dd9d63145eabfe3370d1a.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 42a38132f9e154e1fa2dd2182dff17f9c0e7ee7e)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/tracing_map.c | 108 +++++++++++++++++++++++++++++++++++++++++++++
kernel/trace/tracing_map.h | 11 ++++
2 files changed, 119 insertions(+)
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -66,6 +66,73 @@ u64 tracing_map_read_sum(struct tracing_
return (u64)atomic64_read(&elt->fields[i].sum);
}
+/**
+ * tracing_map_set_var - Assign a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ * @n: The value to assign
+ *
+ * Assign n to variable i associated with the specified tracing_map_elt
+ * instance. The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.
+ */
+void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n)
+{
+ atomic64_set(&elt->vars[i], n);
+ elt->var_set[i] = true;
+}
+
+/**
+ * tracing_map_var_set - Return whether or not a variable has been set
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Return true if the variable has been set, false otherwise. The
+ * index i is the index returned by the call to tracing_map_add_var()
+ * when the tracing map was set up.
+ */
+bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i)
+{
+ return elt->var_set[i];
+}
+
+/**
+ * tracing_map_read_var - Return the value of a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance. The index i is the index returned by the
+ * call to tracing_map_add_var() when the tracing map was set
+ * up.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i)
+{
+ return (u64)atomic64_read(&elt->vars[i]);
+}
+
+/**
+ * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance, and reset the variable to the 'not set'
+ * state. The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up. The reset
+ * essentially makes the variable a read-once variable if it's only
+ * accessed using this function.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
+{
+ elt->var_set[i] = false;
+ return (u64)atomic64_read(&elt->vars[i]);
+}
+
int tracing_map_cmp_string(void *val_a, void *val_b)
{
char *a = val_a;
@@ -171,6 +238,28 @@ int tracing_map_add_sum_field(struct tra
}
/**
+ * tracing_map_add_var - Add a field describing a tracing_map var
+ * @map: The tracing_map
+ *
+ * Add a var to the map and return the index identifying it in the map
+ * and associated tracing_map_elts. This is the index used for
+ * instance to update a var for a particular tracing_map_elt using
+ * tracing_map_update_var() or reading it via tracing_map_read_var().
+ *
+ * Return: The index identifying the var in the map and associated
+ * tracing_map_elts, or -EINVAL on error.
+ */
+int tracing_map_add_var(struct tracing_map *map)
+{
+ int ret = -EINVAL;
+
+ if (map->n_vars < TRACING_MAP_VARS_MAX)
+ ret = map->n_vars++;
+
+ return ret;
+}
+
+/**
* tracing_map_add_key_field - Add a field describing a tracing_map key
* @map: The tracing_map
* @offset: The offset within the key
@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct
if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64)
atomic64_set(&elt->fields[i].sum, 0);
+ for (i = 0; i < elt->map->n_vars; i++) {
+ atomic64_set(&elt->vars[i], 0);
+ elt->var_set[i] = false;
+ }
+
if (elt->map->ops && elt->map->ops->elt_clear)
elt->map->ops->elt_clear(elt);
}
@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct
if (elt->map->ops && elt->map->ops->elt_free)
elt->map->ops->elt_free(elt);
kfree(elt->fields);
+ kfree(elt->vars);
+ kfree(elt->var_set);
kfree(elt->key);
kfree(elt);
}
@@ -332,6 +428,18 @@ static struct tracing_map_elt *tracing_m
err = -ENOMEM;
goto free;
}
+
+ elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL);
+ if (!elt->vars) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL);
+ if (!elt->var_set) {
+ err = -ENOMEM;
+ goto free;
+ }
tracing_map_elt_init_fields(elt);
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -10,6 +10,7 @@
#define TRACING_MAP_VALS_MAX 3
#define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \
TRACING_MAP_VALS_MAX)
+#define TRACING_MAP_VARS_MAX 16
#define TRACING_MAP_SORT_KEYS_MAX 2
typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b);
@@ -137,6 +138,8 @@ struct tracing_map_field {
struct tracing_map_elt {
struct tracing_map *map;
struct tracing_map_field *fields;
+ atomic64_t *vars;
+ bool *var_set;
void *key;
void *private_data;
};
@@ -192,6 +195,7 @@ struct tracing_map {
int key_idx[TRACING_MAP_KEYS_MAX];
unsigned int n_keys;
struct tracing_map_sort_key sort_key;
+ unsigned int n_vars;
atomic64_t hits;
atomic64_t drops;
};
@@ -241,6 +245,7 @@ tracing_map_create(unsigned int map_bits
extern int tracing_map_init(struct tracing_map *map);
extern int tracing_map_add_sum_field(struct tracing_map *map);
+extern int tracing_map_add_var(struct tracing_map *map);
extern int tracing_map_add_key_field(struct tracing_map *map,
unsigned int offset,
tracing_map_cmp_fn_t cmp_fn);
@@ -260,7 +265,13 @@ extern int tracing_map_cmp_none(void *va
extern void tracing_map_update_sum(struct tracing_map_elt *elt,
unsigned int i, u64 n);
+extern void tracing_map_set_var(struct tracing_map_elt *elt,
+ unsigned int i, u64 n);
+extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i);
extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i);
+
extern void tracing_map_set_field_descr(struct tracing_map *map,
unsigned int i,
unsigned int key_offset,


@@ -0,0 +1,83 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:47 -0600
Subject: [PATCH 21/48] tracing: Add hist_data member to hist_field
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Allow hist_data access via hist_field. Some users of hist_fields
require or will require more access to the associated hist_data (the
timestamp handling in a follow-on patch, for instance, reaches the
trigger's trace_array through it).
Link: http://lkml.kernel.org/r/d04cd0768f5228ebb4ac0ba4a847bc4d14d4826f.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 14ab3edac407939009700c04215935576250e969)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -39,6 +39,7 @@ struct hist_field {
unsigned int offset;
unsigned int is_signed;
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
+ struct hist_trigger_data *hist_data;
};
static u64 hist_field_none(struct hist_field *field, void *event,
@@ -420,7 +421,8 @@ static void destroy_hist_field(struct hi
kfree(hist_field);
}
-static struct hist_field *create_hist_field(struct ftrace_event_field *field,
+static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
+ struct ftrace_event_field *field,
unsigned long flags)
{
struct hist_field *hist_field;
@@ -432,6 +434,8 @@ static struct hist_field *create_hist_fi
if (!hist_field)
return NULL;
+ hist_field->hist_data = hist_data;
+
if (flags & HIST_FIELD_FL_HITCOUNT) {
hist_field->fn = hist_field_counter;
goto out;
@@ -445,7 +449,7 @@ static struct hist_field *create_hist_fi
if (flags & HIST_FIELD_FL_LOG2) {
unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
hist_field->fn = hist_field_log2;
- hist_field->operands[0] = create_hist_field(field, fl);
+ hist_field->operands[0] = create_hist_field(hist_data, field, fl);
hist_field->size = hist_field->operands[0]->size;
goto out;
}
@@ -498,7 +502,7 @@ static void destroy_hist_fields(struct h
static int create_hitcount_val(struct hist_trigger_data *hist_data)
{
hist_data->fields[HITCOUNT_IDX] =
- create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT);
+ create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT);
if (!hist_data->fields[HITCOUNT_IDX])
return -ENOMEM;
@@ -544,7 +548,7 @@ static int create_val_field(struct hist_
}
}
- hist_data->fields[val_idx] = create_hist_field(field, flags);
+ hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags);
if (!hist_data->fields[val_idx]) {
ret = -ENOMEM;
goto out;
@@ -654,7 +658,7 @@ static int create_key_field(struct hist_
}
}
- hist_data->fields[key_idx] = create_hist_field(field, flags);
+ hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags);
if (!hist_data->fields[key_idx]) {
ret = -ENOMEM;
goto out;


@@ -0,0 +1,158 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:48 -0600
Subject: [PATCH 22/48] tracing: Add usecs modifier for hist trigger timestamps
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Appending .usecs onto a common_timestamp field will cause the
timestamp value to be in microseconds instead of the default
nanoseconds. A typical latency histogram using usecs would look like
this:
# echo 'hist:keys=pid,prio:ts0=common_timestamp.usecs ...
# echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 ...
This also adds an external trace_clock_in_ns() to trace.c for the
timestamp conversion.
Link: http://lkml.kernel.org/r/4e813705a170b3e13e97dc3135047362fb1a39f3.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 4fa4fdb0fe5d0e87e05b0c5b443cec2269ec0609)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 1 +
kernel/trace/trace.c | 13 +++++++++++--
kernel/trace/trace.h | 2 ++
kernel/trace/trace_events_hist.c | 28 ++++++++++++++++++++++------
4 files changed, 36 insertions(+), 8 deletions(-)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -74,6 +74,7 @@
.syscall display a syscall id as a system call name
.execname display a common_pid as a program name
.log2 display log2 value rather than raw number
+ .usecs display a common_timestamp in microseconds
Note that in general the semantics of a given field aren't
interpreted when applying a modifier to it, but there are some
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1168,6 +1168,14 @@ static struct {
ARCH_TRACE_CLOCKS
};
+bool trace_clock_in_ns(struct trace_array *tr)
+{
+ if (trace_clocks[tr->clock_id].in_ns)
+ return true;
+
+ return false;
+}
+
/*
* trace_parser_get_init - gets the buffer for trace parser
*/
@@ -4694,8 +4702,9 @@ static const char readme_msg[] =
"\t .sym display an address as a symbol\n"
"\t .sym-offset display an address as a symbol and offset\n"
"\t .execname display a common_pid as a program name\n"
- "\t .syscall display a syscall id as a syscall name\n\n"
- "\t .log2 display log2 value rather than raw number\n\n"
+ "\t .syscall display a syscall id as a syscall name\n"
+ "\t .log2 display log2 value rather than raw number\n"
+ "\t .usecs display a common_timestamp in microseconds\n\n"
"\t The 'pause' parameter can be used to pause an existing hist\n"
"\t trigger or to start a hist trigger but not log any events\n"
"\t until told to do so. 'continue' can be used to start or\n"
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -289,6 +289,8 @@ extern void trace_array_put(struct trace
extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+extern bool trace_clock_in_ns(struct trace_array *tr);
+
/*
* The global tracer (top) should be the first trace array added,
* but we check the flag anyway.
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -90,12 +90,6 @@ static u64 hist_field_log2(struct hist_f
return (u64) ilog2(roundup_pow_of_two(val));
}
-static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
-{
- return ring_buffer_event_time_stamp(rbe);
-}
-
#define DEFINE_HIST_FIELD_FN(type) \
static u64 hist_field_##type(struct hist_field *hist_field, \
void *event, \
@@ -143,6 +137,7 @@ enum hist_field_flags {
HIST_FIELD_FL_STACKTRACE = 1 << 8,
HIST_FIELD_FL_LOG2 = 1 << 9,
HIST_FIELD_FL_TIMESTAMP = 1 << 10,
+ HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11,
};
struct hist_trigger_attrs {
@@ -153,6 +148,7 @@ struct hist_trigger_attrs {
bool pause;
bool cont;
bool clear;
+ bool ts_in_usecs;
unsigned int map_bits;
};
@@ -170,6 +166,20 @@ struct hist_trigger_data {
bool enable_timestamps;
};
+static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
+{
+ struct hist_trigger_data *hist_data = hist_field->hist_data;
+ struct trace_array *tr = hist_data->event_file->tr;
+
+ u64 ts = ring_buffer_event_time_stamp(rbe);
+
+ if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr))
+ ts = ns2usecs(ts);
+
+ return ts;
+}
+
static const char *hist_field_name(struct hist_field *field,
unsigned int level)
{
@@ -634,6 +644,8 @@ static int create_key_field(struct hist_
flags |= HIST_FIELD_FL_SYSCALL;
else if (strcmp(field_str, "log2") == 0)
flags |= HIST_FIELD_FL_LOG2;
+ else if (strcmp(field_str, "usecs") == 0)
+ flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
ret = -EINVAL;
goto out;
@@ -643,6 +655,8 @@ static int create_key_field(struct hist_
if (strcmp(field_name, "common_timestamp") == 0) {
flags |= HIST_FIELD_FL_TIMESTAMP;
hist_data->enable_timestamps = true;
+ if (flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ hist_data->attrs->ts_in_usecs = true;
key_size = sizeof(u64);
} else {
field = trace_find_event_field(file->event_call, field_name);
@@ -1241,6 +1255,8 @@ static const char *get_hist_field_flags(
flags_str = "syscall";
else if (hist_field->flags & HIST_FIELD_FL_LOG2)
flags_str = "log2";
+ else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ flags_str = "usecs";
return flags_str;
}


@@ -0,0 +1,783 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:49 -0600
Subject: [PATCH 23/48] tracing: Add variable support to hist triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add support for saving the value of a current event's event field by
assigning it to a variable that can be read by a subsequent event.
The basic syntax for saving a variable is to prefix any event field
with a unique variable name (one not matching any keyword) and an '='
sign.
Both keys and values can be saved and retrieved in this way:
# echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ...
# echo 'hist:timer_pid=common_pid:key=$timer_pid ...'
If a variable isn't a key variable or prefixed with 'vals=', the
associated event field will be saved in a variable but won't be summed
as a value:
# echo 'hist:keys=next_pid:ts1=common_timestamp:...
Multiple variables can be assigned at the same time:
# echo 'hist:keys=pid:vals=$ts0,$b,field2:ts0=common_timestamp,b=field1 ...
Multiple (or single) variables can also be assigned at the same time
using separate assignments:
# echo 'hist:keys=pid:vals=$ts0:ts0=common_timestamp:b=field1:c=field2 ...
Variables set as above can be used by being referenced from another
event, as described in a subsequent patch.
Link: http://lkml.kernel.org/r/fc93c4944d9719dbcb1d0067be627d44e98e2adc.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit b073711690e3af61965e53f197a56638b3c65a81)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 370 ++++++++++++++++++++++++++++++++++-----
1 file changed, 331 insertions(+), 39 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -30,6 +30,13 @@ typedef u64 (*hist_field_fn_t) (struct h
struct ring_buffer_event *rbe);
#define HIST_FIELD_OPERANDS_MAX 2
+#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+
+struct hist_var {
+ char *name;
+ struct hist_trigger_data *hist_data;
+ unsigned int idx;
+};
struct hist_field {
struct ftrace_event_field *field;
@@ -40,6 +47,7 @@ struct hist_field {
unsigned int is_signed;
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
struct hist_trigger_data *hist_data;
+ struct hist_var var;
};
static u64 hist_field_none(struct hist_field *field, void *event,
@@ -138,6 +146,13 @@ enum hist_field_flags {
HIST_FIELD_FL_LOG2 = 1 << 9,
HIST_FIELD_FL_TIMESTAMP = 1 << 10,
HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11,
+ HIST_FIELD_FL_VAR = 1 << 12,
+};
+
+struct var_defs {
+ unsigned int n_vars;
+ char *name[TRACING_MAP_VARS_MAX];
+ char *expr[TRACING_MAP_VARS_MAX];
};
struct hist_trigger_attrs {
@@ -150,13 +165,19 @@ struct hist_trigger_attrs {
bool clear;
bool ts_in_usecs;
unsigned int map_bits;
+
+ char *assignment_str[TRACING_MAP_VARS_MAX];
+ unsigned int n_assignments;
+
+ struct var_defs var_defs;
};
struct hist_trigger_data {
- struct hist_field *fields[TRACING_MAP_FIELDS_MAX];
+ struct hist_field *fields[HIST_FIELDS_MAX];
unsigned int n_vals;
unsigned int n_keys;
unsigned int n_fields;
+ unsigned int n_vars;
unsigned int key_size;
struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX];
unsigned int n_sort_keys;
@@ -164,6 +185,7 @@ struct hist_trigger_data {
struct hist_trigger_attrs *attrs;
struct tracing_map *map;
bool enable_timestamps;
+ bool remove;
};
static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
@@ -180,6 +202,48 @@ static u64 hist_field_timestamp(struct h
return ts;
}
+static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
+ const char *var_name)
+{
+ struct hist_field *hist_field, *found = NULL;
+ int i;
+
+ for_each_hist_field(i, hist_data) {
+ hist_field = hist_data->fields[i];
+ if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR &&
+ strcmp(hist_field->var.name, var_name) == 0) {
+ found = hist_field;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct hist_field *find_var(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ const char *var_name)
+{
+ struct hist_trigger_data *test_data;
+ struct event_trigger_data *test;
+ struct hist_field *hist_field;
+
+ hist_field = find_var_field(hist_data, var_name);
+ if (hist_field)
+ return hist_field;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ test_data = test->private_data;
+ hist_field = find_var_field(test_data, var_name);
+ if (hist_field)
+ return hist_field;
+ }
+ }
+
+ return NULL;
+}
+
static const char *hist_field_name(struct hist_field *field,
unsigned int level)
{
@@ -262,9 +326,14 @@ static int parse_map_size(char *str)
static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
{
+ unsigned int i;
+
if (!attrs)
return;
+ for (i = 0; i < attrs->n_assignments; i++)
+ kfree(attrs->assignment_str[i]);
+
kfree(attrs->name);
kfree(attrs->sort_key_str);
kfree(attrs->keys_str);
@@ -311,8 +380,22 @@ static int parse_assignment(char *str, s
goto out;
}
attrs->map_bits = map_bits;
- } else
- ret = -EINVAL;
+ } else {
+ char *assignment;
+
+ if (attrs->n_assignments == TRACING_MAP_VARS_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ assignment = kstrdup(str, GFP_KERNEL);
+ if (!assignment) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ attrs->assignment_str[attrs->n_assignments++] = assignment;
+ }
out:
return ret;
}
@@ -428,12 +511,15 @@ static void destroy_hist_field(struct hi
for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
destroy_hist_field(hist_field->operands[i], level + 1);
+ kfree(hist_field->var.name);
+
kfree(hist_field);
}
static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
struct ftrace_event_field *field,
- unsigned long flags)
+ unsigned long flags,
+ char *var_name)
{
struct hist_field *hist_field;
@@ -459,7 +545,7 @@ static struct hist_field *create_hist_fi
if (flags & HIST_FIELD_FL_LOG2) {
unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
hist_field->fn = hist_field_log2;
- hist_field->operands[0] = create_hist_field(hist_data, field, fl);
+ hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
hist_field->size = hist_field->operands[0]->size;
goto out;
}
@@ -494,14 +580,23 @@ static struct hist_field *create_hist_fi
hist_field->field = field;
hist_field->flags = flags;
+ if (var_name) {
+ hist_field->var.name = kstrdup(var_name, GFP_KERNEL);
+ if (!hist_field->var.name)
+ goto free;
+ }
+
return hist_field;
+ free:
+ destroy_hist_field(hist_field, 0);
+ return NULL;
}
static void destroy_hist_fields(struct hist_trigger_data *hist_data)
{
unsigned int i;
- for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) {
+ for (i = 0; i < HIST_FIELDS_MAX; i++) {
if (hist_data->fields[i]) {
destroy_hist_field(hist_data->fields[i], 0);
hist_data->fields[i] = NULL;
@@ -512,11 +607,12 @@ static void destroy_hist_fields(struct h
static int create_hitcount_val(struct hist_trigger_data *hist_data)
{
hist_data->fields[HITCOUNT_IDX] =
- create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT);
+ create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL);
if (!hist_data->fields[HITCOUNT_IDX])
return -ENOMEM;
hist_data->n_vals++;
+ hist_data->n_fields++;
if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
return -EINVAL;
@@ -524,19 +620,16 @@ static int create_hitcount_val(struct hi
return 0;
}
-static int create_val_field(struct hist_trigger_data *hist_data,
- unsigned int val_idx,
- struct trace_event_file *file,
- char *field_str)
+static int __create_val_field(struct hist_trigger_data *hist_data,
+ unsigned int val_idx,
+ struct trace_event_file *file,
+ char *var_name, char *field_str,
+ unsigned long flags)
{
struct ftrace_event_field *field = NULL;
- unsigned long flags = 0;
char *field_name;
int ret = 0;
- if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
- return -EINVAL;
-
field_name = strsep(&field_str, ".");
if (field_str) {
if (strcmp(field_str, "hex") == 0)
@@ -558,25 +651,58 @@ static int create_val_field(struct hist_
}
}
- hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags);
+ hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags, var_name);
if (!hist_data->fields[val_idx]) {
ret = -ENOMEM;
goto out;
}
++hist_data->n_vals;
+ ++hist_data->n_fields;
- if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+ if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
ret = -EINVAL;
out:
return ret;
}
+static int create_val_field(struct hist_trigger_data *hist_data,
+ unsigned int val_idx,
+ struct trace_event_file *file,
+ char *field_str)
+{
+ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
+ return -EINVAL;
+
+ return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
+}
+
+static int create_var_field(struct hist_trigger_data *hist_data,
+ unsigned int val_idx,
+ struct trace_event_file *file,
+ char *var_name, char *expr_str)
+{
+ unsigned long flags = 0;
+
+ if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
+ return -EINVAL;
+ if (find_var(hist_data, file, var_name) && !hist_data->remove) {
+ return -EINVAL;
+ }
+
+ flags |= HIST_FIELD_FL_VAR;
+ hist_data->n_vars++;
+ if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX))
+ return -EINVAL;
+
+ return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
+}
+
static int create_val_fields(struct hist_trigger_data *hist_data,
struct trace_event_file *file)
{
char *fields_str, *field_str;
- unsigned int i, j;
+ unsigned int i, j = 1;
int ret;
ret = create_hitcount_val(hist_data);
@@ -596,12 +722,15 @@ static int create_val_fields(struct hist
field_str = strsep(&fields_str, ",");
if (!field_str)
break;
+
if (strcmp(field_str, "hitcount") == 0)
continue;
+
ret = create_val_field(hist_data, j++, file, field_str);
if (ret)
goto out;
}
+
if (fields_str && (strcmp(fields_str, "hitcount") != 0))
ret = -EINVAL;
out:
@@ -615,11 +744,12 @@ static int create_key_field(struct hist_
char *field_str)
{
struct ftrace_event_field *field = NULL;
+ struct hist_field *hist_field = NULL;
unsigned long flags = 0;
unsigned int key_size;
int ret = 0;
- if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX))
+ if (WARN_ON(key_idx >= HIST_FIELDS_MAX))
return -EINVAL;
flags |= HIST_FIELD_FL_KEY;
@@ -627,6 +757,7 @@ static int create_key_field(struct hist_
if (strcmp(field_str, "stacktrace") == 0) {
flags |= HIST_FIELD_FL_STACKTRACE;
key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH;
+ hist_field = create_hist_field(hist_data, NULL, flags, NULL);
} else {
char *field_name = strsep(&field_str, ".");
@@ -672,7 +803,7 @@ static int create_key_field(struct hist_
}
}
- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags);
+ hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL);
if (!hist_data->fields[key_idx]) {
ret = -ENOMEM;
goto out;
@@ -688,6 +819,7 @@ static int create_key_field(struct hist_
}
hist_data->n_keys++;
+ hist_data->n_fields++;
if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX))
return -EINVAL;
@@ -731,21 +863,111 @@ static int create_key_fields(struct hist
return ret;
}
+static int create_var_fields(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file)
+{
+ unsigned int i, j = hist_data->n_vals;
+ int ret = 0;
+
+ unsigned int n_vars = hist_data->attrs->var_defs.n_vars;
+
+ for (i = 0; i < n_vars; i++) {
+ char *var_name = hist_data->attrs->var_defs.name[i];
+ char *expr = hist_data->attrs->var_defs.expr[i];
+
+ ret = create_var_field(hist_data, j++, file, var_name, expr);
+ if (ret)
+ goto out;
+ }
+ out:
+ return ret;
+}
+
+static void free_var_defs(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+ kfree(hist_data->attrs->var_defs.name[i]);
+ kfree(hist_data->attrs->var_defs.expr[i]);
+ }
+
+ hist_data->attrs->var_defs.n_vars = 0;
+}
+
+static int parse_var_defs(struct hist_trigger_data *hist_data)
+{
+ char *s, *str, *var_name, *field_str;
+ unsigned int i, j, n_vars = 0;
+ int ret = 0;
+
+ for (i = 0; i < hist_data->attrs->n_assignments; i++) {
+ str = hist_data->attrs->assignment_str[i];
+ for (j = 0; j < TRACING_MAP_VARS_MAX; j++) {
+ field_str = strsep(&str, ",");
+ if (!field_str)
+ break;
+
+ var_name = strsep(&field_str, "=");
+ if (!var_name || !field_str) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ if (n_vars == TRACING_MAP_VARS_MAX) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ s = kstrdup(var_name, GFP_KERNEL);
+ if (!s) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ hist_data->attrs->var_defs.name[n_vars] = s;
+
+ s = kstrdup(field_str, GFP_KERNEL);
+ if (!s) {
+ kfree(hist_data->attrs->var_defs.name[n_vars]);
+ ret = -ENOMEM;
+ goto free;
+ }
+ hist_data->attrs->var_defs.expr[n_vars++] = s;
+
+ hist_data->attrs->var_defs.n_vars = n_vars;
+ }
+ }
+
+ return ret;
+ free:
+ free_var_defs(hist_data);
+
+ return ret;
+}
+
static int create_hist_fields(struct hist_trigger_data *hist_data,
struct trace_event_file *file)
{
int ret;
+ ret = parse_var_defs(hist_data);
+ if (ret)
+ goto out;
+
ret = create_val_fields(hist_data, file);
if (ret)
goto out;
- ret = create_key_fields(hist_data, file);
+ ret = create_var_fields(hist_data, file);
if (ret)
goto out;
- hist_data->n_fields = hist_data->n_vals + hist_data->n_keys;
+ ret = create_key_fields(hist_data, file);
+ if (ret)
+ goto out;
out:
+ free_var_defs(hist_data);
+
return ret;
}
@@ -768,7 +990,7 @@ static int create_sort_keys(struct hist_
char *fields_str = hist_data->attrs->sort_key_str;
struct tracing_map_sort_key *sort_key;
int descending, ret = 0;
- unsigned int i, j;
+ unsigned int i, j, k;
hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */
@@ -816,12 +1038,19 @@ static int create_sort_keys(struct hist_
continue;
}
- for (j = 1; j < hist_data->n_fields; j++) {
+ for (j = 1, k = 1; j < hist_data->n_fields; j++) {
+ unsigned int idx;
+
hist_field = hist_data->fields[j];
+ if (hist_field->flags & HIST_FIELD_FL_VAR)
+ continue;
+
+ idx = k++;
+
test_name = hist_field_name(hist_field, 0);
if (strcmp(field_name, test_name) == 0) {
- sort_key->field_idx = j;
+ sort_key->field_idx = idx;
descending = is_descending(field_str);
if (descending < 0) {
ret = descending;
@@ -836,6 +1065,7 @@ static int create_sort_keys(struct hist_
break;
}
}
+
hist_data->n_sort_keys = i;
out:
return ret;
@@ -876,12 +1106,19 @@ static int create_tracing_map_fields(str
idx = tracing_map_add_key_field(map,
hist_field->offset,
cmp_fn);
-
- } else
+ } else if (!(hist_field->flags & HIST_FIELD_FL_VAR))
idx = tracing_map_add_sum_field(map);
if (idx < 0)
return idx;
+
+ if (hist_field->flags & HIST_FIELD_FL_VAR) {
+ idx = tracing_map_add_var(map);
+ if (idx < 0)
+ return idx;
+ hist_field->var.idx = idx;
+ hist_field->var.hist_data = hist_data;
+ }
}
return 0;
@@ -905,7 +1142,8 @@ static bool need_tracing_map_ops(struct
static struct hist_trigger_data *
create_hist_data(unsigned int map_bits,
struct hist_trigger_attrs *attrs,
- struct trace_event_file *file)
+ struct trace_event_file *file,
+ bool remove)
{
const struct tracing_map_ops *map_ops = NULL;
struct hist_trigger_data *hist_data;
@@ -916,6 +1154,7 @@ create_hist_data(unsigned int map_bits,
return ERR_PTR(-ENOMEM);
hist_data->attrs = attrs;
+ hist_data->remove = remove;
ret = create_hist_fields(hist_data, file);
if (ret)
@@ -962,14 +1201,28 @@ static void hist_trigger_elt_update(stru
struct ring_buffer_event *rbe)
{
struct hist_field *hist_field;
- unsigned int i;
+ unsigned int i, var_idx;
u64 hist_val;
for_each_hist_val_field(i, hist_data) {
hist_field = hist_data->fields[i];
hist_val = hist_field->fn(hist_field, rec, rbe);
+ if (hist_field->flags & HIST_FIELD_FL_VAR) {
+ var_idx = hist_field->var.idx;
+ tracing_map_set_var(elt, var_idx, hist_val);
+ continue;
+ }
tracing_map_update_sum(elt, i, hist_val);
}
+
+ for_each_hist_key_field(i, hist_data) {
+ hist_field = hist_data->fields[i];
+ if (hist_field->flags & HIST_FIELD_FL_VAR) {
+ hist_val = hist_field->fn(hist_field, rec, rbe);
+ var_idx = hist_field->var.idx;
+ tracing_map_set_var(elt, var_idx, hist_val);
+ }
+ }
}
static inline void add_to_key(char *compound_key, void *key,
@@ -1144,6 +1397,9 @@ hist_trigger_entry_print(struct seq_file
for (i = 1; i < hist_data->n_vals; i++) {
field_name = hist_field_name(hist_data->fields[i], 0);
+ if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR)
+ continue;
+
if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
seq_printf(m, " %s: %10llx", field_name,
tracing_map_read_sum(elt, i));
@@ -1265,6 +1521,9 @@ static void hist_field_print(struct seq_
{
const char *field_name = hist_field_name(hist_field, 0);
+ if (hist_field->var.name)
+ seq_printf(m, "%s=", hist_field->var.name);
+
if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
seq_puts(m, "common_timestamp");
else if (field_name)
@@ -1283,7 +1542,8 @@ static int event_hist_trigger_print(stru
struct event_trigger_data *data)
{
struct hist_trigger_data *hist_data = data->private_data;
- struct hist_field *key_field;
+ struct hist_field *field;
+ bool have_var = false;
unsigned int i;
seq_puts(m, "hist:");
@@ -1294,25 +1554,47 @@ static int event_hist_trigger_print(stru
seq_puts(m, "keys=");
for_each_hist_key_field(i, hist_data) {
- key_field = hist_data->fields[i];
+ field = hist_data->fields[i];
if (i > hist_data->n_vals)
seq_puts(m, ",");
- if (key_field->flags & HIST_FIELD_FL_STACKTRACE)
+ if (field->flags & HIST_FIELD_FL_STACKTRACE)
seq_puts(m, "stacktrace");
else
- hist_field_print(m, key_field);
+ hist_field_print(m, field);
}
seq_puts(m, ":vals=");
for_each_hist_val_field(i, hist_data) {
+ field = hist_data->fields[i];
+ if (field->flags & HIST_FIELD_FL_VAR) {
+ have_var = true;
+ continue;
+ }
+
if (i == HITCOUNT_IDX)
seq_puts(m, "hitcount");
else {
seq_puts(m, ",");
- hist_field_print(m, hist_data->fields[i]);
+ hist_field_print(m, field);
+ }
+ }
+
+ if (have_var) {
+ unsigned int n = 0;
+
+ seq_puts(m, ":");
+
+ for_each_hist_val_field(i, hist_data) {
+ field = hist_data->fields[i];
+
+ if (field->flags & HIST_FIELD_FL_VAR) {
+ if (n++)
+ seq_puts(m, ",");
+ hist_field_print(m, field);
+ }
}
}
@@ -1320,7 +1602,10 @@ static int event_hist_trigger_print(stru
for (i = 0; i < hist_data->n_sort_keys; i++) {
struct tracing_map_sort_key *sort_key;
- unsigned int idx;
+ unsigned int idx, first_key_idx;
+
+ /* skip VAR vals */
+ first_key_idx = hist_data->n_vals - hist_data->n_vars;
sort_key = &hist_data->sort_keys[i];
idx = sort_key->field_idx;
@@ -1333,8 +1618,11 @@ static int event_hist_trigger_print(stru
if (idx == HITCOUNT_IDX)
seq_puts(m, "hitcount");
- else
+ else {
+ if (idx >= first_key_idx)
+ idx += hist_data->n_vars;
hist_field_print(m, hist_data->fields[idx]);
+ }
if (sort_key->descending)
seq_puts(m, ".descending");
@@ -1631,7 +1919,7 @@ static void hist_unregister_trigger(char
test->ops->free(test->ops, test);
if (hist_data->enable_timestamps) {
- if (unregistered)
+ if (!hist_data->remove || unregistered)
tracing_set_time_stamp_abs(file->tr, false);
}
}
@@ -1664,12 +1952,16 @@ static int event_hist_trigger_func(struc
struct hist_trigger_attrs *attrs;
struct event_trigger_ops *trigger_ops;
struct hist_trigger_data *hist_data;
+ bool remove = false;
char *trigger;
int ret = 0;
if (!param)
return -EINVAL;
+ if (glob[0] == '!')
+ remove = true;
+
/* separate the trigger from the filter (k:v [if filter]) */
trigger = strsep(&param, " \t");
if (!trigger)
@@ -1682,7 +1974,7 @@ static int event_hist_trigger_func(struc
if (attrs->map_bits)
hist_trigger_bits = attrs->map_bits;
- hist_data = create_hist_data(hist_trigger_bits, attrs, file);
+ hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove);
if (IS_ERR(hist_data)) {
destroy_hist_trigger_attrs(attrs);
return PTR_ERR(hist_data);
@@ -1711,7 +2003,7 @@ static int event_hist_trigger_func(struc
goto out_free;
}
- if (glob[0] == '!') {
+ if (remove) {
cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
ret = 0;
goto out_free;


@@ -0,0 +1,46 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:50 -0600
Subject: [PATCH 24/48] tracing: Account for variables in named trigger
compatibility
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Named triggers must also have the same set of variables in order to be
considered compatible - update the trigger match test to account for
that.
The reason for this requirement is that named triggers with variables
are meant to allow one or more events to set the same variable.
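For example (hypothetical triggers, shown only to illustrate the rule),
two events sharing the named trigger 'lat' would have to declare the
same ts0 variable:

  # echo 'hist:name=lat:keys=pid:ts0=common_timestamp' ...   (event A)
  # echo 'hist:name=lat:keys=pid:ts0=common_timestamp' ...   (event B)

If one of them omitted or renamed ts0, the updated match test would
treat the triggers as incompatible.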
Link: http://lkml.kernel.org/r/a17eae6328a99917f9d5c66129c9fcd355279ee9.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit f94add7df3d72bc8e659f9491e25d91c9dae1b44)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1610,7 +1610,7 @@ static int event_hist_trigger_print(stru
sort_key = &hist_data->sort_keys[i];
idx = sort_key->field_idx;
- if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
+ if (WARN_ON(idx >= HIST_FIELDS_MAX))
return -EINVAL;
if (i > 0)
@@ -1798,6 +1798,11 @@ static bool hist_trigger_match(struct ev
return false;
if (key_field->is_signed != key_field_test->is_signed)
return false;
+ if (!!key_field->var.name != !!key_field_test->var.name)
+ return false;
+ if (key_field->var.name &&
+ strcmp(key_field->var.name, key_field_test->var.name) != 0)
+ return false;
}
for (i = 0; i < hist_data->n_sort_keys; i++) {


@@ -0,0 +1,78 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:51 -0600
Subject: [PATCH 25/48] tracing: Move get_hist_field_flags()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Move get_hist_field_flags() to make it more easily accessible for new
code (and keep the move separate from new functionality).
Link: http://lkml.kernel.org/r/32470f0a7047ec7a6e84ba5ec89d6142cc6ede7d.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit fde3bce553d359c01beb9a6fce4013b65076aff3)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 44 +++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 22 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -497,6 +497,28 @@ static const struct tracing_map_ops hist
.elt_init = hist_trigger_elt_comm_init,
};
+static const char *get_hist_field_flags(struct hist_field *hist_field)
+{
+ const char *flags_str = NULL;
+
+ if (hist_field->flags & HIST_FIELD_FL_HEX)
+ flags_str = "hex";
+ else if (hist_field->flags & HIST_FIELD_FL_SYM)
+ flags_str = "sym";
+ else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
+ flags_str = "sym-offset";
+ else if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
+ flags_str = "execname";
+ else if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
+ flags_str = "syscall";
+ else if (hist_field->flags & HIST_FIELD_FL_LOG2)
+ flags_str = "log2";
+ else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ flags_str = "usecs";
+
+ return flags_str;
+}
+
static void destroy_hist_field(struct hist_field *hist_field,
unsigned int level)
{
@@ -1495,28 +1517,6 @@ const struct file_operations event_hist_
.release = single_release,
};
-static const char *get_hist_field_flags(struct hist_field *hist_field)
-{
- const char *flags_str = NULL;
-
- if (hist_field->flags & HIST_FIELD_FL_HEX)
- flags_str = "hex";
- else if (hist_field->flags & HIST_FIELD_FL_SYM)
- flags_str = "sym";
- else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
- flags_str = "sym-offset";
- else if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
- flags_str = "execname";
- else if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
- flags_str = "syscall";
- else if (hist_field->flags & HIST_FIELD_FL_LOG2)
- flags_str = "log2";
- else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
- flags_str = "usecs";
-
- return flags_str;
-}
-
static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
{
const char *field_name = hist_field_name(hist_field, 0);


@@ -0,0 +1,628 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:52 -0600
Subject: [PATCH 26/48] tracing: Add simple expression support to hist triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add support for simple addition, subtraction, and unary expressions
(-(expr) and expr, where expr = b-a, a+b, a+b+c) to hist triggers, in
order to support a minimal set of useful inter-event calculations.
These operations are needed for calculating latencies between events
(timestamp1-timestamp0) and for combined latencies (latencies over 3
or more events).
In the process, factor out some common code from key and value
parsing.
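For example (reusing the illustrative fields from earlier patches in
this series), a latency can now be computed inline as the difference of
two values:

  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' ...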
Link: http://lkml.kernel.org/r/9a9308ead4fe32a433d9c7e95921fb798394f6b2.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
[kbuild test robot fix, add static to parse_atom()]
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 26c5cb5e4790fec96e3eba02c347e78fa72273a8)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 487 +++++++++++++++++++++++++++++++++------
1 file changed, 413 insertions(+), 74 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -32,6 +32,13 @@ typedef u64 (*hist_field_fn_t) (struct h
#define HIST_FIELD_OPERANDS_MAX 2
#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+enum field_op_id {
+ FIELD_OP_NONE,
+ FIELD_OP_PLUS,
+ FIELD_OP_MINUS,
+ FIELD_OP_UNARY_MINUS,
+};
+
struct hist_var {
char *name;
struct hist_trigger_data *hist_data;
@@ -48,6 +55,8 @@ struct hist_field {
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
struct hist_trigger_data *hist_data;
struct hist_var var;
+ enum field_op_id operator;
+ char *name;
};
static u64 hist_field_none(struct hist_field *field, void *event,
@@ -98,6 +107,41 @@ static u64 hist_field_log2(struct hist_f
return (u64) ilog2(roundup_pow_of_two(val));
}
+static u64 hist_field_plus(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, event, rbe);
+ u64 val2 = operand2->fn(operand2, event, rbe);
+
+ return val1 + val2;
+}
+
+static u64 hist_field_minus(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, event, rbe);
+ u64 val2 = operand2->fn(operand2, event, rbe);
+
+ return val1 - val2;
+}
+
+static u64 hist_field_unary_minus(struct hist_field *hist_field, void *event,
+ struct ring_buffer_event *rbe)
+{
+ struct hist_field *operand = hist_field->operands[0];
+
+ s64 sval = (s64)operand->fn(operand, event, rbe);
+ u64 val = (u64)-sval;
+
+ return val;
+}
+
#define DEFINE_HIST_FIELD_FN(type) \
static u64 hist_field_##type(struct hist_field *hist_field, \
void *event, \
@@ -147,6 +191,7 @@ enum hist_field_flags {
HIST_FIELD_FL_TIMESTAMP = 1 << 10,
HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11,
HIST_FIELD_FL_VAR = 1 << 12,
+ HIST_FIELD_FL_EXPR = 1 << 13,
};
struct var_defs {
@@ -258,6 +303,8 @@ static const char *hist_field_name(struc
field_name = hist_field_name(field->operands[0], ++level);
else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
field_name = "common_timestamp";
+ else if (field->flags & HIST_FIELD_FL_EXPR)
+ field_name = field->name;
if (field_name == NULL)
field_name = "";
@@ -519,12 +566,104 @@ static const char *get_hist_field_flags(
return flags_str;
}
+static void expr_field_str(struct hist_field *field, char *expr)
+{
+ strcat(expr, hist_field_name(field, 0));
+
+ if (field->flags) {
+ const char *flags_str = get_hist_field_flags(field);
+
+ if (flags_str) {
+ strcat(expr, ".");
+ strcat(expr, flags_str);
+ }
+ }
+}
+
+static char *expr_str(struct hist_field *field, unsigned int level)
+{
+ char *expr;
+
+ if (level > 1)
+ return NULL;
+
+ expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+ if (!expr)
+ return NULL;
+
+ if (!field->operands[0]) {
+ expr_field_str(field, expr);
+ return expr;
+ }
+
+ if (field->operator == FIELD_OP_UNARY_MINUS) {
+ char *subexpr;
+
+ strcat(expr, "-(");
+ subexpr = expr_str(field->operands[0], ++level);
+ if (!subexpr) {
+ kfree(expr);
+ return NULL;
+ }
+ strcat(expr, subexpr);
+ strcat(expr, ")");
+
+ kfree(subexpr);
+
+ return expr;
+ }
+
+ expr_field_str(field->operands[0], expr);
+
+ switch (field->operator) {
+ case FIELD_OP_MINUS:
+ strcat(expr, "-");
+ break;
+ case FIELD_OP_PLUS:
+ strcat(expr, "+");
+ break;
+ default:
+ kfree(expr);
+ return NULL;
+ }
+
+ expr_field_str(field->operands[1], expr);
+
+ return expr;
+}
+
+static int contains_operator(char *str)
+{
+ enum field_op_id field_op = FIELD_OP_NONE;
+ char *op;
+
+ op = strpbrk(str, "+-");
+ if (!op)
+ return FIELD_OP_NONE;
+
+ switch (*op) {
+ case '-':
+ if (*str == '-')
+ field_op = FIELD_OP_UNARY_MINUS;
+ else
+ field_op = FIELD_OP_MINUS;
+ break;
+ case '+':
+ field_op = FIELD_OP_PLUS;
+ break;
+ default:
+ break;
+ }
+
+ return field_op;
+}
+
static void destroy_hist_field(struct hist_field *hist_field,
unsigned int level)
{
unsigned int i;
- if (level > 2)
+ if (level > 3)
return;
if (!hist_field)
@@ -534,6 +673,7 @@ static void destroy_hist_field(struct hi
destroy_hist_field(hist_field->operands[i], level + 1);
kfree(hist_field->var.name);
+ kfree(hist_field->name);
kfree(hist_field);
}
@@ -554,6 +694,9 @@ static struct hist_field *create_hist_fi
hist_field->hist_data = hist_data;
+ if (flags & HIST_FIELD_FL_EXPR)
+ goto out; /* caller will populate */
+
if (flags & HIST_FIELD_FL_HITCOUNT) {
hist_field->fn = hist_field_counter;
goto out;
@@ -626,6 +769,257 @@ static void destroy_hist_fields(struct h
}
}
+static struct ftrace_event_field *
+parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
+ char *field_str, unsigned long *flags)
+{
+ struct ftrace_event_field *field = NULL;
+ char *field_name, *modifier, *str;
+
+ modifier = str = kstrdup(field_str, GFP_KERNEL);
+ if (!modifier)
+ return ERR_PTR(-ENOMEM);
+
+ field_name = strsep(&modifier, ".");
+ if (modifier) {
+ if (strcmp(modifier, "hex") == 0)
+ *flags |= HIST_FIELD_FL_HEX;
+ else if (strcmp(modifier, "sym") == 0)
+ *flags |= HIST_FIELD_FL_SYM;
+ else if (strcmp(modifier, "sym-offset") == 0)
+ *flags |= HIST_FIELD_FL_SYM_OFFSET;
+ else if ((strcmp(modifier, "execname") == 0) &&
+ (strcmp(field_name, "common_pid") == 0))
+ *flags |= HIST_FIELD_FL_EXECNAME;
+ else if (strcmp(modifier, "syscall") == 0)
+ *flags |= HIST_FIELD_FL_SYSCALL;
+ else if (strcmp(modifier, "log2") == 0)
+ *flags |= HIST_FIELD_FL_LOG2;
+ else if (strcmp(modifier, "usecs") == 0)
+ *flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
+ else {
+ field = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ }
+
+ if (strcmp(field_name, "common_timestamp") == 0) {
+ *flags |= HIST_FIELD_FL_TIMESTAMP;
+ hist_data->enable_timestamps = true;
+ if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+ hist_data->attrs->ts_in_usecs = true;
+ } else {
+ field = trace_find_event_field(file->event_call, field_name);
+ if (!field || !field->size) {
+ field = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ }
+ out:
+ kfree(str);
+
+ return field;
+}
+
+static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file, char *str,
+ unsigned long *flags, char *var_name)
+{
+ struct ftrace_event_field *field = NULL;
+ struct hist_field *hist_field = NULL;
+ int ret = 0;
+
+ field = parse_field(hist_data, file, str, flags);
+ if (IS_ERR(field)) {
+ ret = PTR_ERR(field);
+ goto out;
+ }
+
+ hist_field = create_hist_field(hist_data, field, *flags, var_name);
+ if (!hist_field) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ return hist_field;
+ out:
+ return ERR_PTR(ret);
+}
+
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ char *str, unsigned long flags,
+ char *var_name, unsigned int level);
+
+static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ char *str, unsigned long flags,
+ char *var_name, unsigned int level)
+{
+ struct hist_field *operand1, *expr = NULL;
+ unsigned long operand_flags;
+ int ret = 0;
+ char *s;
+
+ /* we support only -(xxx) i.e. explicit parens required */
+
+ if (level > 3) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ str++; /* skip leading '-' */
+
+ s = strchr(str, '(');
+ if (s)
+ str++;
+ else {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ s = strrchr(str, ')');
+ if (s)
+ *s = '\0';
+ else {
+ ret = -EINVAL; /* no closing ')' */
+ goto free;
+ }
+
+ flags |= HIST_FIELD_FL_EXPR;
+ expr = create_hist_field(hist_data, NULL, flags, var_name);
+ if (!expr) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ operand_flags = 0;
+ operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+ if (IS_ERR(operand1)) {
+ ret = PTR_ERR(operand1);
+ goto free;
+ }
+
+ expr->flags |= operand1->flags &
+ (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+ expr->fn = hist_field_unary_minus;
+ expr->operands[0] = operand1;
+ expr->operator = FIELD_OP_UNARY_MINUS;
+ expr->name = expr_str(expr, 0);
+
+ return expr;
+ free:
+ destroy_hist_field(expr, 0);
+ return ERR_PTR(ret);
+}
+
+static int check_expr_operands(struct hist_field *operand1,
+ struct hist_field *operand2)
+{
+ unsigned long operand1_flags = operand1->flags;
+ unsigned long operand2_flags = operand2->flags;
+
+ if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) !=
+ (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS))
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ char *str, unsigned long flags,
+ char *var_name, unsigned int level)
+{
+ struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL;
+ unsigned long operand_flags;
+ int field_op, ret = -EINVAL;
+ char *sep, *operand1_str;
+
+ if (level > 3)
+ return ERR_PTR(-EINVAL);
+
+ field_op = contains_operator(str);
+
+ if (field_op == FIELD_OP_NONE)
+ return parse_atom(hist_data, file, str, &flags, var_name);
+
+ if (field_op == FIELD_OP_UNARY_MINUS)
+ return parse_unary(hist_data, file, str, flags, var_name, ++level);
+
+ switch (field_op) {
+ case FIELD_OP_MINUS:
+ sep = "-";
+ break;
+ case FIELD_OP_PLUS:
+ sep = "+";
+ break;
+ default:
+ goto free;
+ }
+
+ operand1_str = strsep(&str, sep);
+ if (!operand1_str || !str)
+ goto free;
+
+ operand_flags = 0;
+ operand1 = parse_atom(hist_data, file, operand1_str,
+ &operand_flags, NULL);
+ if (IS_ERR(operand1)) {
+ ret = PTR_ERR(operand1);
+ operand1 = NULL;
+ goto free;
+ }
+
+ /* rest of string could be another expression, e.g. b+c in a+b+c */
+ operand_flags = 0;
+ operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+ if (IS_ERR(operand2)) {
+ ret = PTR_ERR(operand2);
+ operand2 = NULL;
+ goto free;
+ }
+
+ ret = check_expr_operands(operand1, operand2);
+ if (ret)
+ goto free;
+
+ flags |= HIST_FIELD_FL_EXPR;
+
+ flags |= operand1->flags &
+ (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+ expr = create_hist_field(hist_data, NULL, flags, var_name);
+ if (!expr) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ expr->operands[0] = operand1;
+ expr->operands[1] = operand2;
+ expr->operator = field_op;
+ expr->name = expr_str(expr, 0);
+
+ switch (field_op) {
+ case FIELD_OP_MINUS:
+ expr->fn = hist_field_minus;
+ break;
+ case FIELD_OP_PLUS:
+ expr->fn = hist_field_plus;
+ break;
+ default:
+ goto free;
+ }
+
+ return expr;
+ free:
+ destroy_hist_field(operand1, 0);
+ destroy_hist_field(operand2, 0);
+ destroy_hist_field(expr, 0);
+
+ return ERR_PTR(ret);
+}
+
static int create_hitcount_val(struct hist_trigger_data *hist_data)
{
hist_data->fields[HITCOUNT_IDX] =
@@ -648,37 +1042,17 @@ static int __create_val_field(struct his
char *var_name, char *field_str,
unsigned long flags)
{
- struct ftrace_event_field *field = NULL;
- char *field_name;
+ struct hist_field *hist_field;
int ret = 0;
- field_name = strsep(&field_str, ".");
- if (field_str) {
- if (strcmp(field_str, "hex") == 0)
- flags |= HIST_FIELD_FL_HEX;
- else {
- ret = -EINVAL;
- goto out;
- }
- }
-
- if (strcmp(field_name, "common_timestamp") == 0) {
- flags |= HIST_FIELD_FL_TIMESTAMP;
- hist_data->enable_timestamps = true;
- } else {
- field = trace_find_event_field(file->event_call, field_name);
- if (!field || !field->size) {
- ret = -EINVAL;
- goto out;
- }
- }
-
- hist_data->fields[val_idx] = create_hist_field(hist_data, field, flags, var_name);
- if (!hist_data->fields[val_idx]) {
- ret = -ENOMEM;
+ hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0);
+ if (IS_ERR(hist_field)) {
+ ret = PTR_ERR(hist_field);
goto out;
}
+ hist_data->fields[val_idx] = hist_field;
+
++hist_data->n_vals;
++hist_data->n_fields;
@@ -765,8 +1139,8 @@ static int create_key_field(struct hist_
struct trace_event_file *file,
char *field_str)
{
- struct ftrace_event_field *field = NULL;
struct hist_field *hist_field = NULL;
+
unsigned long flags = 0;
unsigned int key_size;
int ret = 0;
@@ -781,60 +1155,24 @@ static int create_key_field(struct hist_
key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH;
hist_field = create_hist_field(hist_data, NULL, flags, NULL);
} else {
- char *field_name = strsep(&field_str, ".");
-
- if (field_str) {
- if (strcmp(field_str, "hex") == 0)
- flags |= HIST_FIELD_FL_HEX;
- else if (strcmp(field_str, "sym") == 0)
- flags |= HIST_FIELD_FL_SYM;
- else if (strcmp(field_str, "sym-offset") == 0)
- flags |= HIST_FIELD_FL_SYM_OFFSET;
- else if ((strcmp(field_str, "execname") == 0) &&
- (strcmp(field_name, "common_pid") == 0))
- flags |= HIST_FIELD_FL_EXECNAME;
- else if (strcmp(field_str, "syscall") == 0)
- flags |= HIST_FIELD_FL_SYSCALL;
- else if (strcmp(field_str, "log2") == 0)
- flags |= HIST_FIELD_FL_LOG2;
- else if (strcmp(field_str, "usecs") == 0)
- flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
- else {
- ret = -EINVAL;
- goto out;
- }
+ hist_field = parse_expr(hist_data, file, field_str, flags,
+ NULL, 0);
+ if (IS_ERR(hist_field)) {
+ ret = PTR_ERR(hist_field);
+ goto out;
}
- if (strcmp(field_name, "common_timestamp") == 0) {
- flags |= HIST_FIELD_FL_TIMESTAMP;
- hist_data->enable_timestamps = true;
- if (flags & HIST_FIELD_FL_TIMESTAMP_USECS)
- hist_data->attrs->ts_in_usecs = true;
- key_size = sizeof(u64);
- } else {
- field = trace_find_event_field(file->event_call, field_name);
- if (!field || !field->size) {
- ret = -EINVAL;
- goto out;
- }
-
- if (is_string_field(field))
- key_size = MAX_FILTER_STR_VAL;
- else
- key_size = field->size;
- }
+ key_size = hist_field->size;
}
- hist_data->fields[key_idx] = create_hist_field(hist_data, field, flags, NULL);
- if (!hist_data->fields[key_idx]) {
- ret = -ENOMEM;
- goto out;
- }
+ hist_data->fields[key_idx] = hist_field;
key_size = ALIGN(key_size, sizeof(u64));
hist_data->fields[key_idx]->size = key_size;
hist_data->fields[key_idx]->offset = key_offset;
+
hist_data->key_size += key_size;
+
if (hist_data->key_size > HIST_KEY_SIZE_MAX) {
ret = -EINVAL;
goto out;
@@ -1419,7 +1757,8 @@ hist_trigger_entry_print(struct seq_file
for (i = 1; i < hist_data->n_vals; i++) {
field_name = hist_field_name(hist_data->fields[i], 0);
- if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR)
+ if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR ||
+ hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
continue;
if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {

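The recursive parser added above is easier to follow in a stripped-down model. The following standalone C sketch is hypothetical (numbers stand in for event fields, and returning 0 stands in for the kernel's -EINVAL), but it mirrors the mechanics: the strpbrk() probe plays contains_operator(), the '-(...)' branch plays parse_unary(), and the binary case splits off the first operand and recurses on the remainder, exactly as parse_expr() does with strsep(), which makes operator chains right-associative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long eval(char *str);

/* stands in for parse_atom(): a leaf is just a number here */
static long eval_atom(const char *s)
{
    return strtol(s, NULL, 0);
}

static long eval(char *str)
{
    char *op = strpbrk(str, "+-");    /* contains_operator() */
    char c;
    long lhs;

    if (!op)                          /* FIELD_OP_NONE */
        return eval_atom(str);

    if (*str == '-') {                /* FIELD_OP_UNARY_MINUS: -(expr) */
        char *close = strrchr(str, ')');

        if (str[1] != '(' || !close)
            return 0;                 /* the kernel returns -EINVAL */
        *close = '\0';
        return -eval(str + 2);
    }

    c = *op;                          /* FIELD_OP_PLUS / FIELD_OP_MINUS */
    *op = '\0';                       /* split, like strsep(&str, sep) */
    lhs = eval_atom(str);             /* operand1 must be an atom */
    return c == '+' ? lhs + eval(op + 1) : lhs - eval(op + 1);
}

int main(void)
{
    char a[] = "8-2-1", b[] = "-(3+4)";

    printf("%ld %ld\n", eval(a), eval(b));  /* 7 -7: right-associative */
    return 0;
}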
View File

@@ -0,0 +1,159 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:53 -0600
Subject: [PATCH 27/48] tracing: Generalize per-element hist trigger data
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Up until now, hist triggers only needed per-element support for saving
'comm' data, which was saved directly as a private data pointer.
In anticipation of the need to save other data besides 'comm', add a
new hist_elt_data struct for the purpose, and switch the current
'comm'-related code over to that.
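The pattern is easiest to see outside the kernel. A minimal userspace analog of the new alloc/free pair (hypothetical names; malloc/calloc stand in for kzalloc with GFP_KERNEL):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* analog of the new hist_elt_data; later members slot in alongside comm */
struct elt_data {
    char *comm;
};

static struct elt_data *elt_data_alloc(const char *task_comm)
{
    struct elt_data *d = calloc(1, sizeof(*d));  /* kzalloc() analog */

    if (!d)
        return NULL;
    if (task_comm) {                  /* only when an EXECNAME key needs it */
        d->comm = strdup(task_comm);
        if (!d->comm) {
            free(d);
            return NULL;
        }
    }
    return d;
}

static void elt_data_free(struct elt_data *d)  /* hist_elt_data_free() analog */
{
    if (!d)
        return;
    free(d->comm);
    free(d);
}

int main(void)
{
    struct elt_data *d = elt_data_alloc("bash");

    if (d)
        printf("comm=%s\n", d->comm);
    elt_data_free(d);
    return 0;
}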
Link: http://lkml.kernel.org/r/4502c338c965ddf5fc19fb1ec4764391e001ed4b.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 8102d0cb859d223564b17afb01e33701f57191d1)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 76 ++++++++++++++++++++++-----------------
1 file changed, 43 insertions(+), 33 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -289,6 +289,10 @@ static struct hist_field *find_var(struc
return NULL;
}
+struct hist_elt_data {
+ char *comm;
+};
+
static const char *hist_field_name(struct hist_field *field,
unsigned int level)
{
@@ -503,45 +507,61 @@ static inline void save_comm(char *comm,
memcpy(comm, task->comm, TASK_COMM_LEN);
}
-static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt)
+static void hist_elt_data_free(struct hist_elt_data *elt_data)
+{
+ kfree(elt_data->comm);
+ kfree(elt_data);
+}
+
+static void hist_trigger_elt_data_free(struct tracing_map_elt *elt)
{
- kfree((char *)elt->private_data);
+ struct hist_elt_data *elt_data = elt->private_data;
+
+ hist_elt_data_free(elt_data);
}
-static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt)
+static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
{
struct hist_trigger_data *hist_data = elt->map->private_data;
+ unsigned int size = TASK_COMM_LEN;
+ struct hist_elt_data *elt_data;
struct hist_field *key_field;
unsigned int i;
+ elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
+ if (!elt_data)
+ return -ENOMEM;
+
for_each_hist_key_field(i, hist_data) {
key_field = hist_data->fields[i];
if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
- unsigned int size = TASK_COMM_LEN + 1;
-
- elt->private_data = kzalloc(size, GFP_KERNEL);
- if (!elt->private_data)
+ elt_data->comm = kzalloc(size, GFP_KERNEL);
+ if (!elt_data->comm) {
+ kfree(elt_data);
return -ENOMEM;
+ }
break;
}
}
+ elt->private_data = elt_data;
+
return 0;
}
-static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt)
+static void hist_trigger_elt_data_init(struct tracing_map_elt *elt)
{
- char *comm = elt->private_data;
+ struct hist_elt_data *elt_data = elt->private_data;
- if (comm)
- save_comm(comm, current);
+ if (elt_data->comm)
+ save_comm(elt_data->comm, current);
}
-static const struct tracing_map_ops hist_trigger_elt_comm_ops = {
- .elt_alloc = hist_trigger_elt_comm_alloc,
- .elt_free = hist_trigger_elt_comm_free,
- .elt_init = hist_trigger_elt_comm_init,
+static const struct tracing_map_ops hist_trigger_elt_data_ops = {
+ .elt_alloc = hist_trigger_elt_data_alloc,
+ .elt_free = hist_trigger_elt_data_free,
+ .elt_init = hist_trigger_elt_data_init,
};
static const char *get_hist_field_flags(struct hist_field *hist_field)
@@ -1484,21 +1504,6 @@ static int create_tracing_map_fields(str
return 0;
}
-static bool need_tracing_map_ops(struct hist_trigger_data *hist_data)
-{
- struct hist_field *key_field;
- unsigned int i;
-
- for_each_hist_key_field(i, hist_data) {
- key_field = hist_data->fields[i];
-
- if (key_field->flags & HIST_FIELD_FL_EXECNAME)
- return true;
- }
-
- return false;
-}
-
static struct hist_trigger_data *
create_hist_data(unsigned int map_bits,
struct hist_trigger_attrs *attrs,
@@ -1524,8 +1529,7 @@ create_hist_data(unsigned int map_bits,
if (ret)
goto free;
- if (need_tracing_map_ops(hist_data))
- map_ops = &hist_trigger_elt_comm_ops;
+ map_ops = &hist_trigger_elt_data_ops;
hist_data->map = tracing_map_create(map_bits, hist_data->key_size,
map_ops, hist_data);
@@ -1713,7 +1717,13 @@ hist_trigger_entry_print(struct seq_file
seq_printf(m, "%s: [%llx] %-55s", field_name,
uval, str);
} else if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
- char *comm = elt->private_data;
+ struct hist_elt_data *elt_data = elt->private_data;
+ char *comm;
+
+ if (WARN_ON_ONCE(!elt_data))
+ return;
+
+ comm = elt_data->comm;
uval = *(u64 *)(key + key_field->offset);
seq_printf(m, "%s: %-16s[%10llu]", field_name,

View File

@@ -0,0 +1,226 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:54 -0600
Subject: [PATCH 28/48] tracing: Pass tracing_map_elt to hist_field accessor
functions
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Some accessor functions, such as for variable references, require
access to a corresponding tracing_map_elt.
Add a tracing_map_elt param to the function signature and update the
accessor functions accordingly.
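Schematically, the change just threads one extra context argument through the function pointer. A hypothetical standalone sketch, with tracing_map_elt reduced to a struct carrying private_data:

#include <stdio.h>

struct elt {
    void *private_data;               /* stand-in for tracing_map_elt */
};

struct field;
typedef unsigned long long (*field_fn_t)(struct field *f, struct elt *elt,
                                          void *event);

struct field {
    field_fn_t fn;
};

/* most accessors ignore the new argument entirely ... */
static unsigned long long field_counter(struct field *f, struct elt *elt,
                                        void *event)
{
    (void)f; (void)elt; (void)event;
    return 1;
}

/* ... but per-element accessors (variable references, later) need it */
static unsigned long long field_elt_val(struct field *f, struct elt *elt,
                                        void *event)
{
    (void)f; (void)event;
    return *(unsigned long long *)elt->private_data;
}

int main(void)
{
    unsigned long long v = 42;
    struct elt e = { &v };
    struct field c = { field_counter }, r = { field_elt_val };

    printf("%llu %llu\n", c.fn(&c, &e, NULL), r.fn(&r, &e, NULL));
    return 0;
}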
Link: http://lkml.kernel.org/r/e0f292b068e9e4948da1d5af21b5ae0efa9b5717.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 8405bbbbc9dc0d88ffc92848cb8f0bda2c7a1b30)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 91 ++++++++++++++++++++++++---------------
1 file changed, 57 insertions(+), 34 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -26,8 +26,10 @@
struct hist_field;
-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event,
- struct ring_buffer_event *rbe);
+typedef u64 (*hist_field_fn_t) (struct hist_field *field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event);
#define HIST_FIELD_OPERANDS_MAX 2
#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
@@ -59,28 +61,36 @@ struct hist_field {
char *name;
};
-static u64 hist_field_none(struct hist_field *field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_none(struct hist_field *field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
return 0;
}
-static u64 hist_field_counter(struct hist_field *field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_counter(struct hist_field *field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
return 1;
}
-static u64 hist_field_string(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_string(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
char *addr = (char *)(event + hist_field->field->offset);
return (u64)(unsigned long)addr;
}
-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_dynstring(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
u32 str_item = *(u32 *)(event + hist_field->field->offset);
int str_loc = str_item & 0xffff;
@@ -89,54 +99,64 @@ static u64 hist_field_dynstring(struct h
return (u64)(unsigned long)addr;
}
-static u64 hist_field_pstring(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_pstring(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
char **addr = (char **)(event + hist_field->field->offset);
return (u64)(unsigned long)*addr;
}
-static u64 hist_field_log2(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_log2(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
struct hist_field *operand = hist_field->operands[0];
- u64 val = operand->fn(operand, event, rbe);
+ u64 val = operand->fn(operand, elt, rbe, event);
return (u64) ilog2(roundup_pow_of_two(val));
}
-static u64 hist_field_plus(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_plus(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
struct hist_field *operand1 = hist_field->operands[0];
struct hist_field *operand2 = hist_field->operands[1];
- u64 val1 = operand1->fn(operand1, event, rbe);
- u64 val2 = operand2->fn(operand2, event, rbe);
+ u64 val1 = operand1->fn(operand1, elt, rbe, event);
+ u64 val2 = operand2->fn(operand2, elt, rbe, event);
return val1 + val2;
}
-static u64 hist_field_minus(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_minus(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
struct hist_field *operand1 = hist_field->operands[0];
struct hist_field *operand2 = hist_field->operands[1];
- u64 val1 = operand1->fn(operand1, event, rbe);
- u64 val2 = operand2->fn(operand2, event, rbe);
+ u64 val1 = operand1->fn(operand1, elt, rbe, event);
+ u64 val2 = operand2->fn(operand2, elt, rbe, event);
return val1 - val2;
}
-static u64 hist_field_unary_minus(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_unary_minus(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
struct hist_field *operand = hist_field->operands[0];
- s64 sval = (s64)operand->fn(operand, event, rbe);
+ s64 sval = (s64)operand->fn(operand, elt, rbe, event);
u64 val = (u64)-sval;
return val;
@@ -144,8 +164,9 @@ static u64 hist_field_unary_minus(struct
#define DEFINE_HIST_FIELD_FN(type) \
static u64 hist_field_##type(struct hist_field *hist_field, \
- void *event, \
- struct ring_buffer_event *rbe) \
+ struct tracing_map_elt *elt, \
+ struct ring_buffer_event *rbe, \
+ void *event) \
{ \
type *addr = (type *)(event + hist_field->field->offset); \
\
@@ -233,8 +254,10 @@ struct hist_trigger_data {
bool remove;
};
-static u64 hist_field_timestamp(struct hist_field *hist_field, void *event,
- struct ring_buffer_event *rbe)
+static u64 hist_field_timestamp(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
{
struct hist_trigger_data *hist_data = hist_field->hist_data;
struct trace_array *tr = hist_data->event_file->tr;
@@ -1570,7 +1593,7 @@ static void hist_trigger_elt_update(stru
for_each_hist_val_field(i, hist_data) {
hist_field = hist_data->fields[i];
- hist_val = hist_field->fn(hist_field, rec, rbe);
+ hist_val = hist_field->fn(hist_field, elt, rbe, rec);
if (hist_field->flags & HIST_FIELD_FL_VAR) {
var_idx = hist_field->var.idx;
tracing_map_set_var(elt, var_idx, hist_val);
@@ -1582,7 +1605,7 @@ static void hist_trigger_elt_update(stru
for_each_hist_key_field(i, hist_data) {
hist_field = hist_data->fields[i];
if (hist_field->flags & HIST_FIELD_FL_VAR) {
- hist_val = hist_field->fn(hist_field, rec, rbe);
+ hist_val = hist_field->fn(hist_field, elt, rbe, rec);
var_idx = hist_field->var.idx;
tracing_map_set_var(elt, var_idx, hist_val);
}
@@ -1620,9 +1643,9 @@ static void event_hist_trigger(struct ev
bool use_compound_key = (hist_data->n_keys > 1);
unsigned long entries[HIST_STACKTRACE_DEPTH];
char compound_key[HIST_KEY_SIZE_MAX];
+ struct tracing_map_elt *elt = NULL;
struct stack_trace stacktrace;
struct hist_field *key_field;
- struct tracing_map_elt *elt;
u64 field_contents;
void *key = NULL;
unsigned int i;
@@ -1643,7 +1666,7 @@ static void event_hist_trigger(struct ev
key = entries;
} else {
- field_contents = key_field->fn(key_field, rec, rbe);
+ field_contents = key_field->fn(key_field, elt, rbe, rec);
if (key_field->flags & HIST_FIELD_FL_STRING) {
key = (void *)(unsigned long)field_contents;
use_compound_key = true;

View File

@ -0,0 +1,118 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:55 -0600
Subject: [PATCH 29/48] tracing: Add hist_field 'type' field
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Future support for synthetic events requires hist_field 'type'
information, so add a field for that.
Also, make other hist_field attribute usage consistent (size,
is_signed, etc).
Link: http://lkml.kernel.org/r/3fd12a2e86316b05151ba0d7c68268e780af2c9d.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit d544a468f82526e97cc80c18a019708eb203b00a)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -54,6 +54,7 @@ struct hist_field {
unsigned int size;
unsigned int offset;
unsigned int is_signed;
+ const char *type;
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
struct hist_trigger_data *hist_data;
struct hist_var var;
@@ -717,6 +718,7 @@ static void destroy_hist_field(struct hi
kfree(hist_field->var.name);
kfree(hist_field->name);
+ kfree(hist_field->type);
kfree(hist_field);
}
@@ -742,6 +744,10 @@ static struct hist_field *create_hist_fi
if (flags & HIST_FIELD_FL_HITCOUNT) {
hist_field->fn = hist_field_counter;
+ hist_field->size = sizeof(u64);
+ hist_field->type = kstrdup("u64", GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
goto out;
}
@@ -755,12 +761,18 @@ static struct hist_field *create_hist_fi
hist_field->fn = hist_field_log2;
hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
hist_field->size = hist_field->operands[0]->size;
+ hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
goto out;
}
if (flags & HIST_FIELD_FL_TIMESTAMP) {
hist_field->fn = hist_field_timestamp;
hist_field->size = sizeof(u64);
+ hist_field->type = kstrdup("u64", GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
goto out;
}
@@ -770,6 +782,11 @@ static struct hist_field *create_hist_fi
if (is_string_field(field)) {
flags |= HIST_FIELD_FL_STRING;
+ hist_field->size = MAX_FILTER_STR_VAL;
+ hist_field->type = kstrdup(field->type, GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
+
if (field->filter_type == FILTER_STATIC_STRING)
hist_field->fn = hist_field_string;
else if (field->filter_type == FILTER_DYN_STRING)
@@ -777,6 +794,12 @@ static struct hist_field *create_hist_fi
else
hist_field->fn = hist_field_pstring;
} else {
+ hist_field->size = field->size;
+ hist_field->is_signed = field->is_signed;
+ hist_field->type = kstrdup(field->type, GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
+
hist_field->fn = select_value_fn(field->size,
field->is_signed);
if (!hist_field->fn) {
@@ -949,6 +972,11 @@ static struct hist_field *parse_unary(st
expr->operands[0] = operand1;
expr->operator = FIELD_OP_UNARY_MINUS;
expr->name = expr_str(expr, 0);
+ expr->type = kstrdup(operand1->type, GFP_KERNEL);
+ if (!expr->type) {
+ ret = -ENOMEM;
+ goto free;
+ }
return expr;
free:
@@ -1042,6 +1070,11 @@ static struct hist_field *parse_expr(str
expr->operands[1] = operand2;
expr->operator = field_op;
expr->name = expr_str(expr, 0);
+ expr->type = kstrdup(operand1->type, GFP_KERNEL);
+ if (!expr->type) {
+ ret = -ENOMEM;
+ goto free;
+ }
switch (field_op) {
case FIELD_OP_MINUS:

View File

@@ -0,0 +1,956 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:56 -0600
Subject: [PATCH 30/48] tracing: Add variable reference handling to hist
triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add the necessary infrastructure to allow the variables defined on one
event to be referenced in another. This allows variables set by a
previous event to be referenced and used in expressions combining the
variable values saved by that previous event and the event fields of
the current event. For example, here's how a latency can be
calculated and saved into yet another variable named 'wakeup_lat':
# echo 'hist:keys=pid,prio:ts0=common_timestamp ...
# echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...
In the first event, the event's timestamp is saved into the variable
ts0. In the next line, ts0 is subtracted from the second event's
timestamp to produce the latency.
Further users of variable references will be described in subsequent
patches, such as for instance how the 'wakeup_lat' variable above can
be displayed in a latency histogram.
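A toy userspace model of that flow (hypothetical: fixed arrays indexed by pid stand in for tracing maps keyed on a compound key) shows the moving parts: the first trigger stores a keyed variable, the second resolves it before evaluating its expression:

#include <stdio.h>

#define NR_KEYS 8                     /* toy stand-in for a tracing map */

static unsigned long long ts0[NR_KEYS];  /* per-key variable storage */
static int ts0_set[NR_KEYS];             /* tracing_map_var_set() analog */

/* first trigger: hist:keys=pid,prio:ts0=common_timestamp */
static void on_sched_waking(int pid, unsigned long long now)
{
    ts0[pid] = now;
    ts0_set[pid] = 1;
}

/* second trigger: hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 */
static void on_sched_switch(int next_pid, unsigned long long now)
{
    if (!ts0_set[next_pid])           /* resolve_var_refs() failed: skip */
        return;
    printf("pid %d: wakeup_lat=%llu\n", next_pid, now - ts0[next_pid]);
    ts0_set[next_pid] = 0;            /* read-once semantics */
}

int main(void)
{
    on_sched_waking(3, 1000);
    on_sched_switch(3, 1250);         /* prints: pid 3: wakeup_lat=250 */
    return 0;
}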
Link: http://lkml.kernel.org/r/b1d3e6975374e34d501ff417c20189c3f9b2c7b8.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 434c1d5831194e72e6eb30d46534d75b5a985eb7)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace.c | 2
kernel/trace/trace.h | 3
kernel/trace/trace_events_hist.c | 661 +++++++++++++++++++++++++++++++++++-
kernel/trace/trace_events_trigger.c | 6
4 files changed, 656 insertions(+), 16 deletions(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7783,6 +7783,7 @@ static int instance_mkdir(const char *na
INIT_LIST_HEAD(&tr->systems);
INIT_LIST_HEAD(&tr->events);
+ INIT_LIST_HEAD(&tr->hist_vars);
if (allocate_trace_buffers(tr, trace_buf_size) < 0)
goto out_free_tr;
@@ -8533,6 +8534,7 @@ ssize_t trace_parse_run_command(struct f
INIT_LIST_HEAD(&global_trace.systems);
INIT_LIST_HEAD(&global_trace.events);
+ INIT_LIST_HEAD(&global_trace.hist_vars);
list_add(&global_trace.list, &ftrace_trace_arrays);
apply_trace_boot_options();
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -274,6 +274,7 @@ struct trace_array {
int function_enabled;
#endif
int time_stamp_abs_ref;
+ struct list_head hist_vars;
};
enum {
@@ -1548,6 +1549,8 @@ extern void pause_named_trigger(struct e
extern void unpause_named_trigger(struct event_trigger_data *data);
extern void set_named_trigger_data(struct event_trigger_data *data,
struct event_trigger_data *named_data);
+extern struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data);
extern int register_event_command(struct event_command *cmd);
extern int unregister_event_command(struct event_command *cmd);
extern int register_trigger_hist_enable_disable_cmds(void);
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -59,7 +59,12 @@ struct hist_field {
struct hist_trigger_data *hist_data;
struct hist_var var;
enum field_op_id operator;
+ char *system;
+ char *event_name;
char *name;
+ unsigned int var_idx;
+ unsigned int var_ref_idx;
+ bool read_once;
};
static u64 hist_field_none(struct hist_field *field,
@@ -214,6 +219,7 @@ enum hist_field_flags {
HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11,
HIST_FIELD_FL_VAR = 1 << 12,
HIST_FIELD_FL_EXPR = 1 << 13,
+ HIST_FIELD_FL_VAR_REF = 1 << 14,
};
struct var_defs {
@@ -253,6 +259,8 @@ struct hist_trigger_data {
struct tracing_map *map;
bool enable_timestamps;
bool remove;
+ struct hist_field *var_refs[TRACING_MAP_VARS_MAX];
+ unsigned int n_var_refs;
};
static u64 hist_field_timestamp(struct hist_field *hist_field,
@@ -271,6 +279,214 @@ static u64 hist_field_timestamp(struct h
return ts;
}
+struct hist_var_data {
+ struct list_head list;
+ struct hist_trigger_data *hist_data;
+};
+
+static struct hist_field *
+check_field_for_var_ref(struct hist_field *hist_field,
+ struct hist_trigger_data *var_data,
+ unsigned int var_idx)
+{
+ struct hist_field *found = NULL;
+
+ if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+ if (hist_field->var.idx == var_idx &&
+ hist_field->var.hist_data == var_data) {
+ found = hist_field;
+ }
+ }
+
+ return found;
+}
+
+static struct hist_field *
+check_field_for_var_refs(struct hist_trigger_data *hist_data,
+ struct hist_field *hist_field,
+ struct hist_trigger_data *var_data,
+ unsigned int var_idx,
+ unsigned int level)
+{
+ struct hist_field *found = NULL;
+ unsigned int i;
+
+ if (level > 3)
+ return found;
+
+ if (!hist_field)
+ return found;
+
+ found = check_field_for_var_ref(hist_field, var_data, var_idx);
+ if (found)
+ return found;
+
+ for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
+ struct hist_field *operand;
+
+ operand = hist_field->operands[i];
+ found = check_field_for_var_refs(hist_data, operand, var_data,
+ var_idx, level + 1);
+ if (found)
+ return found;
+ }
+
+ return found;
+}
+
+static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data,
+ struct hist_trigger_data *var_data,
+ unsigned int var_idx)
+{
+ struct hist_field *hist_field, *found = NULL;
+ unsigned int i;
+
+ for_each_hist_field(i, hist_data) {
+ hist_field = hist_data->fields[i];
+ found = check_field_for_var_refs(hist_data, hist_field,
+ var_data, var_idx, 0);
+ if (found)
+ return found;
+ }
+
+ return found;
+}
+
+static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
+ unsigned int var_idx)
+{
+ struct trace_array *tr = hist_data->event_file->tr;
+ struct hist_field *found = NULL;
+ struct hist_var_data *var_data;
+
+ list_for_each_entry(var_data, &tr->hist_vars, list) {
+ if (var_data->hist_data == hist_data)
+ continue;
+ found = find_var_ref(var_data->hist_data, hist_data, var_idx);
+ if (found)
+ break;
+ }
+
+ return found;
+}
+
+static bool check_var_refs(struct hist_trigger_data *hist_data)
+{
+ struct hist_field *field;
+ bool found = false;
+ int i;
+
+ for_each_hist_field(i, hist_data) {
+ field = hist_data->fields[i];
+ if (field && field->flags & HIST_FIELD_FL_VAR) {
+ if (find_any_var_ref(hist_data, field->var.idx)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ return found;
+}
+
+static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data)
+{
+ struct trace_array *tr = hist_data->event_file->tr;
+ struct hist_var_data *var_data, *found = NULL;
+
+ list_for_each_entry(var_data, &tr->hist_vars, list) {
+ if (var_data->hist_data == hist_data) {
+ found = var_data;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static bool field_has_hist_vars(struct hist_field *hist_field,
+ unsigned int level)
+{
+ int i;
+
+ if (level > 3)
+ return false;
+
+ if (!hist_field)
+ return false;
+
+ if (hist_field->flags & HIST_FIELD_FL_VAR ||
+ hist_field->flags & HIST_FIELD_FL_VAR_REF)
+ return true;
+
+ for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
+ struct hist_field *operand;
+
+ operand = hist_field->operands[i];
+ if (field_has_hist_vars(operand, level + 1))
+ return true;
+ }
+
+ return false;
+}
+
+static bool has_hist_vars(struct hist_trigger_data *hist_data)
+{
+ struct hist_field *hist_field;
+ int i;
+
+ for_each_hist_field(i, hist_data) {
+ hist_field = hist_data->fields[i];
+ if (field_has_hist_vars(hist_field, 0))
+ return true;
+ }
+
+ return false;
+}
+
+static int save_hist_vars(struct hist_trigger_data *hist_data)
+{
+ struct trace_array *tr = hist_data->event_file->tr;
+ struct hist_var_data *var_data;
+
+ var_data = find_hist_vars(hist_data);
+ if (var_data)
+ return 0;
+
+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ var_data = kzalloc(sizeof(*var_data), GFP_KERNEL);
+ if (!var_data) {
+ trace_array_put(tr);
+ return -ENOMEM;
+ }
+
+ var_data->hist_data = hist_data;
+ list_add(&var_data->list, &tr->hist_vars);
+
+ return 0;
+}
+
+static void remove_hist_vars(struct hist_trigger_data *hist_data)
+{
+ struct trace_array *tr = hist_data->event_file->tr;
+ struct hist_var_data *var_data;
+
+ var_data = find_hist_vars(hist_data);
+ if (!var_data)
+ return;
+
+ if (WARN_ON(check_var_refs(hist_data)))
+ return;
+
+ list_del(&var_data->list);
+
+ kfree(var_data);
+
+ trace_array_put(tr);
+}
+
static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
const char *var_name)
{
@@ -313,10 +529,137 @@ static struct hist_field *find_var(struc
return NULL;
}
+static struct trace_event_file *find_var_file(struct trace_array *tr,
+ char *system,
+ char *event_name,
+ char *var_name)
+{
+ struct hist_trigger_data *var_hist_data;
+ struct hist_var_data *var_data;
+ struct trace_event_file *file, *found = NULL;
+
+ if (system)
+ return find_event_file(tr, system, event_name);
+
+ list_for_each_entry(var_data, &tr->hist_vars, list) {
+ var_hist_data = var_data->hist_data;
+ file = var_hist_data->event_file;
+ if (file == found)
+ continue;
+
+ if (find_var_field(var_hist_data, var_name)) {
+ if (found)
+ return NULL;
+
+ found = file;
+ }
+ }
+
+ return found;
+}
+
+static struct hist_field *find_file_var(struct trace_event_file *file,
+ const char *var_name)
+{
+ struct hist_trigger_data *test_data;
+ struct event_trigger_data *test;
+ struct hist_field *hist_field;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ test_data = test->private_data;
+ hist_field = find_var_field(test_data, var_name);
+ if (hist_field)
+ return hist_field;
+ }
+ }
+
+ return NULL;
+}
+
+static struct hist_field *find_event_var(struct hist_trigger_data *hist_data,
+ char *system,
+ char *event_name,
+ char *var_name)
+{
+ struct trace_array *tr = hist_data->event_file->tr;
+ struct hist_field *hist_field = NULL;
+ struct trace_event_file *file;
+
+ file = find_var_file(tr, system, event_name, var_name);
+ if (!file)
+ return NULL;
+
+ hist_field = find_file_var(file, var_name);
+
+ return hist_field;
+}
+
struct hist_elt_data {
char *comm;
+ u64 *var_ref_vals;
};
+static u64 hist_field_var_ref(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_elt_data *elt_data;
+ u64 var_val = 0;
+
+ elt_data = elt->private_data;
+ var_val = elt_data->var_ref_vals[hist_field->var_ref_idx];
+
+ return var_val;
+}
+
+static bool resolve_var_refs(struct hist_trigger_data *hist_data, void *key,
+ u64 *var_ref_vals, bool self)
+{
+ struct hist_trigger_data *var_data;
+ struct tracing_map_elt *var_elt;
+ struct hist_field *hist_field;
+ unsigned int i, var_idx;
+ bool resolved = true;
+ u64 var_val = 0;
+
+ for (i = 0; i < hist_data->n_var_refs; i++) {
+ hist_field = hist_data->var_refs[i];
+ var_idx = hist_field->var.idx;
+ var_data = hist_field->var.hist_data;
+
+ if (var_data == NULL) {
+ resolved = false;
+ break;
+ }
+
+ if ((self && var_data != hist_data) ||
+ (!self && var_data == hist_data))
+ continue;
+
+ var_elt = tracing_map_lookup(var_data->map, key);
+ if (!var_elt) {
+ resolved = false;
+ break;
+ }
+
+ if (!tracing_map_var_set(var_elt, var_idx)) {
+ resolved = false;
+ break;
+ }
+
+ if (self || !hist_field->read_once)
+ var_val = tracing_map_read_var(var_elt, var_idx);
+ else
+ var_val = tracing_map_read_var_once(var_elt, var_idx);
+
+ var_ref_vals[i] = var_val;
+ }
+
+ return resolved;
+}
+
static const char *hist_field_name(struct hist_field *field,
unsigned int level)
{
@@ -331,8 +674,20 @@ static const char *hist_field_name(struc
field_name = hist_field_name(field->operands[0], ++level);
else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
field_name = "common_timestamp";
- else if (field->flags & HIST_FIELD_FL_EXPR)
- field_name = field->name;
+ else if (field->flags & HIST_FIELD_FL_EXPR ||
+ field->flags & HIST_FIELD_FL_VAR_REF) {
+ if (field->system) {
+ static char full_name[MAX_FILTER_STR_VAL];
+
+ strcat(full_name, field->system);
+ strcat(full_name, ".");
+ strcat(full_name, field->event_name);
+ strcat(full_name, ".");
+ strcat(full_name, field->name);
+ field_name = full_name;
+ } else
+ field_name = field->name;
+ }
if (field_name == NULL)
field_name = "";
@@ -612,6 +967,9 @@ static const char *get_hist_field_flags(
static void expr_field_str(struct hist_field *field, char *expr)
{
+ if (field->flags & HIST_FIELD_FL_VAR_REF)
+ strcat(expr, "$");
+
strcat(expr, hist_field_name(field, 0));
if (field->flags) {
@@ -742,6 +1100,11 @@ static struct hist_field *create_hist_fi
if (flags & HIST_FIELD_FL_EXPR)
goto out; /* caller will populate */
+ if (flags & HIST_FIELD_FL_VAR_REF) {
+ hist_field->fn = hist_field_var_ref;
+ goto out;
+ }
+
if (flags & HIST_FIELD_FL_HITCOUNT) {
hist_field->fn = hist_field_counter;
hist_field->size = sizeof(u64);
@@ -835,6 +1198,144 @@ static void destroy_hist_fields(struct h
}
}
+static int init_var_ref(struct hist_field *ref_field,
+ struct hist_field *var_field,
+ char *system, char *event_name)
+{
+ int err = 0;
+
+ ref_field->var.idx = var_field->var.idx;
+ ref_field->var.hist_data = var_field->hist_data;
+ ref_field->size = var_field->size;
+ ref_field->is_signed = var_field->is_signed;
+ ref_field->flags |= var_field->flags &
+ (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+ if (system) {
+ ref_field->system = kstrdup(system, GFP_KERNEL);
+ if (!ref_field->system)
+ return -ENOMEM;
+ }
+
+ if (event_name) {
+ ref_field->event_name = kstrdup(event_name, GFP_KERNEL);
+ if (!ref_field->event_name) {
+ err = -ENOMEM;
+ goto free;
+ }
+ }
+
+ ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL);
+ if (!ref_field->name) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ ref_field->type = kstrdup(var_field->type, GFP_KERNEL);
+ if (!ref_field->type) {
+ err = -ENOMEM;
+ goto free;
+ }
+ out:
+ return err;
+ free:
+ kfree(ref_field->system);
+ kfree(ref_field->event_name);
+ kfree(ref_field->name);
+
+ goto out;
+}
+
+static struct hist_field *create_var_ref(struct hist_field *var_field,
+ char *system, char *event_name)
+{
+ unsigned long flags = HIST_FIELD_FL_VAR_REF;
+ struct hist_field *ref_field;
+
+ ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+ if (ref_field) {
+ if (init_var_ref(ref_field, var_field, system, event_name)) {
+ destroy_hist_field(ref_field, 0);
+ return NULL;
+ }
+ }
+
+ return ref_field;
+}
+
+static bool is_var_ref(char *var_name)
+{
+ if (!var_name || strlen(var_name) < 2 || var_name[0] != '$')
+ return false;
+
+ return true;
+}
+
+static char *field_name_from_var(struct hist_trigger_data *hist_data,
+ char *var_name)
+{
+ char *name, *field;
+ unsigned int i;
+
+ for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+ name = hist_data->attrs->var_defs.name[i];
+
+ if (strcmp(var_name, name) == 0) {
+ field = hist_data->attrs->var_defs.expr[i];
+ if (contains_operator(field) || is_var_ref(field))
+ continue;
+ return field;
+ }
+ }
+
+ return NULL;
+}
+
+static char *local_field_var_ref(struct hist_trigger_data *hist_data,
+ char *system, char *event_name,
+ char *var_name)
+{
+ struct trace_event_call *call;
+
+ if (system && event_name) {
+ call = hist_data->event_file->event_call;
+
+ if (strcmp(system, call->class->system) != 0)
+ return NULL;
+
+ if (strcmp(event_name, trace_event_name(call)) != 0)
+ return NULL;
+ }
+
+ if (!!system != !!event_name)
+ return NULL;
+
+ if (!is_var_ref(var_name))
+ return NULL;
+
+ var_name++;
+
+ return field_name_from_var(hist_data, var_name);
+}
+
+static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
+ char *system, char *event_name,
+ char *var_name)
+{
+ struct hist_field *var_field = NULL, *ref_field = NULL;
+
+ if (!is_var_ref(var_name))
+ return NULL;
+
+ var_name++;
+
+ var_field = find_event_var(hist_data, system, event_name, var_name);
+ if (var_field)
+ ref_field = create_var_ref(var_field, system, event_name);
+
+ return ref_field;
+}
+
static struct ftrace_event_field *
parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
char *field_str, unsigned long *flags)
@@ -891,10 +1392,40 @@ static struct hist_field *parse_atom(str
struct trace_event_file *file, char *str,
unsigned long *flags, char *var_name)
{
+ char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str;
struct ftrace_event_field *field = NULL;
struct hist_field *hist_field = NULL;
int ret = 0;
+ s = strchr(str, '.');
+ if (s) {
+ s = strchr(++s, '.');
+ if (s) {
+ ref_system = strsep(&str, ".");
+ if (!str) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ref_event = strsep(&str, ".");
+ if (!str) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ref_var = str;
+ }
+ }
+
+ s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var);
+ if (!s) {
+ hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var);
+ if (hist_field) {
+ hist_data->var_refs[hist_data->n_var_refs] = hist_field;
+ hist_field->var_ref_idx = hist_data->n_var_refs++;
+ return hist_field;
+ }
+ } else
+ str = s;
+
field = parse_field(hist_data, file, str, flags);
if (IS_ERR(field)) {
ret = PTR_ERR(field);
@@ -1066,6 +1597,9 @@ static struct hist_field *parse_expr(str
goto free;
}
+ operand1->read_once = true;
+ operand2->read_once = true;
+
expr->operands[0] = operand1;
expr->operands[1] = operand2;
expr->operator = field_op;
@@ -1238,6 +1772,12 @@ static int create_key_field(struct hist_
goto out;
}
+ if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+ destroy_hist_field(hist_field, 0);
+ ret = -EINVAL;
+ goto out;
+ }
+
key_size = hist_field->size;
}
@@ -1576,6 +2116,7 @@ create_hist_data(unsigned int map_bits,
hist_data->attrs = attrs;
hist_data->remove = remove;
+ hist_data->event_file = file;
ret = create_hist_fields(hist_data, file);
if (ret)
@@ -1598,12 +2139,6 @@ create_hist_data(unsigned int map_bits,
ret = create_tracing_map_fields(hist_data);
if (ret)
goto free;
-
- ret = tracing_map_init(hist_data->map);
- if (ret)
- goto free;
-
- hist_data->event_file = file;
out:
return hist_data;
free:
@@ -1618,12 +2153,17 @@ create_hist_data(unsigned int map_bits,
static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
struct tracing_map_elt *elt, void *rec,
- struct ring_buffer_event *rbe)
+ struct ring_buffer_event *rbe,
+ u64 *var_ref_vals)
{
+ struct hist_elt_data *elt_data;
struct hist_field *hist_field;
unsigned int i, var_idx;
u64 hist_val;
+ elt_data = elt->private_data;
+ elt_data->var_ref_vals = var_ref_vals;
+
for_each_hist_val_field(i, hist_data) {
hist_field = hist_data->fields[i];
hist_val = hist_field->fn(hist_field, elt, rbe, rec);
@@ -1675,6 +2215,7 @@ static void event_hist_trigger(struct ev
struct hist_trigger_data *hist_data = data->private_data;
bool use_compound_key = (hist_data->n_keys > 1);
unsigned long entries[HIST_STACKTRACE_DEPTH];
+ u64 var_ref_vals[TRACING_MAP_VARS_MAX];
char compound_key[HIST_KEY_SIZE_MAX];
struct tracing_map_elt *elt = NULL;
struct stack_trace stacktrace;
@@ -1714,9 +2255,15 @@ static void event_hist_trigger(struct ev
if (use_compound_key)
key = compound_key;
+ if (hist_data->n_var_refs &&
+ !resolve_var_refs(hist_data, key, var_ref_vals, false))
+ return;
+
elt = tracing_map_insert(hist_data->map, key);
- if (elt)
- hist_trigger_elt_update(hist_data, elt, rec, rbe);
+ if (!elt)
+ return;
+
+ hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals);
}
static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -1931,8 +2478,11 @@ static void hist_field_print(struct seq_
if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
seq_puts(m, "common_timestamp");
- else if (field_name)
+ else if (field_name) {
+ if (hist_field->flags & HIST_FIELD_FL_VAR_REF)
+ seq_putc(m, '$');
seq_printf(m, "%s", field_name);
+ }
if (hist_field->flags) {
const char *flags_str = get_hist_field_flags(hist_field);
@@ -2072,7 +2622,11 @@ static void event_hist_trigger_free(stru
if (!data->ref) {
if (data->name)
del_named_trigger(data);
+
trigger_data_free(data);
+
+ remove_hist_vars(hist_data);
+
destroy_hist_data(hist_data);
}
}
@@ -2285,23 +2839,55 @@ static int hist_register_trigger(char *g
goto out;
}
- list_add_rcu(&data->list, &file->triggers);
ret++;
- update_cond_flag(file);
-
if (hist_data->enable_timestamps)
tracing_set_time_stamp_abs(file->tr, true);
+ out:
+ return ret;
+}
+
+static int hist_trigger_enable(struct event_trigger_data *data,
+ struct trace_event_file *file)
+{
+ int ret = 0;
+
+ list_add_tail_rcu(&data->list, &file->triggers);
+
+ update_cond_flag(file);
if (trace_event_trigger_enable_disable(file, 1) < 0) {
list_del_rcu(&data->list);
update_cond_flag(file);
ret--;
}
- out:
+
return ret;
}
+static bool hist_trigger_check_refs(struct event_trigger_data *data,
+ struct trace_event_file *file)
+{
+ struct hist_trigger_data *hist_data = data->private_data;
+ struct event_trigger_data *test, *named_data = NULL;
+
+ if (hist_data->attrs->name)
+ named_data = find_named_trigger(hist_data->attrs->name);
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ if (!hist_trigger_match(data, test, named_data, false))
+ continue;
+ hist_data = test->private_data;
+ if (check_var_refs(hist_data))
+ return true;
+ break;
+ }
+ }
+
+ return false;
+}
+
static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
@@ -2334,11 +2920,30 @@ static void hist_unregister_trigger(char
}
}
+static bool hist_file_check_refs(struct trace_event_file *file)
+{
+ struct hist_trigger_data *hist_data;
+ struct event_trigger_data *test;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ hist_data = test->private_data;
+ if (check_var_refs(hist_data))
+ return true;
+ }
+ }
+
+ return false;
+}
+
static void hist_unreg_all(struct trace_event_file *file)
{
struct event_trigger_data *test, *n;
struct hist_trigger_data *hist_data;
+ if (hist_file_check_refs(file))
+ return;
+
list_for_each_entry_safe(test, n, &file->triggers, list) {
if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
hist_data = test->private_data;
@@ -2414,6 +3019,11 @@ static int event_hist_trigger_func(struc
}
if (remove) {
+ if (hist_trigger_check_refs(trigger_data, file)) {
+ ret = -EBUSY;
+ goto out_free;
+ }
+
cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
ret = 0;
goto out_free;
@@ -2431,14 +3041,33 @@ static int event_hist_trigger_func(struc
goto out_free;
} else if (ret < 0)
goto out_free;
+
+ if (get_named_trigger_data(trigger_data))
+ goto enable;
+
+ if (has_hist_vars(hist_data))
+ save_hist_vars(hist_data);
+
+ ret = tracing_map_init(hist_data->map);
+ if (ret)
+ goto out_unreg;
+enable:
+ ret = hist_trigger_enable(trigger_data, file);
+ if (ret)
+ goto out_unreg;
+
/* Just return zero, not the number of registered triggers */
ret = 0;
out:
return ret;
+ out_unreg:
+ cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
out_free:
if (cmd_ops->set_filter)
cmd_ops->set_filter(NULL, trigger_data, NULL);
+ remove_hist_vars(hist_data);
+
kfree(trigger_data);
destroy_hist_data(hist_data);
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -909,6 +909,12 @@ void set_named_trigger_data(struct event
data->named_data = named_data;
}
+struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data)
+{
+ return data->named_data;
+}
+
static void
traceon_trigger(struct event_trigger_data *data, void *rec,
struct ring_buffer_event *event)

View File

@@ -0,0 +1,216 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:57 -0600
Subject: [PATCH 31/48] tracing: Add hist trigger action hook
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add a hook for executing extra actions whenever a histogram entry is
added or updated.
The default 'action' when a hist entry is added to a histogram is to
update the set of values associated with it. Some applications may
want to perform additional actions at that point, such as generating
another event or comparing and saving a maximum.
Add a simple framework for doing that; specific actions will be
implemented on top of it in later patches.
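Reduced to its skeleton, the hook is an array of function pointers run after each element update. A hypothetical standalone sketch (the kernel version also passes the map element, record, and ring-buffer event to each action):

#include <stdio.h>

#define ACTIONS_MAX 8

struct action_data;
typedef void (*action_fn_t)(struct action_data *data,
                            unsigned long long *var_ref_vals);

struct action_data {
    action_fn_t fn;                   /* run after every element update */
};

struct hist {
    struct action_data *actions[ACTIONS_MAX];
    unsigned int n_actions;
};

/* analog of hist_trigger_actions(), called after hist_trigger_elt_update() */
static void run_actions(struct hist *h, unsigned long long *var_ref_vals)
{
    unsigned int i;

    for (i = 0; i < h->n_actions; i++)
        h->actions[i]->fn(h->actions[i], var_ref_vals);
}

/* sample action: roughly what a later max-saving handler boils down to */
static void save_max(struct action_data *data, unsigned long long *vals)
{
    static unsigned long long max;

    (void)data;
    if (vals[0] > max) {
        max = vals[0];
        printf("new max: %llu\n", max);
    }
}

int main(void)
{
    struct action_data a = { save_max };
    struct hist h = { { &a }, 1 };
    unsigned long long vals[] = { 7 };

    run_actions(&h, vals);            /* prints: new max: 7 */
    return 0;
}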
Link: http://lkml.kernel.org/r/9482ba6a3eaf5ca6e60954314beacd0e25c05b24.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit b91ae245c2f781e6da0532d8545f51a0f1291cc0)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 106 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 104 insertions(+), 2 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -33,6 +33,7 @@ typedef u64 (*hist_field_fn_t) (struct h
#define HIST_FIELD_OPERANDS_MAX 2
#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+#define HIST_ACTIONS_MAX 8
enum field_op_id {
FIELD_OP_NONE,
@@ -242,6 +243,9 @@ struct hist_trigger_attrs {
char *assignment_str[TRACING_MAP_VARS_MAX];
unsigned int n_assignments;
+ char *action_str[HIST_ACTIONS_MAX];
+ unsigned int n_actions;
+
struct var_defs var_defs;
};
@@ -261,6 +265,21 @@ struct hist_trigger_data {
bool remove;
struct hist_field *var_refs[TRACING_MAP_VARS_MAX];
unsigned int n_var_refs;
+
+ struct action_data *actions[HIST_ACTIONS_MAX];
+ unsigned int n_actions;
+};
+
+struct action_data;
+
+typedef void (*action_fn_t) (struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe,
+ struct action_data *data, u64 *var_ref_vals);
+
+struct action_data {
+ action_fn_t fn;
+ unsigned int var_ref_idx;
};
static u64 hist_field_timestamp(struct hist_field *hist_field,
@@ -764,6 +783,9 @@ static void destroy_hist_trigger_attrs(s
for (i = 0; i < attrs->n_assignments; i++)
kfree(attrs->assignment_str[i]);
+ for (i = 0; i < attrs->n_actions; i++)
+ kfree(attrs->action_str[i]);
+
kfree(attrs->name);
kfree(attrs->sort_key_str);
kfree(attrs->keys_str);
@@ -771,6 +793,16 @@ static void destroy_hist_trigger_attrs(s
kfree(attrs);
}
+static int parse_action(char *str, struct hist_trigger_attrs *attrs)
+{
+ int ret = 0;
+
+ if (attrs->n_actions >= HIST_ACTIONS_MAX)
+ return ret;
+
+ return ret;
+}
+
static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
{
int ret = 0;
@@ -854,8 +886,9 @@ static struct hist_trigger_attrs *parse_
else if (strcmp(str, "clear") == 0)
attrs->clear = true;
else {
- ret = -EINVAL;
- goto free;
+ ret = parse_action(str, attrs);
+ if (ret)
+ goto free;
}
}
@@ -2047,11 +2080,55 @@ static int create_sort_keys(struct hist_
return ret;
}
+static void destroy_actions(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_actions; i++) {
+ struct action_data *data = hist_data->actions[i];
+
+ kfree(data);
+ }
+}
+
+static int parse_actions(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+ int ret = 0;
+ char *str;
+
+ for (i = 0; i < hist_data->attrs->n_actions; i++) {
+ str = hist_data->attrs->action_str[i];
+ }
+
+ return ret;
+}
+
+static int create_actions(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file)
+{
+ struct action_data *data;
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < hist_data->attrs->n_actions; i++) {
+ data = hist_data->actions[i];
+ }
+
+ return ret;
+}
+
static void destroy_hist_data(struct hist_trigger_data *hist_data)
{
+ if (!hist_data)
+ return;
+
destroy_hist_trigger_attrs(hist_data->attrs);
destroy_hist_fields(hist_data);
tracing_map_destroy(hist_data->map);
+
+ destroy_actions(hist_data);
+
kfree(hist_data);
}
@@ -2118,6 +2195,10 @@ create_hist_data(unsigned int map_bits,
hist_data->remove = remove;
hist_data->event_file = file;
+ ret = parse_actions(hist_data);
+ if (ret)
+ goto free;
+
ret = create_hist_fields(hist_data, file);
if (ret)
goto free;
@@ -2209,6 +2290,20 @@ static inline void add_to_key(char *comp
memcpy(compound_key + key_field->offset, key, size);
}
+static void
+hist_trigger_actions(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe, u64 *var_ref_vals)
+{
+ struct action_data *data;
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_actions; i++) {
+ data = hist_data->actions[i];
+ data->fn(hist_data, elt, rec, rbe, data, var_ref_vals);
+ }
+}
+
static void event_hist_trigger(struct event_trigger_data *data, void *rec,
struct ring_buffer_event *rbe)
{
@@ -2264,6 +2359,9 @@ static void event_hist_trigger(struct ev
return;
hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals);
+
+ if (resolve_var_refs(hist_data, key, var_ref_vals, true))
+ hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals);
}
static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -3048,6 +3146,10 @@ static int event_hist_trigger_func(struc
if (has_hist_vars(hist_data))
save_hist_vars(hist_data);
+ ret = create_actions(hist_data, file);
+ if (ret)
+ goto out_unreg;
+
ret = tracing_map_init(hist_data->map);
if (ret)
goto out_unreg;

File diff suppressed because it is too large

View File

@@ -0,0 +1,667 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:51:59 -0600
Subject: [PATCH 33/48] tracing: Add support for 'field variables'
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Users should be able to directly specify event fields in hist trigger
'actions' rather than being forced to explicitly create a variable for
that purpose.
Add support allowing fields to be used directly in actions, which
essentially does just that - creates 'invisible' variables for each
bare field specified in an action. If a bare field refers to a field
on another (matching) event, it even creates a special histogram for
the purpose (since variables can't be defined on an existing histogram
after histogram creation).
Here's a simple example that demonstrates both. Basically the
onmatch() action creates a list of variables corresponding to the
parameters of the synthetic event to be generated, and then uses those
values to generate the event. So for the wakeup_latency synthetic
event 'call' below, the first param, $wakeup_lat, is a variable defined
explicitly on sched_switch, where 'next_pid' is just a normal field on
sched_switch, and prio is a normal field on sched_waking.
Since the mechanism works on variables, those two normal fields just
have 'invisible' variables created internally for them. In the case of
'prio', which is on another event, we actually need to create an
additional hist trigger and define the invisible variable on that, since
once a hist trigger is defined, variables can't be added to it later.
echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \
    /sys/kernel/debug/tracing/synthetic_events
echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> \
    /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\
    onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,prio)' >> \
    /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
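Under the hood, next_pid gets its 'invisible' variable on sched_switch itself,
while prio, living on sched_waking, is handled by create_field_var_hist(): it
synthesizes and registers a second hist trigger on sched_waking. A sketch of the
command string that would be generated for this example (assuming the existing
sched_waking histogram is keyed on pid and carries no filter):
    keys=pid:synthetic_prio=prio
that is, the compatible histogram's keys plus a 'synthetic_'-prefixed variable
for the requested field, with ' if <filter>' appended when the matched trigger
has one.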
Link: http://lkml.kernel.org/r/8e8dcdac1ea180ed7a3689e1caeeccede9dc42b3.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 5fcd8c6efab39371cb3ce51b8b391a43e83a94de)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 531 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 530 insertions(+), 1 deletion(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -255,6 +255,16 @@ struct hist_trigger_attrs {
struct var_defs var_defs;
};
+struct field_var {
+ struct hist_field *var;
+ struct hist_field *val;
+};
+
+struct field_var_hist {
+ struct hist_trigger_data *hist_data;
+ char *cmd;
+};
+
struct hist_trigger_data {
struct hist_field *fields[HIST_FIELDS_MAX];
unsigned int n_vals;
@@ -274,6 +284,12 @@ struct hist_trigger_data {
struct action_data *actions[HIST_ACTIONS_MAX];
unsigned int n_actions;
+
+ struct field_var *field_vars[SYNTH_FIELDS_MAX];
+ unsigned int n_field_vars;
+ unsigned int n_field_var_str;
+ struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX];
+ unsigned int n_field_var_hists;
};
struct synth_field {
@@ -1427,6 +1443,7 @@ static struct hist_field *find_event_var
struct hist_elt_data {
char *comm;
u64 *var_ref_vals;
+ char *field_var_str[SYNTH_FIELDS_MAX];
};
static u64 hist_field_var_ref(struct hist_field *hist_field,
@@ -1731,6 +1748,11 @@ static inline void save_comm(char *comm,
static void hist_elt_data_free(struct hist_elt_data *elt_data)
{
+ unsigned int i;
+
+ for (i = 0; i < SYNTH_FIELDS_MAX; i++)
+ kfree(elt_data->field_var_str[i]);
+
kfree(elt_data->comm);
kfree(elt_data);
}
@@ -1748,7 +1770,7 @@ static int hist_trigger_elt_data_alloc(s
unsigned int size = TASK_COMM_LEN;
struct hist_elt_data *elt_data;
struct hist_field *key_field;
- unsigned int i;
+ unsigned int i, n_str;
elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
if (!elt_data)
@@ -1767,6 +1789,18 @@ static int hist_trigger_elt_data_alloc(s
}
}
+ n_str = hist_data->n_field_var_str;
+
+ size = STR_VAR_LEN_MAX;
+
+ for (i = 0; i < n_str; i++) {
+ elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL);
+ if (!elt_data->field_var_str[i]) {
+ hist_elt_data_free(elt_data);
+ return -ENOMEM;
+ }
+ }
+
elt->private_data = elt_data;
return 0;
@@ -2473,6 +2507,470 @@ static struct hist_field *parse_expr(str
return ERR_PTR(ret);
}
+static char *find_trigger_filter(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file)
+{
+ struct event_trigger_data *test;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ if (test->private_data == hist_data)
+ return test->filter_str;
+ }
+ }
+
+ return NULL;
+}
+
+static struct event_command trigger_hist_cmd;
+static int event_hist_trigger_func(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
+
+static bool compatible_keys(struct hist_trigger_data *target_hist_data,
+ struct hist_trigger_data *hist_data,
+ unsigned int n_keys)
+{
+ struct hist_field *target_hist_field, *hist_field;
+ unsigned int n, i, j;
+
+ if (hist_data->n_fields - hist_data->n_vals != n_keys)
+ return false;
+
+ i = hist_data->n_vals;
+ j = target_hist_data->n_vals;
+
+ for (n = 0; n < n_keys; n++) {
+ hist_field = hist_data->fields[i + n];
+ target_hist_field = target_hist_data->fields[j + n];
+
+ if (strcmp(hist_field->type, target_hist_field->type) != 0)
+ return false;
+ if (hist_field->size != target_hist_field->size)
+ return false;
+ if (hist_field->is_signed != target_hist_field->is_signed)
+ return false;
+ }
+
+ return true;
+}
+
+static struct hist_trigger_data *
+find_compatible_hist(struct hist_trigger_data *target_hist_data,
+ struct trace_event_file *file)
+{
+ struct hist_trigger_data *hist_data;
+ struct event_trigger_data *test;
+ unsigned int n_keys;
+
+ n_keys = target_hist_data->n_fields - target_hist_data->n_vals;
+
+ list_for_each_entry_rcu(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ hist_data = test->private_data;
+
+ if (compatible_keys(target_hist_data, hist_data, n_keys))
+ return hist_data;
+ }
+ }
+
+ return NULL;
+}
+
+static struct trace_event_file *event_file(struct trace_array *tr,
+ char *system, char *event_name)
+{
+ struct trace_event_file *file;
+
+ file = find_event_file(tr, system, event_name);
+ if (!file)
+ return ERR_PTR(-EINVAL);
+
+ return file;
+}
+
+static struct hist_field *
+find_synthetic_field_var(struct hist_trigger_data *target_hist_data,
+ char *system, char *event_name, char *field_name)
+{
+ struct hist_field *event_var;
+ char *synthetic_name;
+
+ synthetic_name = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+ if (!synthetic_name)
+ return ERR_PTR(-ENOMEM);
+
+ strcpy(synthetic_name, "synthetic_");
+ strcat(synthetic_name, field_name);
+
+ event_var = find_event_var(target_hist_data, system, event_name, synthetic_name);
+
+ kfree(synthetic_name);
+
+ return event_var;
+}
+
+/**
+ * create_field_var_hist - Automatically create a histogram and var for a field
+ * @target_hist_data: The target hist trigger
+ * @subsys_name: Optional subsystem name
+ * @event_name: Optional event name
+ * @field_name: The name of the field (and the resulting variable)
+ *
+ * Hist trigger actions fetch data from variables, not directly from
+ * events. However, for convenience, users are allowed to directly
+ * specify an event field in an action, which will be automatically
+ * converted into a variable on their behalf.
+ *
+ * If a user specifies a field on an event that isn't the event the
+ * histogram currently being defined (the target event histogram), the
+ * only way that can be accomplished is if a new hist trigger is
+ * created and the field variable defined on that.
+ *
+ * This function creates a new histogram compatible with the target
+ * event (meaning a histogram with the same key as the target
+ * histogram), and creates a variable for the specified field, but
+ * with 'synthetic_' prepended to the variable name in order to avoid
+ * collision with normal field variables.
+ *
+ * Return: The variable created for the field.
+ */
+struct hist_field *
+create_field_var_hist(struct hist_trigger_data *target_hist_data,
+ char *subsys_name, char *event_name, char *field_name)
+{
+ struct trace_array *tr = target_hist_data->event_file->tr;
+ struct hist_field *event_var = ERR_PTR(-EINVAL);
+ struct hist_trigger_data *hist_data;
+ unsigned int i, n, first = true;
+ struct field_var_hist *var_hist;
+ struct trace_event_file *file;
+ struct hist_field *key_field;
+ char *saved_filter;
+ char *cmd;
+ int ret;
+
+ if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX)
+ return ERR_PTR(-EINVAL);
+
+ file = event_file(tr, subsys_name, event_name);
+
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ return ERR_PTR(ret);
+ }
+
+ /*
+ * Look for a histogram compatible with target. We'll use the
+ * found histogram specification to create a new matching
+ * histogram with our variable on it. target_hist_data is not
+ * yet a registered histogram so we can't use that.
+ */
+ hist_data = find_compatible_hist(target_hist_data, file);
+ if (!hist_data)
+ return ERR_PTR(-EINVAL);
+
+ /* See if a synthetic field variable has already been created */
+ event_var = find_synthetic_field_var(target_hist_data, subsys_name,
+ event_name, field_name);
+ if (!IS_ERR_OR_NULL(event_var))
+ return event_var;
+
+ var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL);
+ if (!var_hist)
+ return ERR_PTR(-ENOMEM);
+
+ cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+ if (!cmd) {
+ kfree(var_hist);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Use the same keys as the compatible histogram */
+ strcat(cmd, "keys=");
+
+ for_each_hist_key_field(i, hist_data) {
+ key_field = hist_data->fields[i];
+ if (!first)
+ strcat(cmd, ",");
+ strcat(cmd, key_field->field->name);
+ first = false;
+ }
+
+ /* Create the synthetic field variable specification */
+ strcat(cmd, ":synthetic_");
+ strcat(cmd, field_name);
+ strcat(cmd, "=");
+ strcat(cmd, field_name);
+
+ /* Use the same filter as the compatible histogram */
+ saved_filter = find_trigger_filter(hist_data, file);
+ if (saved_filter) {
+ strcat(cmd, " if ");
+ strcat(cmd, saved_filter);
+ }
+
+ var_hist->cmd = kstrdup(cmd, GFP_KERNEL);
+ if (!var_hist->cmd) {
+ kfree(cmd);
+ kfree(var_hist);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Save the compatible histogram information */
+ var_hist->hist_data = hist_data;
+
+ /* Create the new histogram with our variable */
+ ret = event_hist_trigger_func(&trigger_hist_cmd, file,
+ "", "hist", cmd);
+ if (ret) {
+ kfree(cmd);
+ kfree(var_hist->cmd);
+ kfree(var_hist);
+ return ERR_PTR(ret);
+ }
+
+ kfree(cmd);
+
+ /* If we can't find the variable, something went wrong */
+ event_var = find_synthetic_field_var(target_hist_data, subsys_name,
+ event_name, field_name);
+ if (IS_ERR_OR_NULL(event_var)) {
+ kfree(var_hist->cmd);
+ kfree(var_hist);
+ return ERR_PTR(-EINVAL);
+ }
+
+ n = target_hist_data->n_field_var_hists;
+ target_hist_data->field_var_hists[n] = var_hist;
+ target_hist_data->n_field_var_hists++;
+
+ return event_var;
+}
+
+struct hist_field *
+find_target_event_var(struct hist_trigger_data *hist_data,
+ char *subsys_name, char *event_name, char *var_name)
+{
+ struct trace_event_file *file = hist_data->event_file;
+ struct hist_field *hist_field = NULL;
+
+ if (subsys_name) {
+ struct trace_event_call *call;
+
+ if (!event_name)
+ return NULL;
+
+ call = file->event_call;
+
+ if (strcmp(subsys_name, call->class->system) != 0)
+ return NULL;
+
+ if (strcmp(event_name, trace_event_name(call)) != 0)
+ return NULL;
+ }
+
+ hist_field = find_var_field(hist_data, var_name);
+
+ return hist_field;
+}
+
+static inline void __update_field_vars(struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *rec,
+ struct field_var **field_vars,
+ unsigned int n_field_vars,
+ unsigned int field_var_str_start)
+{
+ struct hist_elt_data *elt_data = elt->private_data;
+ unsigned int i, j, var_idx;
+ u64 var_val;
+
+ for (i = 0, j = field_var_str_start; i < n_field_vars; i++) {
+ struct field_var *field_var = field_vars[i];
+ struct hist_field *var = field_var->var;
+ struct hist_field *val = field_var->val;
+
+ var_val = val->fn(val, elt, rbe, rec);
+ var_idx = var->var.idx;
+
+ if (val->flags & HIST_FIELD_FL_STRING) {
+ char *str = elt_data->field_var_str[j++];
+ char *val_str = (char *)(uintptr_t)var_val;
+
+ strncpy(str, val_str, STR_VAR_LEN_MAX);
+ var_val = (u64)(uintptr_t)str;
+ }
+ tracing_map_set_var(elt, var_idx, var_val);
+ }
+}
+
+static void update_field_vars(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *rec)
+{
+ __update_field_vars(elt, rbe, rec, hist_data->field_vars,
+ hist_data->n_field_vars, 0);
+}
+
+static struct hist_field *create_var(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ char *name, int size, const char *type)
+{
+ struct hist_field *var;
+ int idx;
+
+ if (find_var(hist_data, file, name) && !hist_data->remove) {
+ var = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ var = kzalloc(sizeof(struct hist_field), GFP_KERNEL);
+ if (!var) {
+ var = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ idx = tracing_map_add_var(hist_data->map);
+ if (idx < 0) {
+ kfree(var);
+ var = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ var->flags = HIST_FIELD_FL_VAR;
+ var->var.idx = idx;
+ var->var.hist_data = var->hist_data = hist_data;
+ var->size = size;
+ var->var.name = kstrdup(name, GFP_KERNEL);
+ var->type = kstrdup(type, GFP_KERNEL);
+ if (!var->var.name || !var->type) {
+ kfree(var->var.name);
+ kfree(var->type);
+ kfree(var);
+ var = ERR_PTR(-ENOMEM);
+ }
+ out:
+ return var;
+}
+
+static struct field_var *create_field_var(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ char *field_name)
+{
+ struct hist_field *val = NULL, *var = NULL;
+ unsigned long flags = HIST_FIELD_FL_VAR;
+ struct field_var *field_var;
+ int ret = 0;
+
+ if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ val = parse_atom(hist_data, file, field_name, &flags, NULL);
+ if (IS_ERR(val)) {
+ ret = PTR_ERR(val);
+ goto err;
+ }
+
+ var = create_var(hist_data, file, field_name, val->size, val->type);
+ if (IS_ERR(var)) {
+ kfree(val);
+ ret = PTR_ERR(var);
+ goto err;
+ }
+
+ field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL);
+ if (!field_var) {
+ kfree(val);
+ kfree(var);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ field_var->var = var;
+ field_var->val = val;
+ out:
+ return field_var;
+ err:
+ field_var = ERR_PTR(ret);
+ goto out;
+}
+
+/**
+ * create_target_field_var - Automatically create a variable for a field
+ * @target_hist_data: The target hist trigger
+ * @subsys_name: Optional subsystem name
+ * @event_name: Optional event name
+ * @var_name: The name of the field (and the resulting variable)
+ *
+ * Hist trigger actions fetch data from variables, not directly from
+ * events. However, for convenience, users are allowed to directly
+ * specify an event field in an action, which will be automatically
+ * converted into a variable on their behalf.
+ *
+ * This function creates a field variable with the name var_name on
+ * the hist trigger currently being defined on the target event. If
+ * subsys_name and event_name are specified, this function simply
+ * verifies that they do in fact match the target event subsystem and
+ * event name.
+ *
+ * Return: The variable created for the field.
+ */
+struct field_var *
+create_target_field_var(struct hist_trigger_data *target_hist_data,
+ char *subsys_name, char *event_name, char *var_name)
+{
+ struct trace_event_file *file = target_hist_data->event_file;
+
+ if (subsys_name) {
+ struct trace_event_call *call;
+
+ if (!event_name)
+ return NULL;
+
+ call = file->event_call;
+
+ if (strcmp(subsys_name, call->class->system) != 0)
+ return NULL;
+
+ if (strcmp(event_name, trace_event_name(call)) != 0)
+ return NULL;
+ }
+
+ return create_field_var(target_hist_data, file, var_name);
+}
+
+static void destroy_field_var(struct field_var *field_var)
+{
+ if (!field_var)
+ return;
+
+ destroy_hist_field(field_var->var, 0);
+ destroy_hist_field(field_var->val, 0);
+
+ kfree(field_var);
+}
+
+static void destroy_field_vars(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_field_vars; i++)
+ destroy_field_var(hist_data->field_vars[i]);
+}
+
+void save_field_var(struct hist_trigger_data *hist_data,
+ struct field_var *field_var)
+{
+ hist_data->field_vars[hist_data->n_field_vars++] = field_var;
+
+ if (field_var->val->flags & HIST_FIELD_FL_STRING)
+ hist_data->n_field_var_str++;
+}
+
static int create_hitcount_val(struct hist_trigger_data *hist_data)
{
hist_data->fields[HITCOUNT_IDX] =
@@ -2928,6 +3426,16 @@ static int create_actions(struct hist_tr
return ret;
}
+static void destroy_field_var_hists(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_field_var_hists; i++) {
+ kfree(hist_data->field_var_hists[i]->cmd);
+ kfree(hist_data->field_var_hists[i]);
+ }
+}
+
static void destroy_hist_data(struct hist_trigger_data *hist_data)
{
if (!hist_data)
@@ -2938,6 +3446,8 @@ static void destroy_hist_data(struct his
tracing_map_destroy(hist_data->map);
destroy_actions(hist_data);
+ destroy_field_vars(hist_data);
+ destroy_field_var_hists(hist_data);
kfree(hist_data);
}
@@ -3074,6 +3584,8 @@ static void hist_trigger_elt_update(stru
tracing_map_set_var(elt, var_idx, hist_val);
}
}
+
+ update_field_vars(hist_data, elt, rbe, rec);
}
static inline void add_to_key(char *compound_key, void *key,
@@ -3518,6 +4030,21 @@ static int event_hist_trigger_init(struc
return 0;
}
+static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
+{
+ struct trace_event_file *file;
+ unsigned int i;
+ char *cmd;
+ int ret;
+
+ for (i = 0; i < hist_data->n_field_var_hists; i++) {
+ file = hist_data->field_var_hists[i]->hist_data->event_file;
+ cmd = hist_data->field_var_hists[i]->cmd;
+ ret = event_hist_trigger_func(&trigger_hist_cmd, file,
+ "!hist", "hist", cmd);
+ }
+}
+
static void event_hist_trigger_free(struct event_trigger_ops *ops,
struct event_trigger_data *data)
{
@@ -3535,6 +4062,8 @@ static void event_hist_trigger_free(stru
remove_hist_vars(hist_data);
+ unregister_field_var_hists(hist_data);
+
destroy_hist_data(hist_data);
}
}
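Note the symmetry in the teardown above: unregister_field_var_hists() replays
each saved command string through event_hist_trigger_func() with a '!hist' glob,
so the auto-created triggers are removed by exactly the inverse of the command
that created them.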

View File

@@ -0,0 +1,688 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:00 -0600
Subject: [PATCH 34/48] tracing: Add 'onmatch' hist trigger action support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add an 'onmatch(matching.event).<synthetic_event_name>(param list)'
hist trigger action which is invoked with the set of variables or
event fields named in the 'param list'. The result is the generation
of a synthetic event that consists of the values contained in those
variables and/or fields at the time the invoking event was hit.
As an example the below defines a simple synthetic event using a
variable defined on the sched_wakeup_new event, and shows the event
definition with unresolved fields, since the sched_wakeup_new event
with the testpid variable hasn't been defined yet:
# echo 'wakeup_new_test pid_t pid; int prio' >> \
/sys/kernel/debug/tracing/synthetic_events
# cat /sys/kernel/debug/tracing/synthetic_events
wakeup_new_test pid_t pid; int prio
The following hist trigger both defines a testpid variable and
specifies an onmatch() trace action that uses that variable along with
a non-variable field to generate a wakeup_new_test synthetic event
whenever a sched_wakeup_new event occurs, which because of the 'if
comm == "cyclictest"' filter only happens when the executable is
cyclictest:
# echo 'hist:testpid=pid:keys=$testpid:\
onmatch(sched.sched_wakeup_new).wakeup_new_test($testpid, prio) \
if comm=="cyclictest"' >> \
/sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger
Creating and displaying a histogram based on those events is now just
a matter of using the fields and new synthetic event in the
tracing/events/synthetic directory, as usual:
# echo 'hist:keys=pid,prio:sort=pid,prio' >> \
/sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger
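The action text after 'onmatch(' is tokenized by onmatch_parse() in the hunk
below with a plain strsep() walk. A self-contained userspace sketch of the same
splitting (hypothetical code mirroring, not copied from, the kernel logic;
strsep() as provided by glibc):
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char buf[] = "sched.sched_wakeup_new).wakeup_new_test($testpid,prio)";
            char *str = buf;
            char *match_event, *system, *synth_name, *params;

            match_event = strsep(&str, ")");    /* "sched.sched_wakeup_new" */
            system = strsep(&match_event, "."); /* "sched" / "sched_wakeup_new" */
            strsep(&str, ".");                  /* skip the '.' following ')' */
            synth_name = strsep(&str, "(");     /* "wakeup_new_test" */
            params = strsep(&str, ")");         /* "$testpid,prio" */

            printf("%s.%s -> %s(%s)\n", system, match_event, synth_name, params);
            return 0;
    }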
Link: http://lkml.kernel.org/r/8c2a574bcb7530c876629c901ecd23911b14afe8.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Rajvi Jingar <rajvi.jingar@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit ea82307e63ec125d8612d8cedd2618669f674226)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 488 +++++++++++++++++++++++++++++++++++++--
1 file changed, 475 insertions(+), 13 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -285,6 +285,8 @@ struct hist_trigger_data {
struct action_data *actions[HIST_ACTIONS_MAX];
unsigned int n_actions;
+ struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX];
+ unsigned int n_synth_var_refs;
struct field_var *field_vars[SYNTH_FIELDS_MAX];
unsigned int n_field_vars;
unsigned int n_field_var_str;
@@ -321,7 +323,18 @@ typedef void (*action_fn_t) (struct hist
struct action_data {
action_fn_t fn;
- unsigned int var_ref_idx;
+ unsigned int n_params;
+ char *params[SYNTH_FIELDS_MAX];
+
+ union {
+ struct {
+ unsigned int var_ref_idx;
+ char *match_event;
+ char *match_event_system;
+ char *synth_event_name;
+ struct synth_event *synth_event;
+ } onmatch;
+ };
};
static LIST_HEAD(synth_event_list);
@@ -887,6 +900,21 @@ static struct synth_event *alloc_synth_e
return event;
}
+static void action_trace(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe,
+ struct action_data *data, u64 *var_ref_vals)
+{
+ struct synth_event *event = data->onmatch.synth_event;
+
+ trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx);
+}
+
+struct hist_var_data {
+ struct list_head list;
+ struct hist_trigger_data *hist_data;
+};
+
static void add_or_delete_synth_event(struct synth_event *event, int delete)
{
if (delete)
@@ -1124,11 +1152,6 @@ static u64 hist_field_timestamp(struct h
return ts;
}
-struct hist_var_data {
- struct list_head list;
- struct hist_trigger_data *hist_data;
-};
-
static struct hist_field *
check_field_for_var_ref(struct hist_field *hist_field,
struct hist_trigger_data *var_data,
@@ -1194,6 +1217,14 @@ static struct hist_field *find_var_ref(s
return found;
}
+ for (i = 0; i < hist_data->n_synth_var_refs; i++) {
+ hist_field = hist_data->synth_var_refs[i];
+ found = check_field_for_var_refs(hist_data, hist_field,
+ var_data, var_idx, 0);
+ if (found)
+ return found;
+ }
+
return found;
}
@@ -1422,6 +1453,37 @@ static struct hist_field *find_file_var(
return NULL;
}
+static struct hist_field *
+find_match_var(struct hist_trigger_data *hist_data, char *var_name)
+{
+ struct trace_array *tr = hist_data->event_file->tr;
+ struct hist_field *hist_field, *found = NULL;
+ struct trace_event_file *file;
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_actions; i++) {
+ struct action_data *data = hist_data->actions[i];
+
+ if (data->fn == action_trace) {
+ char *system = data->onmatch.match_event_system;
+ char *event_name = data->onmatch.match_event;
+
+ file = find_var_file(tr, system, event_name, var_name);
+ if (!file)
+ continue;
+ hist_field = find_file_var(file, var_name);
+ if (hist_field) {
+ if (found) {
+ return ERR_PTR(-EINVAL);
+ }
+
+ found = hist_field;
+ }
+ }
+ }
+ return found;
+}
+
static struct hist_field *find_event_var(struct hist_trigger_data *hist_data,
char *system,
char *event_name,
@@ -1431,6 +1493,14 @@ static struct hist_field *find_event_var
struct hist_field *hist_field = NULL;
struct trace_event_file *file;
+ if (!system || !event_name) {
+ hist_field = find_match_var(hist_data, var_name);
+ if (IS_ERR(hist_field))
+ return NULL;
+ if (hist_field)
+ return hist_field;
+ }
+
file = find_var_file(tr, system, event_name, var_name);
if (!file)
return NULL;
@@ -1622,11 +1692,21 @@ static void destroy_hist_trigger_attrs(s
static int parse_action(char *str, struct hist_trigger_attrs *attrs)
{
- int ret = 0;
+ int ret = -EINVAL;
if (attrs->n_actions >= HIST_ACTIONS_MAX)
return ret;
+ if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0)) {
+ attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL);
+ if (!attrs->action_str[attrs->n_actions]) {
+ ret = -ENOMEM;
+ return ret;
+ }
+ attrs->n_actions++;
+ ret = 0;
+ }
+
return ret;
}
@@ -2635,7 +2715,7 @@ find_synthetic_field_var(struct hist_tri
*
* Return: The variable created for the field.
*/
-struct hist_field *
+static struct hist_field *
create_field_var_hist(struct hist_trigger_data *target_hist_data,
char *subsys_name, char *event_name, char *field_name)
{
@@ -2748,7 +2828,7 @@ create_field_var_hist(struct hist_trigge
return event_var;
}
-struct hist_field *
+static struct hist_field *
find_target_event_var(struct hist_trigger_data *hist_data,
char *subsys_name, char *event_name, char *var_name)
{
@@ -2919,7 +2999,7 @@ static struct field_var *create_field_va
*
* Return: The variable created for the field.
*/
-struct field_var *
+static struct field_var *
create_target_field_var(struct hist_trigger_data *target_hist_data,
char *subsys_name, char *event_name, char *var_name)
{
@@ -2943,6 +3023,27 @@ create_target_field_var(struct hist_trig
return create_field_var(target_hist_data, file, var_name);
}
+static void onmatch_destroy(struct action_data *data)
+{
+ unsigned int i;
+
+ mutex_lock(&synth_event_mutex);
+
+ kfree(data->onmatch.match_event);
+ kfree(data->onmatch.match_event_system);
+ kfree(data->onmatch.synth_event_name);
+
+ for (i = 0; i < data->n_params; i++)
+ kfree(data->params[i]);
+
+ if (data->onmatch.synth_event)
+ data->onmatch.synth_event->ref--;
+
+ kfree(data);
+
+ mutex_unlock(&synth_event_mutex);
+}
+
static void destroy_field_var(struct field_var *field_var)
{
if (!field_var)
@@ -2962,8 +3063,8 @@ static void destroy_field_vars(struct hi
destroy_field_var(hist_data->field_vars[i]);
}
-void save_field_var(struct hist_trigger_data *hist_data,
- struct field_var *field_var)
+static void save_field_var(struct hist_trigger_data *hist_data,
+ struct field_var *field_var)
{
hist_data->field_vars[hist_data->n_field_vars++] = field_var;
@@ -2971,6 +3072,304 @@ void save_field_var(struct hist_trigger_
hist_data->n_field_var_str++;
}
+
+static void destroy_synth_var_refs(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_synth_var_refs; i++)
+ destroy_hist_field(hist_data->synth_var_refs[i], 0);
+}
+
+static void save_synth_var_ref(struct hist_trigger_data *hist_data,
+ struct hist_field *var_ref)
+{
+ hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref;
+
+ hist_data->var_refs[hist_data->n_var_refs] = var_ref;
+ var_ref->var_ref_idx = hist_data->n_var_refs++;
+}
+
+static int check_synth_field(struct synth_event *event,
+ struct hist_field *hist_field,
+ unsigned int field_pos)
+{
+ struct synth_field *field;
+
+ if (field_pos >= event->n_fields)
+ return -EINVAL;
+
+ field = event->fields[field_pos];
+
+ if (strcmp(field->type, hist_field->type) != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int parse_action_params(char *params, struct action_data *data)
+{
+ char *param, *saved_param;
+ int ret = 0;
+
+ while (params) {
+ if (data->n_params >= SYNTH_FIELDS_MAX)
+ goto out;
+
+ param = strsep(&params, ",");
+ if (!param) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ param = strstrip(param);
+ if (strlen(param) < 2) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ saved_param = kstrdup(param, GFP_KERNEL);
+ if (!saved_param) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ data->params[data->n_params++] = saved_param;
+ }
+ out:
+ return ret;
+}
+
+static struct hist_field *
+onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data,
+ char *system, char *event, char *var)
+{
+ struct hist_field *hist_field;
+
+ var++; /* skip '$' */
+
+ hist_field = find_target_event_var(hist_data, system, event, var);
+ if (!hist_field) {
+ if (!system) {
+ system = data->onmatch.match_event_system;
+ event = data->onmatch.match_event;
+ }
+
+ hist_field = find_event_var(hist_data, system, event, var);
+ }
+
+ return hist_field;
+}
+
+static struct hist_field *
+onmatch_create_field_var(struct hist_trigger_data *hist_data,
+ struct action_data *data, char *system,
+ char *event, char *var)
+{
+ struct hist_field *hist_field = NULL;
+ struct field_var *field_var;
+
+ /*
+ * First try to create a field var on the target event (the
+ * currently being defined). This will create a variable for
+ * unqualified fields on the target event, or if qualified,
+ * target fields that have qualified names matching the target.
+ */
+ field_var = create_target_field_var(hist_data, system, event, var);
+
+ if (field_var && !IS_ERR(field_var)) {
+ save_field_var(hist_data, field_var);
+ hist_field = field_var->var;
+ } else {
+ field_var = NULL;
+ /*
+	 * If no explicit system.event is specified, default to
+ * looking for fields on the onmatch(system.event.xxx)
+ * event.
+ */
+ if (!system) {
+ system = data->onmatch.match_event_system;
+ event = data->onmatch.match_event;
+ }
+
+ /*
+ * At this point, we're looking at a field on another
+ * event. Because we can't modify a hist trigger on
+ * another event to add a variable for a field, we need
+ * to create a new trigger on that event and create the
+ * variable at the same time.
+ */
+ hist_field = create_field_var_hist(hist_data, system, event, var);
+ if (IS_ERR(hist_field))
+ goto free;
+ }
+ out:
+ return hist_field;
+ free:
+ destroy_field_var(field_var);
+ hist_field = NULL;
+ goto out;
+}
+
+static int onmatch_create(struct hist_trigger_data *hist_data,
+ struct trace_event_file *file,
+ struct action_data *data)
+{
+ char *event_name, *param, *system = NULL;
+ struct hist_field *hist_field, *var_ref;
+ unsigned int i, var_ref_idx;
+ unsigned int field_pos = 0;
+ struct synth_event *event;
+ int ret = 0;
+
+ mutex_lock(&synth_event_mutex);
+ event = find_synth_event(data->onmatch.synth_event_name);
+ if (!event) {
+ mutex_unlock(&synth_event_mutex);
+ return -EINVAL;
+ }
+ event->ref++;
+ mutex_unlock(&synth_event_mutex);
+
+ var_ref_idx = hist_data->n_var_refs;
+
+ for (i = 0; i < data->n_params; i++) {
+ char *p;
+
+ p = param = kstrdup(data->params[i], GFP_KERNEL);
+ if (!param) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ system = strsep(&param, ".");
+ if (!param) {
+ param = (char *)system;
+ system = event_name = NULL;
+ } else {
+ event_name = strsep(&param, ".");
+ if (!param) {
+ kfree(p);
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ if (param[0] == '$')
+ hist_field = onmatch_find_var(hist_data, data, system,
+ event_name, param);
+ else
+ hist_field = onmatch_create_field_var(hist_data, data,
+ system,
+ event_name,
+ param);
+
+ if (!hist_field) {
+ kfree(p);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (check_synth_field(event, hist_field, field_pos) == 0) {
+ var_ref = create_var_ref(hist_field, system, event_name);
+ if (!var_ref) {
+ kfree(p);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ save_synth_var_ref(hist_data, var_ref);
+ field_pos++;
+ kfree(p);
+ continue;
+ }
+
+ kfree(p);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (field_pos != event->n_fields) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data->fn = action_trace;
+ data->onmatch.synth_event = event;
+ data->onmatch.var_ref_idx = var_ref_idx;
+ out:
+ return ret;
+ err:
+ mutex_lock(&synth_event_mutex);
+ event->ref--;
+ mutex_unlock(&synth_event_mutex);
+
+ goto out;
+}
+
+static struct action_data *onmatch_parse(struct trace_array *tr, char *str)
+{
+ char *match_event, *match_event_system;
+ char *synth_event_name, *params;
+ struct action_data *data;
+ int ret = -EINVAL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ match_event = strsep(&str, ")");
+ if (!match_event || !str)
+ goto free;
+
+ match_event_system = strsep(&match_event, ".");
+ if (!match_event)
+ goto free;
+
+ if (IS_ERR(event_file(tr, match_event_system, match_event)))
+ goto free;
+
+ data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL);
+ if (!data->onmatch.match_event) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL);
+ if (!data->onmatch.match_event_system) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ strsep(&str, ".");
+ if (!str)
+ goto free;
+
+ synth_event_name = strsep(&str, "(");
+ if (!synth_event_name || !str)
+ goto free;
+
+ data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL);
+ if (!data->onmatch.synth_event_name) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ params = strsep(&str, ")");
+ if (!params || !str || (str && strlen(str)))
+ goto free;
+
+ ret = parse_action_params(params, data);
+ if (ret)
+ goto free;
+ out:
+ return data;
+ free:
+ onmatch_destroy(data);
+ data = ERR_PTR(ret);
+ goto out;
+}
+
static int create_hitcount_val(struct hist_trigger_data *hist_data)
{
hist_data->fields[HITCOUNT_IDX] =
@@ -3395,18 +3794,39 @@ static void destroy_actions(struct hist_
for (i = 0; i < hist_data->n_actions; i++) {
struct action_data *data = hist_data->actions[i];
- kfree(data);
+ if (data->fn == action_trace)
+ onmatch_destroy(data);
+ else
+ kfree(data);
}
}
static int parse_actions(struct hist_trigger_data *hist_data)
{
+ struct trace_array *tr = hist_data->event_file->tr;
+ struct action_data *data;
unsigned int i;
int ret = 0;
char *str;
for (i = 0; i < hist_data->attrs->n_actions; i++) {
str = hist_data->attrs->action_str[i];
+
+ if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) {
+ char *action_str = str + strlen("onmatch(");
+
+ data = onmatch_parse(tr, action_str);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ break;
+ }
+ data->fn = action_trace;
+ } else {
+ ret = -EINVAL;
+ break;
+ }
+
+ hist_data->actions[hist_data->n_actions++] = data;
}
return ret;
@@ -3421,11 +3841,50 @@ static int create_actions(struct hist_tr
for (i = 0; i < hist_data->attrs->n_actions; i++) {
data = hist_data->actions[i];
+
+ if (data->fn == action_trace) {
+ ret = onmatch_create(hist_data, file, data);
+ if (ret)
+ return ret;
+ }
}
return ret;
}
+static void print_onmatch_spec(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ struct action_data *data)
+{
+ unsigned int i;
+
+ seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system,
+ data->onmatch.match_event);
+
+ seq_printf(m, "%s(", data->onmatch.synth_event->name);
+
+ for (i = 0; i < data->n_params; i++) {
+ if (i)
+ seq_puts(m, ",");
+ seq_printf(m, "%s", data->params[i]);
+ }
+
+ seq_puts(m, ")");
+}
+
+static void print_actions_spec(struct seq_file *m,
+ struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_actions; i++) {
+ struct action_data *data = hist_data->actions[i];
+
+ if (data->fn == action_trace)
+ print_onmatch_spec(m, hist_data, data);
+ }
+}
+
static void destroy_field_var_hists(struct hist_trigger_data *hist_data)
{
unsigned int i;
@@ -3448,6 +3907,7 @@ static void destroy_hist_data(struct his
destroy_actions(hist_data);
destroy_field_vars(hist_data);
destroy_field_var_hists(hist_data);
+ destroy_synth_var_refs(hist_data);
kfree(hist_data);
}
@@ -4004,6 +4464,8 @@ static int event_hist_trigger_print(stru
}
seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
+ print_actions_spec(m, hist_data);
+
if (data->filter_str)
seq_printf(m, " if %s", data->filter_str);

View File

@@ -0,0 +1,487 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:01 -0600
Subject: [PATCH 35/48] tracing: Add 'onmax' hist trigger action support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add an 'onmax(var).save(field,...)' hist trigger action which is
invoked whenever an event exceeds the current maximum.
The end result is that the trace event fields or variables specified
as the onmax.save() params will be saved if 'var' exceeds the current
maximum for that hist trigger entry. This allows context from the
event that exhibited the new maximum to be saved for later reference.
When the histogram is displayed, additional fields displaying the
saved values will be printed.
As an example the below defines a couple of hist triggers, one for
sched_wakeup and another for sched_switch, keyed on pid. Whenever a
sched_wakeup occurs, the timestamp is saved in the entry corresponding
to the current pid, and when the scheduler switches back to that pid,
the timestamp difference is calculated. If the resulting latency
exceeds the current maximum latency, the specified save() values are
saved:
# echo 'hist:keys=pid:ts0=common_timestamp.usecs \
if comm=="cyclictest"' >> \
/sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
# echo 'hist:keys=next_pid:\
wakeup_lat=common_timestamp.usecs-$ts0:\
onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \
if next_comm=="cyclictest"' >> \
/sys/kernel/debug/tracing/events/sched/sched_switch/trigger
When the histogram is displayed, the max value and the saved values
corresponding to the max are displayed following the rest of the
fields:
# cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
{ next_pid: 3728 } hitcount: 199 \
max: 123 next_comm: cyclictest prev_pid: 0 \
prev_prio: 120 prev_comm: swapper/3
{ next_pid: 3730 } hitcount: 1321 \
max: 15 next_comm: cyclictest prev_pid: 0 \
prev_prio: 120 prev_comm: swapper/1
{ next_pid: 3729 } hitcount: 1973 \
max: 25 next_comm: cyclictest prev_pid: 0 \
prev_prio: 120 prev_comm: swapper/0
Totals:
Hits: 3493
Entries: 3
Dropped: 0
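To make the comparison concrete: if the entry for a given next_pid currently
holds max=15 and a sched_switch arrives whose $wakeup_lat resolves to 23,
onmax_save() stores 23 as the new max and snapshots next_comm, prev_pid,
prev_prio and prev_comm from that event; a later event with $wakeup_lat=12
changes neither the max nor the saved fields.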
Link: http://lkml.kernel.org/r/006907f71b1e839bb059337ec3c496f84fcb71de.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 4e30c922f0a19496ff424edd5c473666e1690601)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 331 ++++++++++++++++++++++++++++++++++-----
1 file changed, 296 insertions(+), 35 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -292,6 +292,10 @@ struct hist_trigger_data {
unsigned int n_field_var_str;
struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX];
unsigned int n_field_var_hists;
+
+ struct field_var *max_vars[SYNTH_FIELDS_MAX];
+ unsigned int n_max_vars;
+ unsigned int n_max_var_str;
};
struct synth_field {
@@ -334,6 +338,14 @@ struct action_data {
char *synth_event_name;
struct synth_event *synth_event;
} onmatch;
+
+ struct {
+ char *var_str;
+ char *fn_name;
+ unsigned int max_var_ref_idx;
+ struct hist_field *max_var;
+ struct hist_field *var;
+ } onmax;
};
};
@@ -1697,7 +1709,8 @@ static int parse_action(char *str, struc
if (attrs->n_actions >= HIST_ACTIONS_MAX)
return ret;
- if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0)) {
+ if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0) ||
+ (strncmp(str, "onmax(", strlen("onmax(")) == 0)) {
attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL);
if (!attrs->action_str[attrs->n_actions]) {
ret = -ENOMEM;
@@ -1869,7 +1882,7 @@ static int hist_trigger_elt_data_alloc(s
}
}
- n_str = hist_data->n_field_var_str;
+ n_str = hist_data->n_field_var_str + hist_data->n_max_var_str;
size = STR_VAR_LEN_MAX;
@@ -2894,6 +2907,15 @@ static void update_field_vars(struct his
hist_data->n_field_vars, 0);
}
+static void update_max_vars(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *rec)
+{
+ __update_field_vars(elt, rbe, rec, hist_data->max_vars,
+ hist_data->n_max_vars, hist_data->n_field_var_str);
+}
+
static struct hist_field *create_var(struct hist_trigger_data *hist_data,
struct trace_event_file *file,
char *name, int size, const char *type)
@@ -3023,6 +3045,227 @@ create_target_field_var(struct hist_trig
return create_field_var(target_hist_data, file, var_name);
}
+static void onmax_print(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt,
+ struct action_data *data)
+{
+ unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx;
+
+ seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx));
+
+ for (i = 0; i < hist_data->n_max_vars; i++) {
+ struct hist_field *save_val = hist_data->max_vars[i]->val;
+ struct hist_field *save_var = hist_data->max_vars[i]->var;
+ u64 val;
+
+ save_var_idx = save_var->var.idx;
+
+ val = tracing_map_read_var(elt, save_var_idx);
+
+ if (save_val->flags & HIST_FIELD_FL_STRING) {
+ seq_printf(m, " %s: %-32s", save_var->var.name,
+ (char *)(uintptr_t)(val));
+ } else
+ seq_printf(m, " %s: %10llu", save_var->var.name, val);
+ }
+}
+
+static void onmax_save(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe,
+ struct action_data *data, u64 *var_ref_vals)
+{
+ unsigned int max_idx = data->onmax.max_var->var.idx;
+ unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx;
+
+ u64 var_val, max_val;
+
+ var_val = var_ref_vals[max_var_ref_idx];
+ max_val = tracing_map_read_var(elt, max_idx);
+
+ if (var_val <= max_val)
+ return;
+
+ tracing_map_set_var(elt, max_idx, var_val);
+
+ update_max_vars(hist_data, elt, rbe, rec);
+}
+
+static void onmax_destroy(struct action_data *data)
+{
+ unsigned int i;
+
+ destroy_hist_field(data->onmax.max_var, 0);
+ destroy_hist_field(data->onmax.var, 0);
+
+ kfree(data->onmax.var_str);
+ kfree(data->onmax.fn_name);
+
+ for (i = 0; i < data->n_params; i++)
+ kfree(data->params[i]);
+
+ kfree(data);
+}
+
+static int onmax_create(struct hist_trigger_data *hist_data,
+ struct action_data *data)
+{
+ struct trace_event_file *file = hist_data->event_file;
+ struct hist_field *var_field, *ref_field, *max_var;
+ unsigned int var_ref_idx = hist_data->n_var_refs;
+ struct field_var *field_var;
+ char *onmax_var_str, *param;
+ unsigned long flags;
+ unsigned int i;
+ int ret = 0;
+
+ onmax_var_str = data->onmax.var_str;
+ if (onmax_var_str[0] != '$')
+ return -EINVAL;
+ onmax_var_str++;
+
+ var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str);
+ if (!var_field)
+ return -EINVAL;
+
+ flags = HIST_FIELD_FL_VAR_REF;
+ ref_field = create_hist_field(hist_data, NULL, flags, NULL);
+ if (!ref_field)
+ return -ENOMEM;
+
+ if (init_var_ref(ref_field, var_field, NULL, NULL)) {
+ destroy_hist_field(ref_field, 0);
+ ret = -ENOMEM;
+ goto out;
+ }
+ hist_data->var_refs[hist_data->n_var_refs] = ref_field;
+ ref_field->var_ref_idx = hist_data->n_var_refs++;
+ data->onmax.var = ref_field;
+
+ data->fn = onmax_save;
+ data->onmax.max_var_ref_idx = var_ref_idx;
+ max_var = create_var(hist_data, file, "max", sizeof(u64), "u64");
+ if (IS_ERR(max_var)) {
+ ret = PTR_ERR(max_var);
+ goto out;
+ }
+ data->onmax.max_var = max_var;
+
+ for (i = 0; i < data->n_params; i++) {
+ param = kstrdup(data->params[i], GFP_KERNEL);
+ if (!param) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ field_var = create_target_field_var(hist_data, NULL, NULL, param);
+ if (IS_ERR(field_var)) {
+ ret = PTR_ERR(field_var);
+ kfree(param);
+ goto out;
+ }
+
+ hist_data->max_vars[hist_data->n_max_vars++] = field_var;
+ if (field_var->val->flags & HIST_FIELD_FL_STRING)
+ hist_data->n_max_var_str++;
+
+ kfree(param);
+ }
+ out:
+ return ret;
+}
+
+static int parse_action_params(char *params, struct action_data *data)
+{
+ char *param, *saved_param;
+ int ret = 0;
+
+ while (params) {
+ if (data->n_params >= SYNTH_FIELDS_MAX)
+ goto out;
+
+ param = strsep(&params, ",");
+ if (!param) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ param = strstrip(param);
+ if (strlen(param) < 2) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ saved_param = kstrdup(param, GFP_KERNEL);
+ if (!saved_param) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ data->params[data->n_params++] = saved_param;
+ }
+ out:
+ return ret;
+}
+
+static struct action_data *onmax_parse(char *str)
+{
+ char *onmax_fn_name, *onmax_var_str;
+ struct action_data *data;
+ int ret = -EINVAL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ onmax_var_str = strsep(&str, ")");
+ if (!onmax_var_str || !str) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL);
+ if (!data->onmax.var_str) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ strsep(&str, ".");
+ if (!str)
+ goto free;
+
+ onmax_fn_name = strsep(&str, "(");
+ if (!onmax_fn_name || !str)
+ goto free;
+
+ if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) {
+ char *params = strsep(&str, ")");
+
+ if (!params) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ ret = parse_action_params(params, data);
+ if (ret)
+ goto free;
+ } else
+ goto free;
+
+ data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL);
+ if (!data->onmax.fn_name) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ out:
+ return data;
+ free:
+ onmax_destroy(data);
+ data = ERR_PTR(ret);
+ goto out;
+}
+
static void onmatch_destroy(struct action_data *data)
{
unsigned int i;
@@ -3107,39 +3350,6 @@ static int check_synth_field(struct synt
return 0;
}
-static int parse_action_params(char *params, struct action_data *data)
-{
- char *param, *saved_param;
- int ret = 0;
-
- while (params) {
- if (data->n_params >= SYNTH_FIELDS_MAX)
- goto out;
-
- param = strsep(&params, ",");
- if (!param) {
- ret = -EINVAL;
- goto out;
- }
-
- param = strstrip(param);
- if (strlen(param) < 2) {
- ret = -EINVAL;
- goto out;
- }
-
- saved_param = kstrdup(param, GFP_KERNEL);
- if (!saved_param) {
- ret = -ENOMEM;
- goto out;
- }
-
- data->params[data->n_params++] = saved_param;
- }
- out:
- return ret;
-}
-
static struct hist_field *
onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data,
char *system, char *event, char *var)
@@ -3796,6 +4006,8 @@ static void destroy_actions(struct hist_
if (data->fn == action_trace)
onmatch_destroy(data);
+ else if (data->fn == onmax_save)
+ onmax_destroy(data);
else
kfree(data);
}
@@ -3821,6 +4033,15 @@ static int parse_actions(struct hist_tri
break;
}
data->fn = action_trace;
+ } else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) {
+ char *action_str = str + strlen("onmax(");
+
+ data = onmax_parse(action_str);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ break;
+ }
+ data->fn = onmax_save;
} else {
ret = -EINVAL;
break;
@@ -3846,12 +4067,48 @@ static int create_actions(struct hist_tr
ret = onmatch_create(hist_data, file, data);
if (ret)
return ret;
+ } else if (data->fn == onmax_save) {
+ ret = onmax_create(hist_data, data);
+ if (ret)
+ return ret;
}
}
return ret;
}
+static void print_actions(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt)
+{
+ unsigned int i;
+
+ for (i = 0; i < hist_data->n_actions; i++) {
+ struct action_data *data = hist_data->actions[i];
+
+ if (data->fn == onmax_save)
+ onmax_print(m, hist_data, elt, data);
+ }
+}
+
+static void print_onmax_spec(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ struct action_data *data)
+{
+ unsigned int i;
+
+ seq_puts(m, ":onmax(");
+ seq_printf(m, "%s", data->onmax.var_str);
+ seq_printf(m, ").%s(", data->onmax.fn_name);
+
+ for (i = 0; i < hist_data->n_max_vars; i++) {
+ seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name);
+ if (i < hist_data->n_max_vars - 1)
+ seq_puts(m, ",");
+ }
+ seq_puts(m, ")");
+}
+
static void print_onmatch_spec(struct seq_file *m,
struct hist_trigger_data *hist_data,
struct action_data *data)
@@ -3882,6 +4139,8 @@ static void print_actions_spec(struct se
if (data->fn == action_trace)
print_onmatch_spec(m, hist_data, data);
+ else if (data->fn == onmax_save)
+ print_onmax_spec(m, hist_data, data);
}
}
@@ -4263,6 +4522,8 @@ hist_trigger_entry_print(struct seq_file
}
}
+ print_actions(m, hist_data, elt);
+
seq_puts(m, "\n");
}

View File

@@ -0,0 +1,76 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:02 -0600
Subject: [PATCH 36/48] tracing: Allow whitespace to surround hist trigger
filter
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The existing code only allows for one space before and after the 'if'
specifying the filter for a hist trigger. Add code to make that more
permissive as far as whitespace goes. Specifically, we want to allow
spaces in the trigger itself now that we have additional syntax
(onmatch/onmax) where spaces are more natural e.g. spaces after commas
in param lists.
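The scan added below looks for a standalone 'if' bounded by whitespace instead
of blindly splitting at the first space. For instance, in a hypothetical
command such as
    keys=my_diff if pid > 0
the first strstr() hit is the 'if' embedded in 'my_diff'; because the character
before it is neither a space nor a tab the scan steps past it, and the split
lands on the standalone 'if', leaving 'keys=my_diff' as the trigger and
everything from 'if' onward as the filter.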
Link: http://lkml.kernel.org/r/1053090c3c308d4f431accdeb59dff4b511d4554.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit ab257ec0f8eb50c58fafd50b1cb5352553f31ccf)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 37 ++++++++++++++++++++++++++++++++-----
1 file changed, 32 insertions(+), 5 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5162,7 +5162,7 @@ static int event_hist_trigger_func(struc
struct synth_event *se;
const char *se_name;
bool remove = false;
- char *trigger;
+ char *trigger, *p;
int ret = 0;
if (!param)
@@ -5171,10 +5171,37 @@ static int event_hist_trigger_func(struc
if (glob[0] == '!')
remove = true;
- /* separate the trigger from the filter (k:v [if filter]) */
- trigger = strsep(&param, " \t");
- if (!trigger)
- return -EINVAL;
+ /*
+ * separate the trigger from the filter (k:v [if filter])
+ * allowing for whitespace in the trigger
+ */
+ p = trigger = param;
+ do {
+ p = strstr(p, "if");
+ if (!p)
+ break;
+ if (p == param)
+ return -EINVAL;
+ if (*(p - 1) != ' ' && *(p - 1) != '\t') {
+ p++;
+ continue;
+ }
+ if (p >= param + strlen(param) - strlen("if") - 1)
+ return -EINVAL;
+ if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') {
+ p++;
+ continue;
+ }
+ break;
+ } while (p);
+
+ if (!p)
+ param = NULL;
+ else {
+ *(p - 1) = '\0';
+ param = strstrip(p);
+ trigger = strstrip(trigger);
+ }
attrs = parse_hist_trigger_attrs(trigger);
if (IS_ERR(attrs))

View File

@@ -0,0 +1,115 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:03 -0600
Subject: [PATCH 37/48] tracing: Add cpu field for hist triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
A common key to use in a histogram is the cpuid - add a new cpu
'synthetic' field named 'cpu' for that purpose.
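Since 'cpu' behaves like any other field, it can serve as a key on any event,
alone or in a compound key, e.g. (a hypothetical trigger):
    # echo 'hist:keys=cpu,next_pid' >> \
    /sys/kernel/debug/tracing/events/sched/sched_switch/trigger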
Link: http://lkml.kernel.org/r/89537645bfc957e0d76e2cacf5f0ada88691a6cc.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 4bfaa88f0e0e98e706d57647452e4d37afd78d00)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 15 +++++++++++++++
kernel/trace/trace_events_hist.c | 28 +++++++++++++++++++++++++++-
2 files changed, 42 insertions(+), 1 deletion(-)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -172,6 +172,21 @@
The examples below provide a more concrete illustration of the
concepts and typical usage patterns discussed above.
+ 'special' event fields
+ ------------------------
+
+ There are a number of 'special event fields' available for use as
+ keys or values in a hist trigger. These look like and behave as if
+ they were actual event fields, but aren't really part of the event's
+ field definition or format file. They are however available for any
+ event, and can be used anywhere an actual event field could be.
+ They are:
+
+   common_timestamp    u64 - timestamp (from ring buffer) associated
+                             with the event, in nanoseconds. May be
+                             modified by .usecs to have timestamps
+                             interpreted as microseconds.
+   cpu                 int - the cpu on which the event occurred.
6.2 'hist' trigger examples
---------------------------
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -227,6 +227,7 @@ enum hist_field_flags {
HIST_FIELD_FL_VAR = 1 << 12,
HIST_FIELD_FL_EXPR = 1 << 13,
HIST_FIELD_FL_VAR_REF = 1 << 14,
+ HIST_FIELD_FL_CPU = 1 << 15,
};
struct var_defs {
@@ -1164,6 +1165,16 @@ static u64 hist_field_timestamp(struct h
return ts;
}
+static u64 hist_field_cpu(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ int cpu = smp_processor_id();
+
+ return cpu;
+}
+
static struct hist_field *
check_field_for_var_ref(struct hist_field *hist_field,
struct hist_trigger_data *var_data,
@@ -1602,6 +1613,8 @@ static const char *hist_field_name(struc
field_name = hist_field_name(field->operands[0], ++level);
else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
field_name = "common_timestamp";
+ else if (field->flags & HIST_FIELD_FL_CPU)
+ field_name = "cpu";
else if (field->flags & HIST_FIELD_FL_EXPR ||
field->flags & HIST_FIELD_FL_VAR_REF) {
if (field->system) {
@@ -2109,6 +2122,15 @@ static struct hist_field *create_hist_fi
goto out;
}
+ if (flags & HIST_FIELD_FL_CPU) {
+ hist_field->fn = hist_field_cpu;
+ hist_field->size = sizeof(int);
+ hist_field->type = kstrdup("unsigned int", GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
+ goto out;
+ }
+
if (WARN_ON_ONCE(!field))
goto out;
@@ -2345,7 +2367,9 @@ parse_field(struct hist_trigger_data *hi
hist_data->enable_timestamps = true;
if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
hist_data->attrs->ts_in_usecs = true;
- } else {
+ } else if (strcmp(field_name, "cpu") == 0)
+ *flags |= HIST_FIELD_FL_CPU;
+ else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
field = ERR_PTR(-EINVAL);
@@ -4619,6 +4643,8 @@ static void hist_field_print(struct seq_
if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
seq_puts(m, "common_timestamp");
+ else if (hist_field->flags & HIST_FIELD_FL_CPU)
+ seq_puts(m, "cpu");
else if (field_name) {
if (hist_field->flags & HIST_FIELD_FL_VAR_REF)
seq_putc(m, '$');

View File

@@ -0,0 +1,165 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:04 -0600
Subject: [PATCH 38/48] tracing: Add hist trigger support for variable
reference aliases
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add support for alias=$somevar where alias can be used as
onmatch.xxx($alias).
Aliases are a way of creating a new name for an existing variable, for
flexibility in making naming clearer in certain cases. For example, in
the trigger below the user perhaps feels that using $new_lat in the synthetic
event invocation is opaque or doesn't fit well stylistically with
previous triggers, so creates an alias of $new_lat named $latency and
uses that in the call instead:
# echo 'hist:keys=next_pid:new_lat=common_timestamp.usecs' >
/sys/kernel/debug/tracing/events/sched/sched_switch/trigger
# echo 'hist:keys=pid:latency=$new_lat:
onmatch(sched.sched_switch).wake2($latency,pid)' >
/sys/kernel/debug/tracing/events/synthetic/wake1/trigger
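Internally an alias is a thin wrapper, not a copy: create_alias() in the hunk
below reuses the underlying reference's fn callback and stores the reference as
operands[0], so resolving $latency yields exactly the value $new_lat would.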
Link: http://lkml.kernel.org/r/ef20a65d921af3a873a6f1e8c71407c926d5586f.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 53c5a4f99f1a5f6ba304453716da571f3e51bc79)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 74 +++++++++++++++++++++++++++++++++++----
1 file changed, 67 insertions(+), 7 deletions(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -228,6 +228,7 @@ enum hist_field_flags {
HIST_FIELD_FL_EXPR = 1 << 13,
HIST_FIELD_FL_VAR_REF = 1 << 14,
HIST_FIELD_FL_CPU = 1 << 15,
+ HIST_FIELD_FL_ALIAS = 1 << 16,
};
struct var_defs {
@@ -1609,7 +1610,8 @@ static const char *hist_field_name(struc
if (field->field)
field_name = field->field->name;
- else if (field->flags & HIST_FIELD_FL_LOG2)
+ else if (field->flags & HIST_FIELD_FL_LOG2 ||
+ field->flags & HIST_FIELD_FL_ALIAS)
field_name = hist_field_name(field->operands[0], ++level);
else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
field_name = "common_timestamp";
@@ -2080,7 +2082,7 @@ static struct hist_field *create_hist_fi
hist_field->hist_data = hist_data;
- if (flags & HIST_FIELD_FL_EXPR)
+ if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS)
goto out; /* caller will populate */
if (flags & HIST_FIELD_FL_VAR_REF) {
@@ -2217,10 +2219,18 @@ static int init_var_ref(struct hist_fiel
}
}
- ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL);
- if (!ref_field->name) {
- err = -ENOMEM;
- goto free;
+ if (var_field->var.name) {
+ ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL);
+ if (!ref_field->name) {
+ err = -ENOMEM;
+ goto free;
+ }
+ } else if (var_field->name) {
+ ref_field->name = kstrdup(var_field->name, GFP_KERNEL);
+ if (!ref_field->name) {
+ err = -ENOMEM;
+ goto free;
+ }
}
ref_field->type = kstrdup(var_field->type, GFP_KERNEL);
@@ -2382,6 +2392,28 @@ parse_field(struct hist_trigger_data *hi
return field;
}
+static struct hist_field *create_alias(struct hist_trigger_data *hist_data,
+ struct hist_field *var_ref,
+ char *var_name)
+{
+ struct hist_field *alias = NULL;
+ unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR;
+
+ alias = create_hist_field(hist_data, NULL, flags, var_name);
+ if (!alias)
+ return NULL;
+
+ alias->fn = var_ref->fn;
+ alias->operands[0] = var_ref;
+
+ if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) {
+ destroy_hist_field(alias, 0);
+ return NULL;
+ }
+
+ return alias;
+}
+
static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
struct trace_event_file *file, char *str,
unsigned long *flags, char *var_name)
@@ -2415,6 +2447,13 @@ static struct hist_field *parse_atom(str
if (hist_field) {
hist_data->var_refs[hist_data->n_var_refs] = hist_field;
hist_field->var_ref_idx = hist_data->n_var_refs++;
+ if (var_name) {
+ hist_field = create_alias(hist_data, hist_field, var_name);
+ if (!hist_field) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
return hist_field;
}
} else
@@ -2515,6 +2554,26 @@ static int check_expr_operands(struct hi
unsigned long operand1_flags = operand1->flags;
unsigned long operand2_flags = operand2->flags;
+ if ((operand1_flags & HIST_FIELD_FL_VAR_REF) ||
+ (operand1_flags & HIST_FIELD_FL_ALIAS)) {
+ struct hist_field *var;
+
+ var = find_var_field(operand1->var.hist_data, operand1->name);
+ if (!var)
+ return -EINVAL;
+ operand1_flags = var->flags;
+ }
+
+ if ((operand2_flags & HIST_FIELD_FL_VAR_REF) ||
+ (operand2_flags & HIST_FIELD_FL_ALIAS)) {
+ struct hist_field *var;
+
+ var = find_var_field(operand2->var.hist_data, operand2->name);
+ if (!var)
+ return -EINVAL;
+ operand2_flags = var->flags;
+ }
+
if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) !=
(operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS))
return -EINVAL;
@@ -4646,7 +4705,8 @@ static void hist_field_print(struct seq_
else if (hist_field->flags & HIST_FIELD_FL_CPU)
seq_puts(m, "cpu");
else if (field_name) {
- if (hist_field->flags & HIST_FIELD_FL_VAR_REF)
+ if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
+ hist_field->flags & HIST_FIELD_FL_ALIAS)
seq_putc(m, '$');
seq_printf(m, "%s", field_name);
}
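
For readers skimming the diff, the wiring is easy to miss: an alias is a
variable-flavored hist_field whose operands[0] points at the real variable
reference, and name resolution simply follows that pointer. Below is a
minimal standalone C sketch of that lookup chain (illustrative names and a
simplified struct, not kernel code):

#include <stdio.h>

/* Simplified stand-in for struct hist_field: only what the alias
 * chain needs. 'is_alias' plays the role of HIST_FIELD_FL_ALIAS. */
struct field {
        const char *name;        /* variable name, e.g. "latency" */
        struct field *operand0;  /* underlying reference for an alias */
        int is_alias;
};

/* Mirrors the hist_field_name() change above: an alias resolves to
 * the name of the field it wraps. */
static const char *field_name(const struct field *f)
{
        if (f->is_alias && f->operand0)
                return field_name(f->operand0);
        return f->name;
}

int main(void)
{
        struct field new_lat = { "new_lat", NULL, 0 };
        struct field latency = { "latency", &new_lat, 1 };

        /* $latency resolves to the variable it aliases: */
        printf("%s\n", field_name(&latency));  /* prints "new_lat" */
        return 0;
}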


@ -0,0 +1,503 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:05 -0600
Subject: [PATCH 39/48] tracing: Add 'last error' error facility for hist
triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
With the addition of variables and actions, it's become necessary to
provide more detailed error information to users about syntax errors.
Add a 'last error' facility accessible via the erroring event's 'hist'
file. Reading the hist file after an error will display more detailed
information about what went wrong, if information is available. This
extended error information will be available until the next hist
trigger command for that event.
# echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
echo: write error: Invalid argument
# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
ERROR: Couldn't yyy: zzz
Last command: xxx
Also add specific error messages for variable and action errors.
Link: http://lkml.kernel.org/r/64e9c422fc8aeafcc2f7a3b4328c0cffe7969129.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 215016863b5ec1ee5db5e20f32ffe015a497209f)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 20 ++++
kernel/trace/trace_events_hist.c | 164 ++++++++++++++++++++++++++++++++++----
2 files changed, 170 insertions(+), 14 deletions(-)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -188,6 +188,26 @@
interpreted as microseconds.
cpu int - the cpu on which the event occurred.
+ Extended error information
+ --------------------------
+
+ For some error conditions encountered when invoking a hist trigger
+ command, extended error information is available via the
+ corresponding event's 'hist' file. Reading the hist file after an
+ error will display more detailed information about what went wrong,
+ if information is available. This extended error information will
+ be available until the next hist trigger command for that event.
+
+ If available for a given error condition, the extended error
+ information and usage takes the following form:
+
+ # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
+ echo: write error: Invalid argument
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
+ ERROR: Couldn't yyy: zzz
+ Last command: xxx
+
6.2 'hist' trigger examples
---------------------------
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -351,6 +351,65 @@ struct action_data {
};
};
+
+static char last_hist_cmd[MAX_FILTER_STR_VAL];
+static char hist_err_str[MAX_FILTER_STR_VAL];
+
+static void last_cmd_set(char *str)
+{
+ if (!str)
+ return;
+
+ strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1);
+}
+
+static void hist_err(char *str, char *var)
+{
+ int maxlen = MAX_FILTER_STR_VAL - 1;
+
+ if (!str)
+ return;
+
+ if (strlen(hist_err_str))
+ return;
+
+ if (!var)
+ var = "";
+
+ if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen)
+ return;
+
+ strcat(hist_err_str, str);
+ strcat(hist_err_str, var);
+}
+
+static void hist_err_event(char *str, char *system, char *event, char *var)
+{
+ char err[MAX_FILTER_STR_VAL];
+
+ if (system && var)
+ snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var);
+ else if (system)
+ snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event);
+ else
+ strncpy(err, var, MAX_FILTER_STR_VAL);
+
+ hist_err(str, err);
+}
+
+static void hist_err_clear(void)
+{
+ hist_err_str[0] = '\0';
+}
+
+static bool have_hist_err(void)
+{
+ if (strlen(hist_err_str))
+ return true;
+
+ return false;
+}
+
static LIST_HEAD(synth_event_list);
static DEFINE_MUTEX(synth_event_mutex);
@@ -1448,8 +1507,10 @@ static struct trace_event_file *find_var
continue;
if (find_var_field(var_hist_data, var_name)) {
- if (found)
+ if (found) {
+ hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
return NULL;
+ }
found = file;
}
@@ -1498,6 +1559,7 @@ find_match_var(struct hist_trigger_data
hist_field = find_file_var(file, var_name);
if (hist_field) {
if (found) {
+ hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
return ERR_PTR(-EINVAL);
}
@@ -1781,6 +1843,7 @@ static int parse_assignment(char *str, s
char *assignment;
if (attrs->n_assignments == TRACING_MAP_VARS_MAX) {
+ hist_err("Too many variables defined: ", str);
ret = -EINVAL;
goto out;
}
@@ -2335,6 +2398,10 @@ static struct hist_field *parse_var_ref(
if (var_field)
ref_field = create_var_ref(var_field, system, event_name);
+ if (!ref_field)
+ hist_err_event("Couldn't find variable: $",
+ system, event_name, var_name);
+
return ref_field;
}
@@ -2494,6 +2561,7 @@ static struct hist_field *parse_unary(st
// we support only -(xxx) i.e. explicit parens required
if (level > 3) {
+ hist_err("Too many subexpressions (3 max): ", str);
ret = -EINVAL;
goto free;
}
@@ -2575,8 +2643,10 @@ static int check_expr_operands(struct hi
}
if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) !=
- (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS))
+ (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) {
+ hist_err("Timestamp units in expression don't match", NULL);
return -EINVAL;
+ }
return 0;
}
@@ -2591,8 +2661,10 @@ static struct hist_field *parse_expr(str
int field_op, ret = -EINVAL;
char *sep, *operand1_str;
- if (level > 3)
+ if (level > 3) {
+ hist_err("Too many subexpressions (3 max): ", str);
return ERR_PTR(-EINVAL);
+ }
field_op = contains_operator(str);
@@ -2826,12 +2898,17 @@ create_field_var_hist(struct hist_trigge
char *cmd;
int ret;
- if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX)
+ if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) {
+ hist_err_event("onmatch: Too many field variables defined: ",
+ subsys_name, event_name, field_name);
return ERR_PTR(-EINVAL);
+ }
file = event_file(tr, subsys_name, event_name);
if (IS_ERR(file)) {
+ hist_err_event("onmatch: Event file not found: ",
+ subsys_name, event_name, field_name);
ret = PTR_ERR(file);
return ERR_PTR(ret);
}
@@ -2843,8 +2920,11 @@ create_field_var_hist(struct hist_trigge
* yet a registered histogram so we can't use that.
*/
hist_data = find_compatible_hist(target_hist_data, file);
- if (!hist_data)
+ if (!hist_data) {
+ hist_err_event("onmatch: Matching event histogram not found: ",
+ subsys_name, event_name, field_name);
return ERR_PTR(-EINVAL);
+ }
/* See if a synthetic field variable has already been created */
event_var = find_synthetic_field_var(target_hist_data, subsys_name,
@@ -2903,6 +2983,8 @@ create_field_var_hist(struct hist_trigge
kfree(cmd);
kfree(var_hist->cmd);
kfree(var_hist);
+ hist_err_event("onmatch: Couldn't create histogram for field: ",
+ subsys_name, event_name, field_name);
return ERR_PTR(ret);
}
@@ -2914,6 +2996,8 @@ create_field_var_hist(struct hist_trigge
if (IS_ERR_OR_NULL(event_var)) {
kfree(var_hist->cmd);
kfree(var_hist);
+ hist_err_event("onmatch: Couldn't find synthetic variable: ",
+ subsys_name, event_name, field_name);
return ERR_PTR(-EINVAL);
}
@@ -3050,18 +3134,21 @@ static struct field_var *create_field_va
int ret = 0;
if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) {
+ hist_err("Too many field variables defined: ", field_name);
ret = -EINVAL;
goto err;
}
val = parse_atom(hist_data, file, field_name, &flags, NULL);
if (IS_ERR(val)) {
+ hist_err("Couldn't parse field variable: ", field_name);
ret = PTR_ERR(val);
goto err;
}
var = create_var(hist_data, file, field_name, val->size, val->type);
if (IS_ERR(var)) {
+ hist_err("Couldn't create or find variable: ", field_name);
kfree(val);
ret = PTR_ERR(var);
goto err;
@@ -3204,13 +3291,17 @@ static int onmax_create(struct hist_trig
int ret = 0;
onmax_var_str = data->onmax.var_str;
- if (onmax_var_str[0] != '$')
+ if (onmax_var_str[0] != '$') {
+ hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str);
return -EINVAL;
+ }
onmax_var_str++;
var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str);
- if (!var_field)
+ if (!var_field) {
+ hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str);
return -EINVAL;
+ }
flags = HIST_FIELD_FL_VAR_REF;
ref_field = create_hist_field(hist_data, NULL, flags, NULL);
@@ -3230,6 +3321,7 @@ static int onmax_create(struct hist_trig
data->onmax.max_var_ref_idx = var_ref_idx;
max_var = create_var(hist_data, file, "max", sizeof(u64), "u64");
if (IS_ERR(max_var)) {
+ hist_err("onmax: Couldn't create onmax variable: ", "max");
ret = PTR_ERR(max_var);
goto out;
}
@@ -3244,6 +3336,7 @@ static int onmax_create(struct hist_trig
field_var = create_target_field_var(hist_data, NULL, NULL, param);
if (IS_ERR(field_var)) {
+ hist_err("onmax: Couldn't create field variable: ", param);
ret = PTR_ERR(field_var);
kfree(param);
goto out;
@@ -3276,6 +3369,7 @@ static int parse_action_params(char *par
param = strstrip(param);
if (strlen(param) < 2) {
+ hist_err("Invalid action param: ", param);
ret = -EINVAL;
goto out;
}
@@ -3451,6 +3545,9 @@ onmatch_find_var(struct hist_trigger_dat
hist_field = find_event_var(hist_data, system, event, var);
}
+ if (!hist_field)
+ hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var);
+
return hist_field;
}
@@ -3518,6 +3615,7 @@ static int onmatch_create(struct hist_tr
mutex_lock(&synth_event_mutex);
event = find_synth_event(data->onmatch.synth_event_name);
if (!event) {
+ hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name);
mutex_unlock(&synth_event_mutex);
return -EINVAL;
}
@@ -3577,12 +3675,15 @@ static int onmatch_create(struct hist_tr
continue;
}
+ hist_err_event("onmatch: Param type doesn't match synthetic event field type: ",
+ system, event_name, param);
kfree(p);
ret = -EINVAL;
goto err;
}
if (field_pos != event->n_fields) {
+ hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name);
ret = -EINVAL;
goto err;
}
@@ -3612,15 +3713,22 @@ static struct action_data *onmatch_parse
return ERR_PTR(-ENOMEM);
match_event = strsep(&str, ")");
- if (!match_event || !str)
+ if (!match_event || !str) {
+ hist_err("onmatch: Missing closing paren: ", match_event);
goto free;
+ }
match_event_system = strsep(&match_event, ".");
- if (!match_event)
+ if (!match_event) {
+ hist_err("onmatch: Missing subsystem for match event: ", match_event_system);
goto free;
+ }
- if (IS_ERR(event_file(tr, match_event_system, match_event)))
+ if (IS_ERR(event_file(tr, match_event_system, match_event))) {
+ hist_err_event("onmatch: Invalid subsystem or event name: ",
+ match_event_system, match_event, NULL);
goto free;
+ }
data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL);
if (!data->onmatch.match_event) {
@@ -3635,12 +3743,16 @@ static struct action_data *onmatch_parse
}
strsep(&str, ".");
- if (!str)
+ if (!str) {
+ hist_err("onmatch: Missing . after onmatch(): ", str);
goto free;
+ }
synth_event_name = strsep(&str, "(");
- if (!synth_event_name || !str)
+ if (!synth_event_name || !str) {
+ hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name);
goto free;
+ }
data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL);
if (!data->onmatch.synth_event_name) {
@@ -3649,8 +3761,10 @@ static struct action_data *onmatch_parse
}
params = strsep(&str, ")");
- if (!params || !str || (str && strlen(str)))
+ if (!params || !str || (str && strlen(str))) {
+ hist_err("onmatch: Missing closing paramlist paren: ", params);
goto free;
+ }
ret = parse_action_params(params, data);
if (ret)
@@ -3725,7 +3839,9 @@ static int create_var_field(struct hist_
if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
return -EINVAL;
+
if (find_var(hist_data, file, var_name) && !hist_data->remove) {
+ hist_err("Variable already defined: ", var_name);
return -EINVAL;
}
@@ -3806,6 +3922,7 @@ static int create_key_field(struct hist_
}
if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+ hist_err("Using variable references as keys not supported: ", field_str);
destroy_hist_field(hist_field, 0);
ret = -EINVAL;
goto out;
@@ -3919,11 +4036,13 @@ static int parse_var_defs(struct hist_tr
var_name = strsep(&field_str, "=");
if (!var_name || !field_str) {
+ hist_err("Malformed assignment: ", var_name);
ret = -EINVAL;
goto free;
}
if (n_vars == TRACING_MAP_VARS_MAX) {
+ hist_err("Too many variables defined: ", var_name);
ret = -EINVAL;
goto free;
}
@@ -4675,6 +4794,11 @@ static int hist_show(struct seq_file *m,
hist_trigger_show(m, data, n++);
}
+ if (have_hist_err()) {
+ seq_printf(m, "\nERROR: %s\n", hist_err_str);
+ seq_printf(m, " Last command: %s\n", last_hist_cmd);
+ }
+
out_unlock:
mutex_unlock(&event_mutex);
@@ -5039,6 +5163,7 @@ static int hist_register_trigger(char *g
if (named_data) {
if (!hist_trigger_match(data, named_data, named_data,
true)) {
+ hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name);
ret = -EINVAL;
goto out;
}
@@ -5058,13 +5183,16 @@ static int hist_register_trigger(char *g
test->paused = false;
else if (hist_data->attrs->clear)
hist_clear(test);
- else
+ else {
+ hist_err("Hist trigger already exists", NULL);
ret = -EEXIST;
+ }
goto out;
}
}
new:
if (hist_data->attrs->cont || hist_data->attrs->clear) {
+ hist_err("Can't clear or continue a nonexistent hist trigger", NULL);
ret = -ENOENT;
goto out;
}
@@ -5251,6 +5379,11 @@ static int event_hist_trigger_func(struc
char *trigger, *p;
int ret = 0;
+ if (glob && strlen(glob)) {
+ last_cmd_set(param);
+ hist_err_clear();
+ }
+
if (!param)
return -EINVAL;
@@ -5389,6 +5522,9 @@ static int event_hist_trigger_func(struc
/* Just return zero, not the number of registered triggers */
ret = 0;
out:
+ if (ret == 0)
+ hist_err_clear();
+
return ret;
out_unreg:
cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
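
The error-buffer logic above is deliberately first-error-wins: once
hist_err_str holds a message, later calls are ignored until hist_err_clear().
A standalone C sketch of that behavior (buffer size and names are
illustrative, not the kernel's):

#include <stdio.h>
#include <string.h>

#define ERR_MAX 256
static char err_buf[ERR_MAX];   /* hist_err_str analogue */

/* Mirrors hist_err(): record only the first error until cleared,
 * and refuse messages that would not fit whole. */
static void record_err(const char *msg, const char *detail)
{
        if (!msg || err_buf[0] != '\0')
                return;
        if (strlen(msg) + strlen(detail) > ERR_MAX - 1)
                return;
        strcat(err_buf, msg);
        strcat(err_buf, detail);
}

int main(void)
{
        record_err("Couldn't find variable: $", "ts0");
        record_err("Hist trigger already exists", "");  /* ignored: not first */
        printf("ERROR: %s\n", err_buf);
        err_buf[0] = '\0';  /* hist_err_clear() analogue */
        return 0;
}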


@ -0,0 +1,406 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:06 -0600
Subject: [PATCH 40/48] tracing: Add inter-event hist trigger Documentation
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Add background and details on inter-event hist triggers, including
hist variables, synthetic events, and actions.
Link: http://lkml.kernel.org/r/b0414efb66535aa52aa7411f58c3d56724027fce.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Baohong Liu <baohong.liu@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 7d5f30af5e39e572f6984c1083fe79fd7dc34d04)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 381 ++++++++++++++++++++++++++++++++++++++
1 file changed, 381 insertions(+)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -1603,3 +1603,384 @@
Hits: 489
Entries: 7
Dropped: 0
+
+
+2.2 Inter-event hist triggers
+-----------------------------
+
+Inter-event hist triggers are hist triggers that combine values from
+one or more other events and create a histogram using that data. Data
+from an inter-event histogram can in turn become the source for
+further combined histograms, thus providing a chain of related
+histograms, which is important for some applications.
+
+The most important example of an inter-event quantity that can be used
+in this manner is latency, which is simply a difference in timestamps
+between two events. Although latency is the most important
+inter-event quantity, note that because the support is completely
+general across the trace event subsystem, any event field can be used
+in an inter-event quantity.
+
+An example of a histogram that combines data from other histograms
+into a useful chain would be a 'wakeupswitch latency' histogram that
+combines a 'wakeup latency' histogram and a 'switch latency'
+histogram.
+
+Normally, a hist trigger specification consists of a (possibly
+compound) key along with one or more numeric values, which are
+continually updated sums associated with that key. A histogram
+specification in this case consists of individual key and value
+specifications that refer to trace event fields associated with a
+single event type.
+
+The inter-event hist trigger extension allows fields from multiple
+events to be referenced and combined into a multi-event histogram
+specification. In support of this overall goal, a few enabling
+features have been added to the hist trigger support:
+
+ - In order to compute an inter-event quantity, a value from one
+ event needs to be saved and then referenced from another event. This
+ requires the introduction of support for histogram 'variables'.
+
+ - The computation of inter-event quantities and their combination
+ require some minimal amount of support for applying simple
+ expressions to variables (+ and -).
+
+ - A histogram consisting of inter-event quantities isn't logically a
+ histogram on either event (so having the 'hist' file for either
+ event host the histogram output doesn't really make sense). To
+ address the idea that the histogram is associated with a
+ combination of events, support is added allowing the creation of
+ 'synthetic' events that are events derived from other events.
+ These synthetic events are full-fledged events just like any other
+ and can be used as such, as for instance to create the
+ 'combination' histograms mentioned previously.
+
+ - A set of 'actions' can be associated with histogram entries -
+ these can be used to generate the previously mentioned synthetic
+ events, but can also be used for other purposes, such as for
+ example saving context when a 'max' latency has been hit.
+
+ - Trace events don't have a 'timestamp' associated with them, but
+ there is an implicit timestamp saved along with an event in the
+ underlying ftrace ring buffer. This timestamp is now exposed as a
+ synthetic field named 'common_timestamp' which can be used in
+ histograms as if it were any other event field; it isn't an actual
+ field in the trace format but rather is a synthesized value that
+ nonetheless can be used as if it were an actual field. By default
+ it is in units of nanoseconds; appending '.usecs' to a
+ common_timestamp field changes the units to microseconds.
+
+These features are decribed in more detail in the following sections.
+
+2.2.1 Histogram Variables
+-------------------------
+
+Variables are simply named locations used for saving and retrieving
+values between matching events. A 'matching' event is defined as an
+event that has a matching key - if a variable is saved for a histogram
+entry corresponding to that key, any subsequent event with a matching
+key can access that variable.
+
+A variable's value is normally available to any subsequent event until
+it is set to something else by a subsequent event. The one exception
+to that rule is that any variable used in an expression is essentially
+'read-once' - once it's used by an expression in a subsequent event,
+it's reset to its 'unset' state, which means it can't be used again
+unless it's set again. This ensures not only that an event doesn't
+use an uninitialized variable in a calculation, but that that variable
+is used only once and not for any unrelated subsequent match.
+
+The basic syntax for saving a variable is to simply prefix a unique
+variable name not corresponding to any keyword along with an '=' sign
+to any event field.
+
+Either keys or values can be saved and retrieved in this way. This
+creates a variable named 'ts0' for a histogram entry with the key
+'next_pid':
+
+ # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ... >> \
+ event/trigger
+
+The ts0 variable can be accessed by any subsequent event having the
+same pid as 'next_pid'.
+
+Variable references are formed by prepending the variable name with
+the '$' sign. Thus for example, the ts0 variable above would be
+referenced as '$ts0' in expressions.
+
+Because 'vals=' is used, the common_timestamp variable value above
+will also be summed as a normal histogram value would (though for a
+timestamp it makes little sense).
+
+The below shows that a key value can also be saved in the same way:
+
+ # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger
+
+If a variable isn't a key variable or prefixed with 'vals=', the
+associated event field will be saved in a variable but won't be summed
+as a value:
+
+ # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger
+
+Multiple variables can be assigned at the same time. The below would
+result in both ts0 and b being created as variables, with both
+common_timestamp and field1 additionally being summed as values:
+
+ # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \
+ event/trigger
+
+Note that variable assignments can appear either preceding or
+following their use. The command below behaves identically to the
+command above:
+
+ # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \
+ event/trigger
+
+Any number of variables not bound to a 'vals=' prefix can also be
+assigned by simply separating them with colons. Below is the same
+thing but without the values being summed in the histogram:
+
+ # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger
+
+Variables set as above can be referenced and used in expressions on
+another event.
+
+For example, here's how a latency can be calculated:
+
+ # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger
+ # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger
+
+In the first line above, the event's timestamp is saved into the
+variable ts0. In the next line, ts0 is subtracted from the second
+event's timestamp to produce the latency, which is then assigned into
+yet another variable, 'wakeup_lat'. The hist trigger below in turn
+makes use of the wakeup_lat variable to compute a combined latency
+using the same key and variable from yet another event:
+
+ # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger
+
+2.2.2 Synthetic Events
+----------------------
+
+Synthetic events are user-defined events generated from hist trigger
+variables or fields associated with one or more other events. Their
+purpose is to provide a mechanism for displaying data spanning
+multiple events consistent with the existing and already familiar
+usage for normal events.
+
+To define a synthetic event, the user writes a simple specification
+consisting of the name of the new event along with one or more
+variables and their types, which can be any valid field type,
+separated by semicolons, to the tracing/synthetic_events file.
+
+For instance, the following creates a new event named 'wakeup_latency'
+with 3 fields: lat, pid, and prio. Each of those fields is simply a
+variable reference to a variable on another event:
+
+ # echo 'wakeup_latency \
+ u64 lat; \
+ pid_t pid; \
+ int prio' >> \
+ /sys/kernel/debug/tracing/synthetic_events
+
+Reading the tracing/synthetic_events file lists all the currently
+defined synthetic events, in this case the event defined above:
+
+ # cat /sys/kernel/debug/tracing/synthetic_events
+ wakeup_latency u64 lat; pid_t pid; int prio
+
+An existing synthetic event definition can be removed by prepending
+the command that defined it with a '!':
+
+ # echo '!wakeup_latency u64 lat pid_t pid int prio' >> \
+ /sys/kernel/debug/tracing/synthetic_events
+
+At this point, there isn't yet an actual 'wakeup_latency' event
+instantiated in the event subsystem - for this to happen, a 'hist
+trigger action' needs to be instantiated and bound to actual fields
+and variables defined on other events (see Section 2.2.3 below).
+
+Once that is done, an event instance is created, and a histogram can
+be defined using it:
+
+ # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
+ /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+The new event is created under the tracing/events/synthetic/ directory
+and looks and behaves just like any other event:
+
+ # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
+ enable filter format hist id trigger
+
+Like any other event, once a histogram is enabled for the event, the
+output can be displayed by reading the event's 'hist' file.
+
+2.2.3 Hist trigger 'actions'
+----------------------------
+
+A hist trigger 'action' is a function that's executed whenever a
+histogram entry is added or updated.
+
+The default 'action' if no special function is explicitly specified is
+as it always has been, to simply update the set of values associated
+with an entry. Some applications, however, may want to perform
+additional actions at that point, such as generate another event, or
+compare and save a maximum.
+
+The following additional actions are available. To specify an action
+for a given event, simply specify the action between colons in the
+hist trigger specification.
+
+ - onmatch(matching.event).<synthetic_event_name>(param list)
+
+ The 'onmatch(matching.event).<synthetic_event_name>(params)' hist
+ trigger action is invoked whenever an event matches and the
+ histogram entry would be added or updated. It causes the named
+ synthetic event to be generated with the values given in the
+ 'param list'. The result is the generation of a synthetic event
+ that consists of the values contained in those variables at the
+ time the invoking event was hit.
+
+ The 'param list' consists of one or more parameters which may be
+ either variables or fields defined on either the 'matching.event'
+ or the target event. The variables or fields specified in the
+ param list may be either fully-qualified or unqualified. If a
+ variable is specified as unqualified, it must be unique between
+ the two events. A field name used as a param can be unqualified
+ if it refers to the target event, but must be fully qualified if
+ it refers to the matching event. A fully-qualified name is of the
+ form 'system.event_name.$var_name' or 'system.event_name.field'.
+
+ The 'matching.event' specification is simply the fully qualified
+ event name of the event that matches the target event for the
+ onmatch() functionality, in the form 'system.event_name'.
+
+ Finally, the number and type of variables/fields in the 'param
+ list' must match the number and types of the fields in the
+ synthetic event being generated.
+
+ As an example, the below defines a simple synthetic event and uses
+ a variable defined on the sched_wakeup_new event as a parameter
+ when invoking the synthetic event. Here we define the synthetic
+ event:
+
+ # echo 'wakeup_new_test pid_t pid' >> \
+ /sys/kernel/debug/tracing/synthetic_events
+
+ # cat /sys/kernel/debug/tracing/synthetic_events
+ wakeup_new_test pid_t pid
+
+ The following hist trigger both defines the missing testpid
+ variable and specifies an onmatch() action that generates a
+ wakeup_new_test synthetic event whenever a sched_wakeup_new event
+ occurs, which because of the 'if comm == "cyclictest"' filter only
+ happens when the executable is cyclictest:
+
+ # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\
+ wakeup_new_test($testpid) if comm=="cyclictest"' >> \
+ /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger
+
+ Creating and displaying a histogram based on those events is now
+ just a matter of using the fields and new synthetic event in the
+ tracing/events/synthetic directory, as usual:
+
+ # echo 'hist:keys=pid:sort=pid' >> \
+ /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger
+
+ Running 'cyclictest' should cause wakeup_new events to generate
+ wakeup_new_test synthetic events which should result in histogram
+ output in the wakeup_new_test event's hist file:
+
+ # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/hist
+
+ A more typical usage would be to use two events to calculate a
+ latency. The following example uses a set of hist triggers to
+ produce a 'wakeup_latency' histogram:
+
+ First, we define a 'wakeup_latency' synthetic event:
+
+ # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \
+ /sys/kernel/debug/tracing/synthetic_events
+
+ Next, we specify that whenever we see a sched_waking event for a
+ cyclictest thread, save the timestamp in a 'ts0' variable:
+
+ # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=common_timestamp.usecs \
+ if comm=="cyclictest"' >> \
+ /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+ Then, when the corresponding thread is actually scheduled onto the
+ CPU by a sched_switch event, calculate the latency and use that
+ along with another variable and an event field to generate a
+ wakeup_latency synthetic event:
+
+ # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\
+ onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\
+ $saved_pid,next_prio) if next_comm=="cyclictest"' >> \
+ /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+ We also need to create a histogram on the wakeup_latency synthetic
+ event in order to aggregate the generated synthetic event data:
+
+ # echo 'hist:keys=pid,prio,lat:sort=pid,lat' >> \
+ /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+ Finally, once we've run cyclictest to actually generate some
+ events, we can see the output by looking at the wakeup_latency
+ synthetic event's hist file:
+
+ # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist
+
+ - onmax(var).save(field,...)
+
+ The 'onmax(var).save(field,...)' hist trigger action is invoked
+ whenever the value of 'var' associated with a histogram entry
+ exceeds the current maximum contained in that variable.
+
+ The end result is that the trace event fields specified as the
+ onmax.save() params will be saved if 'var' exceeds the current
+ maximum for that hist trigger entry. This allows context from the
+ event that exhibited the new maximum to be saved for later
+ reference. When the histogram is displayed, additional fields
+ displaying the saved values will be printed.
+
+ As an example, the below defines a couple of hist triggers, one for
+ sched_waking and another for sched_switch, keyed on pid. Whenever
+ a sched_waking occurs, the timestamp is saved in the entry
+ corresponding to the current pid, and when the scheduler switches
+ back to that pid, the timestamp difference is calculated. If the
+ resulting latency, stored in wakeup_lat, exceeds the current
+ maximum latency, the values specified in the save() fields are
+ recorded:
+
+ # echo 'hist:keys=pid:ts0=common_timestamp.usecs \
+ if comm=="cyclictest"' >> \
+ /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+ # echo 'hist:keys=next_pid:\
+ wakeup_lat=common_timestamp.usecs-$ts0:\
+ onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \
+ if next_comm=="cyclictest"' >> \
+ /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+ When the histogram is displayed, the max value and the saved
+ values corresponding to the max are displayed following the rest
+ of the fields:
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+ { next_pid: 2255 } hitcount: 239
+ common_timestamp-ts0: 0
+ max: 27
+ next_comm: cyclictest
+ prev_pid: 0 prev_prio: 120 prev_comm: swapper/1
+
+ { next_pid: 2256 } hitcount: 2355
+ common_timestamp-ts0: 0
+ max: 49 next_comm: cyclictest
+ prev_pid: 0 prev_prio: 120 prev_comm: swapper/0
+
+ Totals:
+ Hits: 12970
+ Entries: 2
+ Dropped: 0


@ -0,0 +1,44 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:07 -0600
Subject: [PATCH 41/48] tracing: Make tracing_set_clock() non-static
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Allow tracing code outside of trace.c to access tracing_set_clock().
Some applications may require a particular clock in order to function
properly, such as latency calculations.
Also, add an accessor returning the current clock string.
Link: http://lkml.kernel.org/r/6d1c53e9ee2163f54e1849f5376573f54f0e6009.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit f8913a56885a33eda24452c1839102c305bf7df5)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace.c | 2 +-
kernel/trace/trace.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6214,7 +6214,7 @@ static int tracing_clock_show(struct seq
return 0;
}
-static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
+int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
int i;
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -289,6 +289,7 @@ extern int trace_array_get(struct trace_
extern void trace_array_put(struct trace_array *tr);
extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
extern bool trace_clock_in_ns(struct trace_array *tr);


@ -0,0 +1,138 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:08 -0600
Subject: [PATCH 42/48] tracing: Add a clock attribute for hist triggers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The default clock if timestamps are used in a histogram is "global".
If timestamps aren't used, the clock is irrelevant.
Use the "clock=" param only if you want to override the default
"global" clock for a histogram with timestamps.
Link: http://lkml.kernel.org/r/427bed1389c5d22aa40c3e0683e30cc3d151e260.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Rajvi Jingar <rajvi.jingar@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 77e7689e0b182465cfcd7c328061b70eecdcde31)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
Documentation/trace/histogram.txt | 11 +++++++++
kernel/trace/trace_events_hist.c | 42 +++++++++++++++++++++++++++++++++++---
2 files changed, 49 insertions(+), 4 deletions(-)
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -1671,7 +1671,16 @@ specification. In support of this overa
it is in units of nanoseconds; appending '.usecs' to a
common_timestamp field changes the units to microseconds.
-These features are decribed in more detail in the following sections.
+A note on inter-event timestamps: If common_timestamp is used in a
+histogram, the trace buffer is automatically switched over to using
+absolute timestamps and the "global" trace clock, in order to avoid
+bogus timestamp differences with other clocks that aren't coherent
+across CPUs. This can be overridden by specifying one of the other
+trace clocks instead, using the "clock=XXX" hist trigger attribute,
+where XXX is any of the clocks listed in the tracing/trace_clock
+pseudo-file.
+
+These features are described in more detail in the following sections.
2.2.1 Histogram Variables
-------------------------
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -242,6 +242,7 @@ struct hist_trigger_attrs {
char *vals_str;
char *sort_key_str;
char *name;
+ char *clock;
bool pause;
bool cont;
bool clear;
@@ -1776,6 +1777,7 @@ static void destroy_hist_trigger_attrs(s
kfree(attrs->sort_key_str);
kfree(attrs->keys_str);
kfree(attrs->vals_str);
+ kfree(attrs->clock);
kfree(attrs);
}
@@ -1831,6 +1833,19 @@ static int parse_assignment(char *str, s
ret = -ENOMEM;
goto out;
}
+ } else if (strncmp(str, "clock=", strlen("clock=")) == 0) {
+ strsep(&str, "=");
+ if (!str) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ str = strstrip(str);
+ attrs->clock = kstrdup(str, GFP_KERNEL);
+ if (!attrs->clock) {
+ ret = -ENOMEM;
+ goto out;
+ }
} else if (strncmp(str, "size=", strlen("size=")) == 0) {
int map_bits = parse_map_size(str);
@@ -1895,6 +1910,14 @@ static struct hist_trigger_attrs *parse_
goto free;
}
+ if (!attrs->clock) {
+ attrs->clock = kstrdup("global", GFP_KERNEL);
+ if (!attrs->clock) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ }
+
return attrs;
free:
destroy_hist_trigger_attrs(attrs);
@@ -4934,6 +4957,8 @@ static int event_hist_trigger_print(stru
seq_puts(m, ".descending");
}
seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
+ if (hist_data->enable_timestamps)
+ seq_printf(m, ":clock=%s", hist_data->attrs->clock);
print_actions_spec(m, hist_data);
@@ -5201,7 +5226,6 @@ static int hist_register_trigger(char *g
data->paused = true;
if (named_data) {
- destroy_hist_data(data->private_data);
data->private_data = named_data->private_data;
set_named_trigger_data(data, named_data);
data->ops = &event_hist_trigger_named_ops;
@@ -5213,10 +5237,22 @@ static int hist_register_trigger(char *g
goto out;
}
- ret++;
+ if (hist_data->enable_timestamps) {
+ char *clock = hist_data->attrs->clock;
+
+ ret = tracing_set_clock(file->tr, hist_data->attrs->clock);
+ if (ret) {
+ hist_err("Couldn't set trace_clock: ", clock);
+ goto out;
+ }
- if (hist_data->enable_timestamps)
tracing_set_time_stamp_abs(file->tr, true);
+ }
+
+ if (named_data)
+ destroy_hist_data(hist_data);
+
+ ret++;
out:
return ret;
}
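
The 'clock=' branch follows the same strsep()-and-strip pattern as the other
assignments, falling back to "global" when no clock was given. A standalone
C sketch of that parsing step (strip() here is a userspace stand-in for the
kernel's strstrip(); buffer names are illustrative):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Userspace stand-in for strstrip(): trim leading/trailing spaces. */
static char *strip(char *s)
{
        char *end;

        while (isspace((unsigned char)*s))
                s++;
        end = s + strlen(s);
        while (end > s && isspace((unsigned char)end[-1]))
                *--end = '\0';
        return s;
}

int main(void)
{
        char assignment[] = "clock= x86-tsc ";
        char *str = assignment;
        char *clock = NULL;

        if (strncmp(str, "clock=", strlen("clock=")) == 0) {
                strsep(&str, "=");          /* consume the "clock" token */
                if (str)
                        clock = strdup(strip(str));
        }
        if (!clock)
                clock = strdup("global");   /* default, as in the patch */

        printf("clock attribute: '%s'\n", clock);  /* x86-tsc */
        free(clock);
        return 0;
}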


@ -0,0 +1,120 @@
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 7 Feb 2018 17:26:32 -0500
Subject: [PATCH 45/48] ring-buffer: Add nesting for adding events within
events
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The ring-buffer code has recursion protection: in case tracing ends up tracing
itself, the ring-buffer will detect that it was called at the same context
(normal, softirq, interrupt or NMI), and will not continue to record the event.
With the histogram synthetic events, they are called while tracing another
event at the same context. The recursion protection triggers because it
detects tracing at the same context and stops it.
Add ring_buffer_nest_start() and ring_buffer_nest_end() that will notify the
ring buffer that a trace is about to happen within another trace and that it
is intended, so the recursion blocking is not triggered.
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit f932ff1d98c482716b4b71a5d76b2aa3d65f66f0)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ring_buffer.h | 3 ++
kernel/trace/ring_buffer.c | 57 +++++++++++++++++++++++++++++++++++++++++---
2 files changed, 57 insertions(+), 3 deletions(-)
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -117,6 +117,9 @@ int ring_buffer_unlock_commit(struct rin
int ring_buffer_write(struct ring_buffer *buffer,
unsigned long length, void *data);
+void ring_buffer_nest_start(struct ring_buffer *buffer);
+void ring_buffer_nest_end(struct ring_buffer *buffer);
+
struct ring_buffer_event *
ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
unsigned long *lost_events);
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -477,6 +477,7 @@ struct ring_buffer_per_cpu {
struct buffer_page *reader_page;
unsigned long lost_events;
unsigned long last_overrun;
+ unsigned long nest;
local_t entries_bytes;
local_t entries;
local_t overrun;
@@ -2629,10 +2630,10 @@ trace_recursive_lock(struct ring_buffer_
bit = pc & NMI_MASK ? RB_CTX_NMI :
pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
- if (unlikely(val & (1 << bit)))
+ if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
return 1;
- val |= (1 << bit);
+ val |= (1 << (bit + cpu_buffer->nest));
cpu_buffer->current_context = val;
return 0;
@@ -2641,7 +2642,57 @@ trace_recursive_lock(struct ring_buffer_
static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
- cpu_buffer->current_context &= cpu_buffer->current_context - 1;
+ cpu_buffer->current_context &=
+ cpu_buffer->current_context - (1 << cpu_buffer->nest);
+}
+
+/* The recursive locking above uses 4 bits */
+#define NESTED_BITS 4
+
+/**
+ * ring_buffer_nest_start - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * The ring buffer has a safety mechanism to prevent recursion.
+ * But there may be a case where a trace needs to be done while
+ * tracing something else. In this case, calling this function
+ * will allow this function to nest within a currently active
+ * ring_buffer_lock_reserve().
+ *
+ * Call this function before calling another ring_buffer_lock_reserve() and
+ * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_start(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;
+
+ /* Enabled by ring_buffer_nest_end() */
+ preempt_disable_notrace();
+ cpu = raw_smp_processor_id();
+ cpu_buffer = buffer->buffers[cpu];
+ /* This is the shift value for the above recursive locking */
+ cpu_buffer->nest += NESTED_BITS;
+}
+
+/**
+ * ring_buffer_nest_end - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * Must be called after ring_buffer_nest_start() and after the
+ * ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_end(struct ring_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;
+
+ /* disabled by ring_buffer_nest_start() */
+ cpu = raw_smp_processor_id();
+ cpu_buffer = buffer->buffers[cpu];
+ /* This is the shift value for the above recursive locking */
+ cpu_buffer->nest -= NESTED_BITS;
+ preempt_enable_notrace();
}
/**
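
The core trick is that bumping cpu_buffer->nest by NESTED_BITS shifts the
per-context bit a nested writer sets and tests, so intentional nesting uses a
disjoint bit while genuine recursion at the same level is still caught. A
standalone C sketch of the bitmask arithmetic (variable names are
illustrative, not kernel code):

#include <stdio.h>

#define NESTED_BITS 4

static unsigned int ctx;   /* current_context analogue */
static unsigned int nest;  /* cpu_buffer->nest analogue */

/* Mirrors trace_recursive_lock(): 'bit' is the context (0..3). */
static int recursive_lock(unsigned int bit)
{
        if (ctx & (1u << (bit + nest)))
                return 1;               /* same context already writing */
        ctx |= 1u << (bit + nest);
        return 0;
}

/* Mirrors trace_recursive_unlock(): clears the most recent bit. */
static void recursive_unlock(void)
{
        ctx &= ctx - (1u << nest);
}

int main(void)
{
        recursive_lock(0);              /* outer event, "normal" context */
        printf("recursion blocked? %d\n", recursive_lock(0));  /* 1: blocked */

        nest += NESTED_BITS;            /* ring_buffer_nest_start() */
        printf("nested blocked? %d\n", recursive_lock(0));     /* 0: allowed */
        recursive_unlock();             /* nested commit */
        nest -= NESTED_BITS;            /* ring_buffer_nest_end() */
        recursive_unlock();             /* outer commit */
        return 0;
}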


@ -0,0 +1,55 @@
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 7 Feb 2018 17:29:46 -0500
Subject: [PATCH 46/48] tracing: Use the ring-buffer nesting to allow synthetic
events to be traced
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Synthetic events can be generated while another event is being recorded. Notify
the ring buffer via ring_buffer_nest_start() and ring_buffer_nest_end() that
this nesting is intended, so its recursion protection does not block the write.
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 92c571543120ffed5e725f5b57b9de0b535e9d0a)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/trace_events_hist.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -640,6 +640,7 @@ static notrace void trace_event_raw_even
struct trace_event_file *trace_file = __data;
struct synth_trace_event *entry;
struct trace_event_buffer fbuffer;
+ struct ring_buffer *buffer;
struct synth_event *event;
unsigned int i, n_u64;
int fields_size = 0;
@@ -651,10 +652,17 @@ static notrace void trace_event_raw_even
fields_size = event->n_u64 * sizeof(u64);
+ /*
+ * Avoid ring buffer recursion detection, as this event
+ * is being performed within another event.
+ */
+ buffer = trace_file->tr->trace_buffer.buffer;
+ ring_buffer_nest_start(buffer);
+
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + fields_size);
if (!entry)
- return;
+ goto out;
for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
if (event->fields[i]->is_string) {
@@ -670,6 +678,8 @@ static notrace void trace_event_raw_even
}
trace_event_buffer_commit(&fbuffer);
+out:
+ ring_buffer_nest_end(buffer);
}
static void free_synth_event_print_fmt(struct trace_event_call *call)


@ -0,0 +1,34 @@
From: Tom Zanussi <tom.zanussi@linux.intel.com>
Date: Mon, 15 Jan 2018 20:52:10 -0600
Subject: [PATCH 47/48] tracing: Add inter-event blurb to HIST_TRIGGERS config
option
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
So that users know that inter-event tracing is supported as part of
the HIST_TRIGGERS option, include text to that effect in the help
text.
Link: http://lkml.kernel.org/r/a38e24231d8d980be636b56d35814570acfd167a.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit 02942764c4fd12caeb29868822b7744fa91a9ad0)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/trace/Kconfig | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -606,7 +606,10 @@ config HIST_TRIGGERS
event activity as an initial guide for further investigation
using more advanced tools.
- See Documentation/trace/events.txt.
+ Inter-event tracing of quantities such as latencies is also
+ supported using hist triggers under this option.
+
+ See Documentation/trace/histogram.txt.
If in doubt, say N.
config MMIOTRACE_TEST


@ -0,0 +1,443 @@
From: Rajvi Jingar <rajvi.jingar@intel.com>
Date: Mon, 15 Jan 2018 20:52:11 -0600
Subject: [PATCH 48/48] selftests: ftrace: Add inter-event hist triggers
testcases
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
This adds inter-event hist trigger testcases, which cover the following:
- create/remove synthetic event
- disable histogram for synthetic event
- extended error support
- field variable support
- histogram variables
- histogram trigger onmatch action
- histogram trigger onmax action
- histogram trigger onmatch-onmax action
- simple expression support
- combined histogram
Here is the test result.
=== Ftrace unit tests ===
[1] event trigger - test extended error support [PASS]
[2] event trigger - test field variable support [PASS]
[3] event trigger - test inter-event combined histogram trigger [PASS]
[4] event trigger - test inter-event histogram trigger onmatch action [PASS]
[5] event trigger - test inter-event histogram trigger onmatch-onmax action [PASS]
[6] event trigger - test inter-event histogram trigger onmax action [PASS]
[7] event trigger - test synthetic event create remove [PASS]
Link: http://lkml.kernel.org/r/e07ef1e72f7bf0f84dc87c9b736d6dc91b4b0b49.1516069914.git.tom.zanussi@linux.intel.com
Signed-off-by: Rajvi Jingar <rajvi.jingar@intel.com>
Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
(cherry picked from commit fb08b656dc9caee4a097bc4d8e050e2ead59bc24)
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
tools/testing/selftests/ftrace/test.d/functions | 7 +
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc | 39 ++++++
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc | 54 +++++++++
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc | 58 ++++++++++
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc | 50 ++++++++
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc | 50 ++++++++
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc | 48 ++++++++
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc | 54 +++++++++
8 files changed, 360 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -59,6 +59,13 @@ disable_events() {
echo 0 > events/enable
}
+clear_synthetic_events() { # reset all current synthetic events
+ grep -v ^# synthetic_events |
+ while read line; do
+ echo "!$line" >> synthetic_events
+ done
+}
+
initialize_ftrace() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+ fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+ fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+ fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist || ! grep -q "max:" events/sched/sched_switch/hist; then
+ fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+ fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events 2> /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+do_reset
+
+exit 0


@@ -0,0 +1,121 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 25 Apr 2018 16:01:37 +0200
Subject: [PATCH] ACPICA: Convert acpi_gbl_hardware lock back to an
acpi_raw_spinlock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
We hit the following bug with -RT:
|BUG: scheduling while atomic: swapper/7/0/0x00000002
|Pid: 0, comm: swapper/7 Not tainted 3.6.11-rt28.19.el6rt.x86_64.debug #1
|Call Trace:
| __schedule_bug+0x67/0x90
| __schedule+0x793/0x7a0
| rt_spin_lock_slowlock+0xe5/0x2f0
| rt_spin_lock+0x16/0x40
| acpi_os_acquire_lock+0x1f/0x23
| acpi_write_bit_register+0x33/0xb0
| acpi_idle_enter_bm+0x8a/0x28e
As the acpi code disables interrupts in acpi_idle_enter_bm and then
calls code that grabs the acpi lock, it causes issues because on RT
that lock is currently a sleeping lock.
The lock was converted from a raw to a sleeping lock due to some
previous issues, and tests that showed it didn't seem to matter.
Unfortunately, it did matter for one of our boxes.
This patch converts the lock back to a raw lock. I've run this code on
a few of my own machines, one being my laptop, which uses ACPI quite
extensively. I've been able to suspend and resume without issues.
[ tglx: Made the change exclusive for acpi_gbl_hardware_lock ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: John Kacur <jkacur@gmail.com>
Cc: Clark Williams <clark@redhat.com>
Link: http://lkml.kernel.org/r/1360765565.23152.5.camel@gandalf.local.home
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: shorten the backtrace, use the type acpi_raw_spinlock incl.
accessor]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
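To illustrate the failure mode, here is a minimal hypothetical sketch
(not code from the patch) of what the idle path effectively did on RT:
with interrupts already hard-disabled, acquiring a spinlock_t maps to a
sleeping rt_spin_lock(), which may call schedule():

  acpi_cpu_flags flags;

  local_irq_disable();            /* idle path: IRQs hard-off */
  flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  /* on RT this became rt_spin_lock(): it may schedule() with IRQs
   * off -> "scheduling while atomic"; a raw lock never sleeps */
  acpi_os_release_lock(acpi_gbl_hardware_lock, flags);
  local_irq_enable();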
---
drivers/acpi/acpica/acglobal.h | 2 +-
drivers/acpi/acpica/hwregs.c | 4 ++--
drivers/acpi/acpica/hwxface.c | 4 ++--
drivers/acpi/acpica/utmutex.c | 4 ++--
4 files changed, 7 insertions(+), 7 deletions(-)
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pen
* interrupt level
*/
ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
-ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
+ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
/* Mutex for _OSI support */
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -426,14 +426,14 @@ acpi_status acpi_hw_clear_acpi_status(vo
ACPI_BITMASK_ALL_FIXED_STATUS,
ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+ lock_flags = acpi_os_acquire_raw_lock(acpi_gbl_hardware_lock);
/* Clear the fixed events in PM1 A/B */
status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
ACPI_BITMASK_ALL_FIXED_STATUS);
- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+ acpi_os_release_raw_lock(acpi_gbl_hardware_lock, lock_flags);
if (ACPI_FAILURE(status)) {
goto exit;
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -261,7 +261,7 @@ acpi_status acpi_write_bit_register(u32
return_ACPI_STATUS(AE_BAD_PARAMETER);
}
- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+ lock_flags = acpi_os_acquire_raw_lock(acpi_gbl_hardware_lock);
/*
* At this point, we know that the parent register is one of the
@@ -322,7 +322,7 @@ acpi_status acpi_write_bit_register(u32
unlock_and_exit:
- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+ acpi_os_release_raw_lock(acpi_gbl_hardware_lock, lock_flags);
return_ACPI_STATUS(status);
}
--- a/drivers/acpi/acpica/utmutex.c
+++ b/drivers/acpi/acpica/utmutex.c
@@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(voi
return_ACPI_STATUS (status);
}
- status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
+ status = acpi_os_create_raw_lock(&acpi_gbl_hardware_lock);
if (ACPI_FAILURE (status)) {
return_ACPI_STATUS (status);
}
@@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void)
/* Delete the spinlocks */
acpi_os_delete_lock(acpi_gbl_gpe_lock);
- acpi_os_delete_lock(acpi_gbl_hardware_lock);
+ acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
acpi_os_delete_lock(acpi_gbl_reference_count_lock);
/* Delete the reader/writer lock */


@@ -0,0 +1,123 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 25 Apr 2018 15:19:42 +0200
Subject: [PATCH] ACPICA: provide abstraction for raw_spinlock_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Provide a new lock type, acpi_raw_spinlock, which is implemented as
raw_spinlock_t on Linux. This type should be used in code which covers
only small areas and disables interrupts only for a short time, even on
a realtime OS.
There is a fallback to spinlock_t if an OS does not provide an
implementation for acpi_raw_spinlock.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
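A rough usage sketch of the new primitives (hypothetical caller; the
macros and inline functions themselves are defined in the hunks below):

  acpi_raw_spinlock my_lock;                 /* hypothetical lock */
  acpi_cpu_flags flags;

  acpi_os_create_raw_lock(&my_lock);         /* raw_spin_lock_init() on Linux */
  flags = acpi_os_acquire_raw_lock(my_lock); /* raw_spin_lock_irqsave() */
  /* short, non-sleeping critical section */
  acpi_os_release_raw_lock(my_lock, flags);  /* raw_spin_unlock_irqrestore() */
  acpi_os_delete_raw_lock(my_lock);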
---
include/acpi/acpiosxf.h | 21 +++++++++++++++++++++
include/acpi/actypes.h | 4 ++++
include/acpi/platform/aclinux.h | 5 +++++
include/acpi/platform/aclinuxex.h | 30 ++++++++++++++++++++++++++++++
4 files changed, 60 insertions(+)
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -132,6 +132,27 @@ void acpi_os_release_lock(acpi_spinlock
#endif
/*
+ * RAW spinlock primitives. If the OS does not provide them, fallback to
+ * spinlock primitives
+ */
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_raw_lock
+# define acpi_os_create_raw_lock(out_handle) acpi_os_create_lock(out_handle)
+#endif
+
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_delete_raw_lock
+# define acpi_os_delete_raw_lock(handle) acpi_os_delete_lock(handle)
+#endif
+
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_raw_lock
+# define acpi_os_acquire_raw_lock(handle) acpi_os_acquire_lock(handle)
+#endif
+
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_release_raw_lock
+# define acpi_os_release_raw_lock(handle, flags) \
+ acpi_os_release_lock(handle, flags)
+#endif
+
+/*
* Semaphore primitives
*/
#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_semaphore
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -279,6 +279,10 @@ typedef u64 acpi_physical_address;
#define acpi_spinlock void *
#endif
+#ifndef acpi_raw_spinlock
+#define acpi_raw_spinlock acpi_spinlock
+#endif
+
#ifndef acpi_semaphore
#define acpi_semaphore void *
#endif
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -134,6 +134,7 @@
#define acpi_cache_t struct kmem_cache
#define acpi_spinlock spinlock_t *
+#define acpi_raw_spinlock raw_spinlock_t *
#define acpi_cpu_flags unsigned long
/* Use native linux version of acpi_os_allocate_zeroed */
@@ -151,6 +152,10 @@
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_object
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_raw_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_delete_raw_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_raw_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_release_raw_lock
/*
* OSL interfaces used by debugger/disassembler
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -124,6 +124,36 @@ static inline acpi_thread_id acpi_os_get
lock ? AE_OK : AE_NO_MEMORY; \
})
+
+#define acpi_os_create_raw_lock(__handle) \
+ ({ \
+ raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
+ if (lock) { \
+ *(__handle) = lock; \
+ raw_spin_lock_init(*(__handle)); \
+ } \
+ lock ? AE_OK : AE_NO_MEMORY; \
+ })
+
+static inline acpi_cpu_flags acpi_os_acquire_raw_lock(acpi_raw_spinlock lockp)
+{
+ acpi_cpu_flags flags;
+
+ raw_spin_lock_irqsave(lockp, flags);
+ return flags;
+}
+
+static inline void acpi_os_release_raw_lock(acpi_raw_spinlock lockp,
+ acpi_cpu_flags flags)
+{
+ raw_spin_unlock_irqrestore(lockp, flags);
+}
+
+static inline void acpi_os_delete_raw_lock(acpi_raw_spinlock handle)
+{
+ ACPI_FREE(handle);
+}
+
static inline u8 acpi_os_readable(void *pointer, acpi_size length)
{
return TRUE;


@@ -0,0 +1,152 @@
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Thu, 29 Mar 2018 17:09:27 +0200
Subject: [PATCH] ALSA: pcm: Hide local_irq_disable/enable() and
local_irqsave/restore()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The snd_pcm_stream_lock_irq*() functions decouple disabling interrupts
from the actual locking process. This does not work as expected if the
locking primitives are replaced, as they are on preempt-rt.
Provide a single locking function which uses the correct locking
primitives.
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
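A minimal sketch of the difference (assumption: PREEMPT_RT turns these
locks into sleeping locks):

  /* old, decoupled pattern: invalid on RT */
  local_irq_disable();               /* IRQs hard-off */
  read_lock(&snd_pcm_link_rwlock);   /* may sleep on RT with IRQs off */

  /* new pattern: the primitive handles the IRQ state itself */
  read_lock_irq(&snd_pcm_link_rwlock);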
---
sound/core/pcm_native.c | 85 ++++++++++++++++++++++++++++++++----------------
1 file changed, 57 insertions(+), 28 deletions(-)
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -99,6 +99,57 @@ static inline void down_write_nonblock(s
cond_resched();
}
+#define PCM_LOCK_DEFAULT 0
+#define PCM_LOCK_IRQ 1
+#define PCM_LOCK_IRQSAVE 2
+
+static unsigned long __snd_pcm_stream_lock_mode(struct snd_pcm_substream *substream,
+ unsigned int mode)
+{
+ unsigned long flags = 0;
+ if (substream->pcm->nonatomic) {
+ down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING);
+ mutex_lock(&substream->self_group.mutex);
+ } else {
+ switch (mode) {
+ case PCM_LOCK_DEFAULT:
+ read_lock(&snd_pcm_link_rwlock);
+ break;
+ case PCM_LOCK_IRQ:
+ read_lock_irq(&snd_pcm_link_rwlock);
+ break;
+ case PCM_LOCK_IRQSAVE:
+ read_lock_irqsave(&snd_pcm_link_rwlock, flags);
+ break;
+ }
+ spin_lock(&substream->self_group.lock);
+ }
+ return flags;
+}
+
+static void __snd_pcm_stream_unlock_mode(struct snd_pcm_substream *substream,
+ unsigned int mode, unsigned long flags)
+{
+ if (substream->pcm->nonatomic) {
+ mutex_unlock(&substream->self_group.mutex);
+ up_read(&snd_pcm_link_rwsem);
+ } else {
+ spin_unlock(&substream->self_group.lock);
+
+ switch (mode) {
+ case PCM_LOCK_DEFAULT:
+ read_unlock(&snd_pcm_link_rwlock);
+ break;
+ case PCM_LOCK_IRQ:
+ read_unlock_irq(&snd_pcm_link_rwlock);
+ break;
+ case PCM_LOCK_IRQSAVE:
+ read_unlock_irqrestore(&snd_pcm_link_rwlock, flags);
+ break;
+ }
+ }
+}
+
/**
* snd_pcm_stream_lock - Lock the PCM stream
* @substream: PCM substream
@@ -109,13 +160,7 @@ static inline void down_write_nonblock(s
*/
void snd_pcm_stream_lock(struct snd_pcm_substream *substream)
{
- if (substream->pcm->nonatomic) {
- down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING);
- mutex_lock(&substream->self_group.mutex);
- } else {
- read_lock(&snd_pcm_link_rwlock);
- spin_lock(&substream->self_group.lock);
- }
+ __snd_pcm_stream_lock_mode(substream, PCM_LOCK_DEFAULT);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_lock);
@@ -127,13 +172,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_lock);
*/
void snd_pcm_stream_unlock(struct snd_pcm_substream *substream)
{
- if (substream->pcm->nonatomic) {
- mutex_unlock(&substream->self_group.mutex);
- up_read(&snd_pcm_link_rwsem);
- } else {
- spin_unlock(&substream->self_group.lock);
- read_unlock(&snd_pcm_link_rwlock);
- }
+ __snd_pcm_stream_unlock_mode(substream, PCM_LOCK_DEFAULT, 0);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock);
@@ -147,9 +186,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock)
*/
void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
{
- if (!substream->pcm->nonatomic)
- local_irq_disable();
- snd_pcm_stream_lock(substream);
+ __snd_pcm_stream_lock_mode(substream, PCM_LOCK_IRQ);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
@@ -161,19 +198,13 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_ir
*/
void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream)
{
- snd_pcm_stream_unlock(substream);
- if (!substream->pcm->nonatomic)
- local_irq_enable();
+ __snd_pcm_stream_unlock_mode(substream, PCM_LOCK_IRQ, 0);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
{
- unsigned long flags = 0;
- if (!substream->pcm->nonatomic)
- local_irq_save(flags);
- snd_pcm_stream_lock(substream);
- return flags;
+ return __snd_pcm_stream_lock_mode(substream, PCM_LOCK_IRQSAVE);
}
EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave);
@@ -187,9 +218,7 @@ EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_i
void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
unsigned long flags)
{
- snd_pcm_stream_unlock(substream);
- if (!substream->pcm->nonatomic)
- local_irq_restore(flags);
+ __snd_pcm_stream_unlock_mode(substream, PCM_LOCK_IRQSAVE, flags);
}
EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);


@@ -1,7 +1,7 @@
From: "Yadi.hu" <yadi.hu@windriver.com>
Date: Wed, 10 Dec 2014 10:32:09 +0800
Subject: ARM: enable irq in translation/section permission fault handlers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Probably happens on all ARM, with
CONFIG_PREEMPT_RT_FULL
@@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -434,6 +434,9 @@ do_translation_fault(unsigned long addr,
@@ -433,6 +433,9 @@ do_translation_fault(unsigned long addr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
@@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (user_mode(regs))
goto bad_area;
@@ -501,6 +504,9 @@ do_translation_fault(unsigned long addr,
@@ -500,6 +503,9 @@ do_translation_fault(unsigned long addr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{


@@ -1,7 +1,7 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 21 Mar 2013 19:01:05 +0100
Subject: printk: Drop the logbuf_lock more often
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The lock is held with irqs off. The latency drops 500us+ on my ARM
boxes with a "full" buffer after executing "dmesg" on the shell.
@@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1407,6 +1407,8 @@ static int syslog_print_all(char __user
@@ -1411,6 +1411,8 @@ static int syslog_print_all(char __user
{
char *text;
int len = 0;
@@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
if (!text)
@@ -1418,6 +1420,14 @@ static int syslog_print_all(char __user
@@ -1422,6 +1424,14 @@ static int syslog_print_all(char __user
u64 seq;
u32 idx;
@@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
@@ -1430,6 +1440,14 @@ static int syslog_print_all(char __user
@@ -1434,6 +1444,14 @@ static int syslog_print_all(char __user
len += msg_print_text(msg, true, NULL, 0);
idx = log_next(idx);
seq++;
@@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/* move first record forward until length fits into the buffer */
@@ -1441,6 +1459,14 @@ static int syslog_print_all(char __user
@@ -1445,6 +1463,14 @@ static int syslog_print_all(char __user
len -= msg_print_text(msg, true, NULL, 0);
idx = log_next(idx);
seq++;
@@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/* last message fitting into this dump */
@@ -1479,6 +1505,7 @@ static int syslog_print_all(char __user
@@ -1483,6 +1509,7 @@ static int syslog_print_all(char __user
clear_seq = log_next_seq;
clear_idx = log_next_idx;
}


@@ -0,0 +1,74 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 11 Apr 2018 13:34:26 +0200
Subject: [PATCH] IB/ipoib: replace local_irq_disable() with proper locking
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Commit 78bfe0b5b67f ("IPoIB: Take dev->xmit_lock around mc_list accesses")
introduced the xmit_lock lock in ipoib_mcast_restart_task() and commit
932ff279a43a ("[NET]: Add netif_tx_lock") preserved the locking order while
dev->xmit_lock was replaced with a helper. The netif_tx_lock should
not be acquired with disabled interrupts because it is meant to be a
BH-disabling lock.
The priv->lock is always acquired with interrupts disabled. The only
place where netif_addr_lock() and priv->lock nest is
ipoib_mcast_restart_task(). By reversing the lock order and taking the
netif_addr lock with bottom halves disabled, it is possible to get rid
of the local_irq_save() completely.
This requires taking priv->lock with spin_lock_irq() inside the
netif_addr locked section. It's safe to do so because the caller is
either a worker function or __ipoib_ib_dev_flush(), both of which are
invoked with interrupts enabled.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
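The before/after lock ordering, condensed from the hunk below:

  /* old: IRQs hard-disabled around the nested locks */
  local_irq_save(flags);
  netif_addr_lock(dev);
  spin_lock(&priv->lock);

  /* new: bottom halves disabled; IRQs off only for priv->lock */
  netif_addr_lock_bh(dev);
  spin_lock_irq(&priv->lock);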
---
drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -886,7 +886,6 @@ void ipoib_mcast_restart_task(struct wor
struct netdev_hw_addr *ha;
struct ipoib_mcast *mcast, *tmcast;
LIST_HEAD(remove_list);
- unsigned long flags;
struct ib_sa_mcmember_rec rec;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
@@ -898,9 +897,8 @@ void ipoib_mcast_restart_task(struct wor
ipoib_dbg_mcast(priv, "restarting multicast task\n");
- local_irq_save(flags);
- netif_addr_lock(dev);
- spin_lock(&priv->lock);
+ netif_addr_lock_bh(dev);
+ spin_lock_irq(&priv->lock);
/*
* Unfortunately, the networking core only gives us a list of all of
@@ -978,9 +976,8 @@ void ipoib_mcast_restart_task(struct wor
}
}
- spin_unlock(&priv->lock);
- netif_addr_unlock(dev);
- local_irq_restore(flags);
+ spin_unlock_irq(&priv->lock);
+ netif_addr_unlock_bh(dev);
ipoib_mcast_remove_list(&remove_list);
@@ -988,9 +985,9 @@ void ipoib_mcast_restart_task(struct wor
* Double check that we are still up
*/
if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
}
}


@@ -1,7 +1,7 @@
From: Josh Cartwright <joshc@ni.com>
Date: Thu, 11 Feb 2016 11:54:01 -0600
Subject: KVM: arm/arm64: downgrade preempt_disable()d region to migrate_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating
the vgic and timer states to prevent the calling task from migrating to
@@ -23,27 +23,27 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -650,7 +650,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
@@ -678,7 +678,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
* involves poking the GIC, which must be done in a
* non-preemptible context.
*/
- preempt_disable();
+ migrate_disable();
kvm_pmu_flush_hwstate(vcpu);
@@ -687,7 +687,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
/* Flush FP/SIMD state that can't survive guest entry/exit */
kvm_fpsimd_flush_cpu_state();
@@ -729,7 +729,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
- preempt_enable();
+ migrate_enable();
continue;
}
@@ -742,7 +742,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
kvm_vgic_sync_hwstate(vcpu);
@@ -803,7 +803,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, run, ret);
- preempt_enable();
+ migrate_enable();


@@ -5,7 +5,7 @@ Cc: Anna Schumaker <anna.schumaker@netapp.com>,
linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org,
tglx@linutronix.de
Subject: NFSv4: replace seqcount_t with a seqlock_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The raw_write_seqcount_begin() in nfs4_reclaim_open_state() bugs me
because it maps to preempt_disable() in -RT which I can't have at this
@@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(st
@@ -151,11 +151,11 @@ static int nfs_delegation_claim_opens(st
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
@@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2638,7 +2638,7 @@ static int _nfs4_open_and_get_state(stru
@@ -2778,7 +2778,7 @@ static int _nfs4_open_and_get_state(stru
unsigned int seq;
int ret;
@@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
ret = _nfs4_proc_open(opendata);
if (ret != 0)
@@ -2676,7 +2676,7 @@ static int _nfs4_open_and_get_state(stru
@@ -2816,7 +2816,7 @@ static int _nfs4_open_and_get_state(stru
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
@@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
out:
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -494,7 +494,7 @@ nfs4_alloc_state_owner(struct nfs_server
@@ -502,7 +502,7 @@ nfs4_alloc_state_owner(struct nfs_server
nfs4_init_seqid_counter(&sp->so_seqid);
atomic_set(&sp->so_count, 1);
INIT_LIST_HEAD(&sp->so_lru);
@@ -87,7 +87,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
mutex_init(&sp->so_delegreturn_mutex);
return sp;
}
@@ -1516,8 +1516,12 @@ static int nfs4_reclaim_open_state(struc
@@ -1554,8 +1554,12 @@ static int nfs4_reclaim_open_state(struc
* recovering after a network partition or a reboot from a
* server that doesn't support a grace period.
*/
@@ -101,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1586,14 +1590,20 @@ static int nfs4_reclaim_open_state(struc
@@ -1624,14 +1628,20 @@ static int nfs4_reclaim_open_state(struc
spin_lock(&sp->so_lock);
goto restart;
}


@@ -0,0 +1,121 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 16 Feb 2018 11:45:13 +0100
Subject: [PATCH] RCU: skip the "schedule() in RCU section" warning on UP,
too
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
In "RCU: we need to skip that warning but only on sleeping locks" we
skipped a warning on SMP systems in case we schedule out in a RCU
section while attempt to obtain a sleeping lock. This is also required
on UP systems.
In order to do so, I introduce a tiny version of migrate_disable() +
_enable() which only update the counters which we then can check against
on RT && !SMP.
Cc: stable-rt@vger.kernel.org
Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/preempt.h | 9 +++++++++
include/linux/sched.h | 6 ++++++
kernel/rcu/tree_plugin.h | 2 +-
kernel/sched/core.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 61 insertions(+), 1 deletion(-)
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -211,6 +211,15 @@ extern void migrate_enable(void);
int __migrate_disabled(struct task_struct *p);
+#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+static inline int __migrate_disabled(struct task_struct *p)
+{
+ return 0;
+}
+
#else
#define migrate_disable() barrier()
#define migrate_enable() barrier()
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,12 @@ struct task_struct {
# ifdef CONFIG_SCHED_DEBUG
int migrate_disable_atomic;
# endif
+
+#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+ int migrate_disable;
+# ifdef CONFIG_SCHED_DEBUG
+ int migrate_disable_atomic;
+# endif
#endif
#ifdef CONFIG_PREEMPT_RCU
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -328,7 +328,7 @@ static void rcu_preempt_note_context_swi
int mg_counter = 0;
lockdep_assert_irqs_disabled();
-#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+#if defined(CONFIG_PREEMPT_RT_BASE)
mg_counter = t->migrate_disable;
#endif
WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !mg_counter);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7278,4 +7278,49 @@ void migrate_enable(void)
preempt_enable();
}
EXPORT_SYMBOL(migrate_enable);
+
+#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+void migrate_disable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic++;
+#endif
+ return;
+ }
+#ifdef CONFIG_SCHED_DEBUG
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+#endif
+
+ p->migrate_disable++;
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic--;
+#endif
+ return;
+ }
+
+#ifdef CONFIG_SCHED_DEBUG
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+#endif
+
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+ p->migrate_disable--;
+}
+EXPORT_SYMBOL(migrate_enable);
#endif


@@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 21 Sep 2017 14:25:13 +0200
Subject: [PATCH] RCU: we need to skip that warning but only on sleeping
locks
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
This check is okay for upstream. On RT we trigger this while blocking on
a sleeping lock. In this case, it is okay to schedule() within an RCU
@@ -19,13 +19,13 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -323,9 +323,13 @@ static void rcu_preempt_note_context_swi
@@ -325,9 +325,13 @@ static void rcu_preempt_note_context_swi
struct task_struct *t = current;
struct rcu_data *rdp;
struct rcu_node *rnp;
+ int mg_counter = 0;
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n");
lockdep_assert_irqs_disabled();
- WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
+#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP)
+ mg_counter = t->migrate_disable;


@@ -0,0 +1,50 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 11 Apr 2018 11:27:44 +0200
Subject: [PATCH] Revert mm/vmstat.c: fix vmstat_update() preemption BUG
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
This patch reverts commit c7f26ccfb2c3 ("mm/vmstat.c: fix
vmstat_update() preemption BUG").
Steven saw a "using smp_processor_id() in preemptible" message and
added a preempt_disable() section around it to keep it quiet. This is
not the right thing to do it does not fix the real problem.
vmstat_update() is invoked by a kworker on a specific CPU. This worker
it bound to this CPU. The name of the worker was "kworker/1:1" so it
should have been a worker which was bound to CPU1. A worker which can
run on any CPU would have a `u' before the first digit.
smp_processor_id() can be used in a preempt-enabled region as long as
the task is bound to a single CPU which is the case here. If it could
run on an arbitrary CPU then this is the problem we have an should seek
to resolve.
Not only this smp_processor_id() must not be migrated to another CPU but
also refresh_cpu_vm_stats() which might access wrong per-CPU variables.
Not to mention that other code relies on the fact that such a worker
runs on one specific CPU only.
Therefore I revert that commit and we should look instead what broke the
affinity mask of the kworker.
Cc: Steven J. Hill <steven.hill@cavium.com>
Cc: Tejun Heo <htejun@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
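The pattern in question, as it stands after the revert (taken from the
hunk below): the work is queued on a fixed CPU, so smp_processor_id()
is stable without a preempt_disable() section, assuming the worker's
affinity is intact.

  /* runs in a kworker bound to one CPU; that binding is the invariant */
  queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
                        this_cpu_ptr(&vmstat_work),
                        round_jiffies_relative(sysctl_stat_interval));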
---
mm/vmstat.c | 2 --
1 file changed, 2 deletions(-)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1839,11 +1839,9 @@ static void vmstat_update(struct work_st
* to occur in the future. Keep on running the
* update worker thread.
*/
- preempt_disable();
queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
- preempt_enable();
}
}


@@ -0,0 +1,47 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 12 Apr 2018 09:16:22 +0200
Subject: [PATCH] [SCSI] libsas: remove irq save in sas_ata_qc_issue()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Since commit 312d3e56119a ("[SCSI] libsas: remove ata_port.lock
management duties from lldds") the sas_ata_qc_issue() function disables
interrupts before unlocking the ata_port.lock.
That lock is always taken with interrupts disabled, so at this point
the interrupts are already off; there is no need to disable them again
before the unlock operation.
Restoring the interrupt state later does not change anything because
they were disabled and remain disabled. Therefore remove the operations
which do not change the behaviour.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
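A sketch of the removed no-op (the caller already holds ap->lock taken
with spin_lock_irqsave(), so IRQs are off throughout):

  local_irq_save(flags);     /* saves "disabled"; IRQs stay off       */
  spin_unlock(ap->lock);     /* spin_unlock() does not re-enable IRQs */
  /* ... build and issue the sas_task ... */
  spin_lock(ap->lock);
  local_irq_restore(flags);  /* restores "disabled": no change        */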
---
drivers/scsi/libsas/sas_ata.c | 3 ---
1 file changed, 3 deletions(-)
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -176,7 +176,6 @@ static void sas_ata_task_done(struct sas
static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
{
- unsigned long flags;
struct sas_task *task;
struct scatterlist *sg;
int ret = AC_ERR_SYSTEM;
@@ -190,7 +189,6 @@ static unsigned int sas_ata_qc_issue(str
/* TODO: audit callers to ensure they are ready for qc_issue to
* unconditionally re-enable interrupts
*/
- local_irq_save(flags);
spin_unlock(ap->lock);
/* If the device fell off, no sense in issuing commands */
@@ -252,7 +250,6 @@ static unsigned int sas_ata_qc_issue(str
out:
spin_lock(ap->lock);
- local_irq_restore(flags);
return ret;
}


@@ -0,0 +1,40 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 12 Apr 2018 09:55:25 +0200
Subject: [PATCH] [SCSI] qla2xxx: remove irq save in qla2x00_poll()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Commit d2ba5675d899 ("[SCSI] qla2xxx: Disable local-interrupts while
polling for RISC status.") added a local_irq_disable() before invoking
the ->intr_handler callback. The function used in this callback did not
disable interrupts while acquiring the spin_lock, so a deadlock was
possible, and this change was one possible solution.
The function in question was qla2300_intr_handler() and is using
spin_lock_irqsave() since commit 43fac4d97a1a ("[SCSI] qla2xxx: Resolve
a performance issue in interrupt").
I checked all other ->intr_handler callbacks and all of them use the
irqsave variant so it is safe to remove the local_irq_save() block now.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
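Sketch of why the outer disable is now redundant (assumption for
illustration: the handlers lock ha->hardware_lock, as
qla2300_intr_handler does since the commit cited above):

  /* every ->intr_handler uses the irqsave variant internally */
  spin_lock_irqsave(&ha->hardware_lock, flags);
  /* ... poll RISC status ... */
  spin_unlock_irqrestore(&ha->hardware_lock, flags);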
---
drivers/scsi/qla2xxx/qla_inline.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -57,14 +57,12 @@ qla2x00_debounce_register(volatile uint1
static inline void
qla2x00_poll(struct rsp_que *rsp)
{
- unsigned long flags;
struct qla_hw_data *ha = rsp->hw;
- local_irq_save(flags);
+
if (IS_P3P_TYPE(ha))
qla82xx_poll(0, rsp);
else
ha->isp_ops->intr_handler(0, rsp);
- local_irq_restore(flags);
}
static inline uint8_t *


@@ -1,7 +1,7 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 27 May 2017 19:02:06 +0200
Subject: kernel/sched/core: add migrate_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
---
include/linux/preempt.h | 23 ++++++++
@@ -52,7 +52,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4
#ifdef MODULE
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -580,6 +580,13 @@ struct task_struct {
@@ -592,6 +592,13 @@ struct task_struct {
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
cpumask_t cpus_mask;
@@ -80,7 +80,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4
* boot command line:
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1022,7 +1022,15 @@ void set_cpus_allowed_common(struct task
@@ -1033,7 +1033,15 @@ void set_cpus_allowed_common(struct task
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
@@ -97,7 +97,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4
{
struct rq *rq = task_rq(p);
bool queued, running;
@@ -1051,6 +1059,20 @@ void do_set_cpus_allowed(struct task_str
@@ -1062,6 +1070,20 @@ void do_set_cpus_allowed(struct task_str
set_curr_task(rq, p);
}
@@ -118,7 +118,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
@@ -1109,9 +1131,16 @@ static int __set_cpus_allowed_ptr(struct
@@ -1120,9 +1142,16 @@ static int __set_cpus_allowed_ptr(struct
}
/* Can the task run on the task's current CPU? If so, we're done */
@ -136,7 +136,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
@@ -6759,3 +6788,100 @@ const u32 sched_prio_to_wmult[40] = {
@@ -7027,3 +7056,100 @@ const u32 sched_prio_to_wmult[40] = {
/* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
@@ -239,7 +239,7 @@ Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4
+#endif
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1017,6 +1017,10 @@ void proc_sched_show_task(struct task_st
@@ -1030,6 +1030,10 @@ void proc_sched_show_task(struct task_st
P(dl.runtime);
P(dl.deadline);
}


@@ -0,0 +1,34 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 3 Apr 2018 15:13:20 +0200
Subject: [PATCH] alim15x3: move irq-restore before pci_dev_put()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
init_chipset_ali15x3() initializes the chipset during init with disabled
interrupts. There is no need to keep the interrupts disabled during
pci_dev_put().
Move the irq-restore before pci_dev_put() is invoked.
Side note: The same init is performed in
drivers/ata/pata_ali.c::ali_init_chipset() without disabled interrupts.
It looks like the same hardware is supported in ATA land. Would it
make sense to remove this driver, since the hardware is supported in
the other subsystem?
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
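The reordering, condensed from the hunk below: restore the IRQ state
first, then drop the device references, which do not need to run with
interrupts disabled.

  local_irq_restore(flags);  /* end of the section that needs IRQs off */
  pci_dev_put(north);
  pci_dev_put(isa_dev);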
---
drivers/ide/alim15x3.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -323,9 +323,9 @@ static int init_chipset_ali15x3(struct p
pci_write_config_byte(dev, 0x53, tmpbyte);
}
+ local_irq_restore(flags);
pci_dev_put(north);
pci_dev_put(isa_dev);
- local_irq_restore(flags);
return 0;
}


@@ -1,7 +1,7 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 11 Oct 2017 17:43:49 +0200
Subject: apparmor: use a locallock instead preempt_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
get_buffers() disables preemption which acts as a lock for the per-CPU
variable. Since we can't disable preemption here on RT, a local_lock is


@@ -1,7 +1,7 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Thu, 14 May 2015 17:52:17 +0200
Subject: arch/arm64: Add lazy preempt support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
arm64 is missing support for PREEMPT_RT. The main feature which is
lacking is support for lazy preemption. The arch-specific entry code,
@@ -21,7 +21,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -103,6 +103,7 @@ config ARM64
@@ -123,6 +123,7 @@ config ARM64
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
@@ -39,7 +39,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
};
#define INIT_THREAD_INFO(tsk) \
@@ -82,6 +83,7 @@ void arch_setup_new_exec(void);
@@ -82,6 +83,7 @@ void arch_release_task_struct(struct tas
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
@@ -47,7 +47,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -97,6 +99,7 @@ void arch_setup_new_exec(void);
@@ -99,6 +101,7 @@ void arch_release_task_struct(struct tas
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
@@ -55,7 +55,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
@@ -108,8 +111,9 @@ void arch_setup_new_exec(void);
@@ -111,8 +114,9 @@ void arch_release_task_struct(struct tas
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
@@ -68,7 +68,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
_TIF_NOHZ)
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -38,6 +38,7 @@ int main(void)
@@ -40,6 +40,7 @@ int main(void)
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
@@ -78,7 +78,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -570,11 +570,16 @@ ENDPROC(el1_sync)
@@ -603,11 +603,16 @@ ENDPROC(el1_sync)
#ifdef CONFIG_PREEMPT
ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count
@@ -98,7 +98,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
@@ -588,6 +593,7 @@ ENDPROC(el1_irq)
@@ -621,6 +626,7 @@ ENDPROC(el1_irq)
1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
@@ -108,12 +108,12 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -755,7 +755,7 @@ asmlinkage void do_notify_resume(struct
@@ -912,7 +912,7 @@ asmlinkage void do_notify_resume(struct
/* Check valid user FS if needed */
addr_limit_user_check();
- if (thread_flags & _TIF_NEED_RESCHED) {
+ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
schedule();
} else {
local_irq_enable();
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);


@@ -1,7 +1,7 @@
From: Frank Rowand <frank.rowand@am.sony.com>
Date: Mon, 19 Sep 2011 14:51:14 -0700
Subject: arm: Convert arm boot_lock to raw
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The arm boot_lock is used by the secondary processor startup code. The locking
task is the idle thread, which has idle->sched_class == &idle_sched_class.
@@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void)
@@ -224,7 +224,7 @@ static void __iomem *scu_base_addr(void)
return (void __iomem *)(S5P_VA_SCU);
}
@@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
static void exynos_secondary_init(unsigned int cpu)
{
@@ -242,8 +242,8 @@ static void exynos_secondary_init(unsign
@@ -237,8 +237,8 @@ static void exynos_secondary_init(unsign
/*
* Synchronise with the boot thread.
*/
@@ -52,7 +52,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
@@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigne
@@ -302,7 +302,7 @@ static int exynos_boot_secondary(unsigne
* Set synchronisation state between this boot processor
* and the secondary one
*/
@@ -61,7 +61,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* The secondary processor is waiting to be released from
@@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigne
@@ -329,7 +329,7 @@ static int exynos_boot_secondary(unsigne
if (timeout == 0) {
printk(KERN_ERR "cpu1 power enable failed");
@@ -70,7 +70,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return -ETIMEDOUT;
}
}
@@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigne
@@ -375,7 +375,7 @@ static int exynos_boot_secondary(unsigne
* calibrations, then wait for it to finish
*/
fail:


@@ -1,7 +1,7 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 1 Dec 2017 10:42:03 +0100
Subject: [PATCH] arm*: disable NEON in kernel mode
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
NEON in kernel mode is used by the crypto algorithms and raid6 code.
While the raid6 code looks okay, the crypto algorithms do not: NEON
@@ -15,13 +15,13 @@ Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/Kconfig | 2 +-
arch/arm64/crypto/Kconfig | 20 ++++++++++----------
arch/arm64/crypto/Kconfig | 26 +++++++++++++-------------
arch/arm64/crypto/crc32-ce-glue.c | 3 ++-
3 files changed, 13 insertions(+), 12 deletions(-)
3 files changed, 16 insertions(+), 15 deletions(-)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2164,7 +2164,7 @@ config NEON
@@ -2166,7 +2166,7 @@ config NEON
config KERNEL_MODE_NEON
bool "Support for NEON in kernel mode"
@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -19,19 +19,19 @@ config CRYPTO_SHA512_ARM64
@@ -19,37 +19,37 @@ config CRYPTO_SHA512_ARM64
config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
@@ -48,6 +48,27 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select CRYPTO_HASH
select CRYPTO_SHA256_ARM64
config CRYPTO_SHA512_ARM64_CE
tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA512_ARM64
config CRYPTO_SHA3_ARM64
tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SHA3
config CRYPTO_SM3_ARM64_CE
tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
select CRYPTO_SM3
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
@@ -55,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select CRYPTO_HASH
select CRYPTO_GF128MUL
select CRYPTO_AES
@@ -39,7 +39,7 @@ config CRYPTO_GHASH_ARM64_CE
@@ -57,7 +57,7 @@ config CRYPTO_GHASH_ARM64_CE
config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -64,7 +85,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select CRYPTO_HASH
config CRYPTO_CRC32_ARM64_CE
@@ -53,13 +53,13 @@ config CRYPTO_AES_ARM64
@@ -71,13 +71,13 @@ config CRYPTO_AES_ARM64
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
@@ -80,7 +101,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
@@ -67,7 +67,7 @@ config CRYPTO_AES_ARM64_CE_CCM
@@ -85,7 +85,7 @@ config CRYPTO_AES_ARM64_CE_CCM
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
@@ -89,7 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
@@ -75,7 +75,7 @@ config CRYPTO_AES_ARM64_CE_BLK
@@ -93,7 +93,7 @@ config CRYPTO_AES_ARM64_CE_BLK
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
@@ -98,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64
select CRYPTO_AES
@@ -83,13 +83,13 @@ config CRYPTO_AES_ARM64_NEON_BLK
@@ -101,13 +101,13 @@ config CRYPTO_AES_ARM64_NEON_BLK
config CRYPTO_CHACHA20_NEON
tristate "NEON accelerated ChaCha20 symmetric cipher"
@@ -116,7 +137,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select CRYPTO_AES_ARM64
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -206,7 +206,8 @@ static struct shash_alg crc32_pmull_algs
@@ -208,7 +208,8 @@ static struct shash_alg crc32_pmull_algs
static int __init crc32_pmull_mod_init(void)
{


@@ -1,7 +1,7 @@
Subject: arm: Enable highmem for rt
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Feb 2013 11:03:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
fixup highmem for ARM.


@@ -1,7 +1,7 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 11 Mar 2013 21:37:27 +0100
Subject: arm/highmem: Flush tlb on unmap
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
The tlb should be flushed on unmap and thus make the mapping entry
invalid. This is only done in the non-debug case which does not look


@@ -1,7 +1,7 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 22 Dec 2016 17:28:33 +0100
Subject: [PATCH] arm: include definition for cpumask_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
This definition gets pulled in by other files. With the (later) split of
RCU and spinlock.h it won't compile anymore.


@@ -1,7 +1,7 @@
From: Yang Shi <yang.shi@linaro.org>
Date: Thu, 10 Nov 2016 16:17:55 -0800
Subject: [PATCH] arm: kprobe: replace patch_lock to raw lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
When running kprobe on -rt kernel, the below bug is caught:


@@ -1,7 +1,7 @@
Subject: arm: Add support for lazy preemption
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 31 Oct 2012 12:04:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.15-rt13.tar.xz
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
Implement the arm pieces for lazy preempt.
@@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -85,6 +85,7 @@ config ARM
@@ -88,6 +88,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
@@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
__u32 cpu; /* cpu */
@@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(stru
@@ -139,7 +140,8 @@ extern int vfp_restore_user_hwstate(stru
#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
@@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#define TIF_NOHZ 12 /* in adaptive nohz mode */
#define TIF_USING_IWMMXT 17
@@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(stru
@@ -149,6 +151,7 @@ extern int vfp_restore_user_hwstate(stru
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
@@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(stru
@@ -164,7 +167,8 @@ extern int vfp_restore_user_hwstate(stru
* Change these and you break ASM code in entry-common.S
*/
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
@@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif /* __ASM_ARM_THREAD_INFO_H */
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -65,6 +65,7 @@ int main(void)
@@ -67,6 +67,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
@@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -220,11 +220,18 @@ ENDPROC(__dabt_svc)
@@ -216,11 +216,18 @@ ENDPROC(__dabt_svc)
#ifdef CONFIG_PREEMPT
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
@@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif
svc_exit r5, irq = 1 @ return from exception
@@ -239,8 +246,14 @@ ENDPROC(__irq_svc)
@@ -235,8 +242,14 @@ ENDPROC(__irq_svc)
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
@@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
__und_fault:
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -53,7 +53,9 @@ saved_pc .req lr
@@ -54,7 +54,9 @@ saved_pc .req lr
cmp r2, #TASK_SIZE
blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
@@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
bne fast_work_pending
@@ -83,8 +85,11 @@ ENDPROC(ret_fast_syscall)
@@ -84,8 +86,11 @@ ENDPROC(ret_fast_syscall)
cmp r2, #TASK_SIZE
blne addr_limit_check_failed
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
@@ -140,7 +140,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -615,7 +615,8 @@ do_work_pending(struct pt_regs *regs, un
@@ -638,7 +638,8 @@ do_work_pending(struct pt_regs *regs, un
*/
trace_hardirqs_off();
do {

Some files were not shown because too many files have changed in this diff.