151 lines
5.1 KiB
Diff
151 lines
5.1 KiB
Diff
From: John Ogness <john.ogness@linutronix.de>
|
|
Date: Fri, 23 Feb 2018 00:50:24 +0100
|
|
Subject: [PATCH 07/17] fs/dcache: Avoid a try_lock loop in
|
|
shrink_dentry_list()
|
|
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.16/older/patches-4.16.7-rt1.tar.xz
|
|
|
|
Upstream commit 8f04da2adbdffed8dc4b2feb00ec3b3d84683885
|
|
|
|
shrink_dentry_list() holds dentry->d_lock and needs to acquire
|
|
dentry->d_inode->i_lock. This cannot be done with a spin_lock()
|
|
operation because it's the reverse of the regular lock order.
|
|
To avoid ABBA deadlocks it is done with a trylock loop.
|
|
|
|
Trylock loops are problematic in two scenarios:
|
|
|
|
1) PREEMPT_RT converts spinlocks to 'sleeping' spinlocks, which are
|
|
preemptible. As a consequence the i_lock holder can be preempted
|
|
by a higher priority task. If that task executes the trylock loop
|
|
it will do so forever and live lock.
|
|
|
|
2) In virtual machines trylock loops are problematic as well. The
|
|
VCPU on which the i_lock holder runs can be scheduled out and a
|
|
task on a different VCPU can loop for a whole time slice. In the
|
|
worst case this can lead to starvation. Commits 47be61845c77
|
|
("fs/dcache.c: avoid soft-lockup in dput()") and 046b961b45f9
|
|
("shrink_dentry_list(): take parent's d_lock earlier") are
|
|
addressing exactly those symptoms.
|
|
|
|
Avoid the trylock loop by using dentry_kill(). When pruning ancestors,
|
|
the same code applies that is used to kill a dentry in dput(). This
|
|
also has the benefit that the locking order is now the same. First
|
|
the inode is locked, then the parent.
|
|
|
|
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
|
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
|
|
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
|
---
|
|
fs/dcache.c | 41 ++++++++++-------------------------------
|
|
1 file changed, 10 insertions(+), 31 deletions(-)
|
|
|
|
--- a/fs/dcache.c
|
|
+++ b/fs/dcache.c
|
|
@@ -992,9 +992,11 @@ EXPORT_SYMBOL(d_prune_aliases);
|
|
|
|
/*
|
|
* Lock a dentry from shrink list.
|
|
+ * Called under rcu_read_lock() and dentry->d_lock; the former
|
|
+ * guarantees that nothing we access will be freed under us.
|
|
* Note that dentry is *not* protected from concurrent dentry_kill(),
|
|
- * d_delete(), etc. It is protected from freeing (by the fact of
|
|
- * being on a shrink list), but everything else is fair game.
|
|
+ * d_delete(), etc.
|
|
+ *
|
|
* Return false if dentry has been disrupted or grabbed, leaving
|
|
* the caller to kick it off-list. Otherwise, return true and have
|
|
* that dentry's inode and parent both locked.
|
|
@@ -1009,7 +1011,6 @@ static bool shrink_lock_dentry(struct de
|
|
|
|
inode = dentry->d_inode;
|
|
if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
|
|
- rcu_read_lock(); /* to protect inode */
|
|
spin_unlock(&dentry->d_lock);
|
|
spin_lock(&inode->i_lock);
|
|
spin_lock(&dentry->d_lock);
|
|
@@ -1018,16 +1019,13 @@ static bool shrink_lock_dentry(struct de
|
|
/* changed inode means that somebody had grabbed it */
|
|
if (unlikely(inode != dentry->d_inode))
|
|
goto out;
|
|
- rcu_read_unlock();
|
|
}
|
|
|
|
parent = dentry->d_parent;
|
|
if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
|
|
return true;
|
|
|
|
- rcu_read_lock(); /* to protect parent */
|
|
spin_unlock(&dentry->d_lock);
|
|
- parent = READ_ONCE(dentry->d_parent);
|
|
spin_lock(&parent->d_lock);
|
|
if (unlikely(parent != dentry->d_parent)) {
|
|
spin_unlock(&parent->d_lock);
|
|
@@ -1035,15 +1033,12 @@ static bool shrink_lock_dentry(struct de
|
|
goto out;
|
|
}
|
|
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
|
|
- if (likely(!dentry->d_lockref.count)) {
|
|
- rcu_read_unlock();
|
|
+ if (likely(!dentry->d_lockref.count))
|
|
return true;
|
|
- }
|
|
spin_unlock(&parent->d_lock);
|
|
out:
|
|
if (inode)
|
|
spin_unlock(&inode->i_lock);
|
|
- rcu_read_unlock();
|
|
return false;
|
|
}
|
|
|
|
@@ -1051,12 +1046,13 @@ static void shrink_dentry_list(struct li
|
|
{
|
|
while (!list_empty(list)) {
|
|
struct dentry *dentry, *parent;
|
|
- struct inode *inode;
|
|
|
|
dentry = list_entry(list->prev, struct dentry, d_lru);
|
|
spin_lock(&dentry->d_lock);
|
|
+ rcu_read_lock();
|
|
if (!shrink_lock_dentry(dentry)) {
|
|
bool can_free = false;
|
|
+ rcu_read_unlock();
|
|
d_shrink_del(dentry);
|
|
if (dentry->d_lockref.count < 0)
|
|
can_free = dentry->d_flags & DCACHE_MAY_FREE;
|
|
@@ -1065,6 +1061,7 @@ static void shrink_dentry_list(struct li
|
|
dentry_free(dentry);
|
|
continue;
|
|
}
|
|
+ rcu_read_unlock();
|
|
d_shrink_del(dentry);
|
|
parent = dentry->d_parent;
|
|
__dentry_kill(dentry);
|
|
@@ -1077,26 +1074,8 @@ static void shrink_dentry_list(struct li
|
|
* fragmentation.
|
|
*/
|
|
dentry = parent;
|
|
- while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
|
|
- parent = lock_parent(dentry);
|
|
- if (dentry->d_lockref.count != 1) {
|
|
- dentry->d_lockref.count--;
|
|
- spin_unlock(&dentry->d_lock);
|
|
- if (parent)
|
|
- spin_unlock(&parent->d_lock);
|
|
- break;
|
|
- }
|
|
- inode = dentry->d_inode; /* can't be NULL */
|
|
- if (unlikely(!spin_trylock(&inode->i_lock))) {
|
|
- spin_unlock(&dentry->d_lock);
|
|
- if (parent)
|
|
- spin_unlock(&parent->d_lock);
|
|
- cpu_relax();
|
|
- continue;
|
|
- }
|
|
- __dentry_kill(dentry);
|
|
- dentry = parent;
|
|
- }
|
|
+ while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
|
|
+ dentry = dentry_kill(dentry);
|
|
}
|
|
}
|
|
|