vfs: Fix possible escape from mount namespace (CVE-2015-2925)

This commit is contained in:
Ben Hutchings 2015-09-19 03:15:21 +02:00
parent dd220fc567
commit 5932de869a
5 changed files with 235 additions and 0 deletions

5
debian/changelog vendored
View File

@ -8,6 +8,11 @@ linux (4.2-1~exp2) UNRELEASED; urgency=medium
directly
* workqueue: Make flush_workqueue() available again to non GPL modules
(Closes: #798311)
* vfs: Fix possible escape from mount namespace (CVE-2015-2925):
- namei: lift (open-coded) terminate_walk() in follow_dotdot_rcu() into
callers
- dcache: Handle escaped paths in prepend_path
- vfs: Test for and handle paths that are unreachable from their mnt_root
[ Aurelien Jarno ]
* [mips*el] Fix BPF assembly code for pre-R2 CPUs. (fixes FTBFS)

View File

@ -0,0 +1,58 @@
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 15 Aug 2015 13:36:12 -0500
Subject: dcache: Handle escaped paths in prepend_path
Origin: https://git.kernel.org/linus/cde93be45a8a90d8c264c776fab63487b5038a65
A rename can result in a dentry that by walking up d_parent
will never reach it's mnt_root. For lack of a better term
I call this an escaped path.
prepend_path is called by four different functions __d_path,
d_absolute_path, d_path, and getcwd.
__d_path only wants to see paths are connected to the root it passes
in. So __d_path needs prepend_path to return an error.
d_absolute_path similarly wants to see paths that are connected to
some root. Escaped paths are not connected to any mnt_root so
d_absolute_path needs prepend_path to return an error greater
than 1. So escaped paths will be treated like paths on lazily
unmounted mounts.
getcwd needs to prepend "(unreachable)" so getcwd also needs
prepend_path to return an error.
d_path is the interesting hold out. d_path just wants to print
something, and does not care about the weird cases. Which raises
the question what should be printed?
Given that <escaped_path>/<anything> should result in -ENOENT I
believe it is desirable for escaped paths to be printed as empty
paths. As there are not really any meaninful path components when
considered from the perspective of a mount tree.
So tweak prepend_path to return an empty path with an new error
code of 3 when it encounters an escaped path.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/dcache.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2927,6 +2927,13 @@ restart:
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
+ /* Escaped? */
+ if (dentry != vfsmnt->mnt_root) {
+ bptr = *buffer;
+ blen = *buflen;
+ error = 3;
+ break;
+ }
/* Global root? */
if (mnt != parent) {
dentry = ACCESS_ONCE(mnt->mnt_mountpoint);

View File

@ -0,0 +1,65 @@
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 15 Aug 2015 13:36:41 -0500
Subject: dcache: Reduce the scope of i_lock in d_splice_alias
Origin: https://git.kernel.org/linus/a03e283bf5c3d4851b4998122196ce9f849e6dfb
i_lock is only needed until __d_find_any_alias calls dget on the alias
dentry. After that the reference to new ensures that dentry_kill and
d_delete will not remove the inode from the dentry, and remove the
dentry from the inode->d_entry list.
The inode i_lock came to be held over the the __d_move calls in
d_splice_alias through a series of introduction of locks with
increasing smaller scope. First it was the dcache_lock, then
it was the dcache_inode_lock, and finally inode->i_lock.
Furthermore inode->i_lock is not held over any other calls
to d_move or __d_move so it can not provide any meaningful
rename protection.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/dcache.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2719,7 +2719,7 @@ struct dentry *d_ancestor(struct dentry
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
+ * dentry->d_parent->d_inode->i_mutex, and rename_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
@@ -2745,7 +2745,6 @@ out_unalias:
__d_move(alias, dentry, false);
ret = 0;
out_err:
- spin_unlock(&inode->i_lock);
if (m2)
mutex_unlock(m2);
if (m1)
@@ -2791,10 +2790,11 @@ struct dentry *d_splice_alias(struct ino
if (S_ISDIR(inode->i_mode)) {
struct dentry *new = __d_find_any_alias(inode);
if (unlikely(new)) {
+ /* The reference to new ensures it remains an alias */
+ spin_unlock(&inode->i_lock);
write_seqlock(&rename_lock);
if (unlikely(d_ancestor(new, dentry))) {
write_sequnlock(&rename_lock);
- spin_unlock(&inode->i_lock);
dput(new);
new = ERR_PTR(-ELOOP);
pr_warn_ratelimited(
@@ -2813,7 +2813,6 @@ struct dentry *d_splice_alias(struct ino
} else {
__d_move(new, dentry, false);
write_sequnlock(&rename_lock);
- spin_unlock(&inode->i_lock);
security_d_instantiate(new, inode);
}
iput(inode);

View File

@ -0,0 +1,104 @@
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 15 Aug 2015 20:27:13 -0500
Subject: vfs: Test for and handle paths that are unreachable from their mnt_root
Origin: https://git.kernel.org/linus/397d425dc26da728396e66d392d5dcb8dac30c37
In rare cases a directory can be renamed out from under a bind mount.
In those cases without special handling it becomes possible to walk up
the directory tree to the root dentry of the filesystem and down
from the root dentry to every other file or directory on the filesystem.
Like division by zero .. from an unconnected path can not be given
a useful semantic as there is no predicting at which path component
the code will realize it is unconnected. We certainly can not match
the current behavior as the current behavior is a security hole.
Therefore when encounting .. when following an unconnected path
return -ENOENT.
- Add a function path_connected to verify path->dentry is reachable
from path->mnt.mnt_root. AKA to validate that rename did not do
something nasty to the bind mount.
To avoid races path_connected must be called after following a path
component to it's next path component.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/namei.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 1c2105e..29b9279 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -560,6 +560,24 @@ static int __nd_alloc_stack(struct nameidata *nd)
return 0;
}
+/**
+ * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
+ * @path: nameidate to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct path *path)
+{
+ struct vfsmount *mnt = path->mnt;
+
+ /* Only bind mounts can have disconnected paths */
+ if (mnt->mnt_root == mnt->mnt_sb->s_root)
+ return true;
+
+ return is_subdir(path->dentry, mnt->mnt_root);
+}
+
static inline int nd_alloc_stack(struct nameidata *nd)
{
if (likely(nd->depth != EMBEDDED_LEVELS))
@@ -1296,6 +1314,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
return -ECHILD;
nd->path.dentry = parent;
nd->seq = seq;
+ if (unlikely(!path_connected(&nd->path)))
+ return -ENOENT;
break;
} else {
struct mount *mnt = real_mount(nd->path.mnt);
@@ -1396,7 +1416,7 @@ static void follow_mount(struct path *path)
}
}
-static void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot(struct nameidata *nd)
{
if (!nd->root.mnt)
set_root(nd);
@@ -1412,6 +1432,8 @@ static void follow_dotdot(struct nameidata *nd)
/* rare case of legitimate dget_parent()... */
nd->path.dentry = dget_parent(nd->path.dentry);
dput(old);
+ if (unlikely(!path_connected(&nd->path)))
+ return -ENOENT;
break;
}
if (!follow_up(&nd->path))
@@ -1419,6 +1441,7 @@ static void follow_dotdot(struct nameidata *nd)
}
follow_mount(&nd->path);
nd->inode = nd->path.dentry->d_inode;
+ return 0;
}
/*
@@ -1634,7 +1657,7 @@ static inline int handle_dots(struct nameidata *nd, int type)
if (nd->flags & LOOKUP_RCU) {
return follow_dotdot_rcu(nd);
} else
- follow_dotdot(nd);
+ return follow_dotdot(nd);
}
return 0;
}

View File

@ -91,3 +91,6 @@ features/all/grsecurity/grkernsec_perf_harden.patch
bugfix/all/gfs2-make-statistics-unsigned-suitable-for-use-with-.patch
bugfix/all/workqueue-make-flush_workqueue-available-again-to-no.patch
bugfix/all/dcache-handle-escaped-paths-in-prepend_path.patch
bugfix/all/dcache-reduce-the-scope-of-i_lock-in-d_splice_alias.patch
bugfix/all/vfs-test-for-and-handle-paths-that-are-unreachable-f.patch