original development tree for Linux kernel GTP module; now long in mainline.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1769 lines
44 KiB

/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* dir.c - Operations for configfs directories.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*
* Based on sysfs:
* sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*/
#undef DEBUG
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/configfs.h>
#include "configfs_internal.h"
/*
 * Read-write semaphore with external linkage.
 * NOTE(review): the name suggests it serializes rename operations, but no
 * user of it is visible in this part of the file - confirm against callers.
 */
DECLARE_RWSEM(configfs_rename_sem);
/*
 * Protects mutations of configfs_dirent linkage, together with the proper
 * i_mutex.
 * Also protects mutations of the symlink linkage to a target configfs_dirent.
 * Mutators of configfs_dirent linkage must hold *both* the proper inode lock
 * and configfs_dirent_lock, taken in that order.
 * This allows one to safely traverse configfs_dirent trees and symlinks
 * without having to lock inodes.
 *
 * Protects setting of CONFIGFS_USET_DROPPING: checking the flag unlocked is
 * not reliable unless done in detach_groups() called from
 * rmdir()/unregister() and from configfs_attach_group().
 *
 * In this file it is also held while clearing sd->s_dentry and while
 * reading CONFIGFS_USET_CREATING.
 */
DEFINE_SPINLOCK(configfs_dirent_lock);
/*
 * ->d_iput() hook: unhook the dentry from the configfs_dirent stored in
 * d_fsdata (when there is one), drop that dirent reference, then release
 * the inode.
 */
static void configfs_d_iput(struct dentry *dentry, struct inode *inode)
{
	struct configfs_dirent *dirent = dentry->d_fsdata;

	if (!dirent) {
		/* No dirent attached: only the inode needs releasing. */
		iput(inode);
		return;
	}

	BUG_ON(dirent->s_dentry != dentry);

	/* Coordinate with configfs_readdir */
	spin_lock(&configfs_dirent_lock);
	dirent->s_dentry = NULL;
	spin_unlock(&configfs_dirent_lock);

	configfs_put(dirent);
	iput(inode);
}
/*
 * ->d_delete() hook: always returns 1.
 *
 * We _must_ delete our dentries on last dput, as the chain-to-parent
 * behavior is required to clear the parents of default_groups.
 */
static int configfs_d_delete(const struct dentry *dentry)
{
return 1;
}
/*
 * Dentry operations table for configfs: routes ->d_iput to
 * configfs_d_iput() and ->d_delete to configfs_d_delete().
 */
const struct dentry_operations configfs_dentry_ops = {
.d_iput = configfs_d_iput,
/* simple_delete_dentry() isn't exported */
.d_delete = configfs_d_delete,
};
/*
 * [blame annotation - commit message, 13 years ago]
 *
 * configfs: Silence lockdep on mkdir() and rmdir()
 *
 * When attaching default groups (subdirs) of a new group (in mkdir() or in
 * configfs_register()), configfs recursively takes inode's mutexes along the
 * path from the parent of the new group to the default subdirs. This is
 * needed to ensure that the VFS will not race with operations on these
 * sub-dirs. This is safe for the following reasons:
 *
 * - the VFS allows one to lock first an inode and second one of its children
 *   (The lock subclasses for this pattern are respectively I_MUTEX_PARENT and
 *   I_MUTEX_CHILD);
 * - from this rule any inode path can be recursively locked in descending
 *   order as long as it stays under a single mountpoint and does not follow
 *   symlinks.
 *
 * Unfortunately lockdep does not know (yet?) how to handle such recursion.
 *
 * I've tried to use Peter Zijlstra's lock_set_subclass() helper to upgrade
 * i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know that we might
 * recursively lock some of their descendant, but this usage does not seem to
 * fit the purpose of lock_set_subclass() because it leads to several i_mutex
 * locked with subclass I_MUTEX_PARENT by the same task.
 *
 * From inside configfs it is not possible to serialize those recursive
 * locking with a top-level one, because mkdir() and rmdir() are already
 * called with inodes locked by the VFS. So using some mutex_lock_nest_lock()
 * is not an option.
 *
 * I am proposing two solutions:
 * 1) one that wraps recursive mutex_lock()s with lockdep_off()/lockdep_on().
 * 2) (as suggested earlier by Peter Zijlstra) one that puts the i_mutexes
 *    recursively locked in different classes based on their depth from the
 *    top-level config_group created. This induces an arbitrary limit
 *    (MAX_LOCK_DEPTH - 2 == 46) on the nesting of configfs default groups
 *    whenever lockdep is activated but this limit looks reasonably high.
 *    Unfortunately, this also isolates VFS operations on configfs default
 *    groups from the others and thus lowers the chances to detect locking
 *    issues.
 *
 * Nobody likes solution 1), which I can understand. This patch implements
 * solution 2). However lockdep is still not happy with
 * configfs_depend_item(). Next patch reworks the locking of
 * configfs_depend_item() and finally makes lockdep happy.
 *
 * [ Note: This hides a few locking interactions with the VFS from lockdep.
 *   That was my big concern, because we like lockdep's protection. However,
 *   the current state always dumps a spurious warning. The locking is
 *   correct, so I tell people to ignore the warning and that we'll keep our
 *   eyes on the locking to make sure it stays correct. With this patch, we
 *   eliminate the warning. We do lose some of the lockdep protections, but
 *   this only means that we still have to keep our eyes on the locking.
 *   We're going to do that anyway. -- Joel ]
 *
 * Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
 * Signed-off-by: Joel Becker <joel.becker@oracle.com>
 */
#ifdef CONFIG_LOCKDEP
/*
* Helpers to make lockdep happy with our recursive locking of default groups'
* inodes (see configfs_attach_group() and configfs_detach_group()).
* We put default groups i_mutexes in separate classes according to their depth
* from the youngest non-default group ancestor.
*
* For a non-default group A having default groups A/B, A/C, and A/C/D, default
* groups A/B and A/C will have their inode's mutex in class
* default_group_class[0], and default group A/C/D will be in
* default_group_class[1].
*
* The lock classes are declared and assigned in inode.c, according to the
* s_depth value.
* The s_depth value is initialized to -1, adjusted to >= 0 when attaching
* default groups, and reset to -1 when all default groups are attached. During
* attachment, if configfs_create() sees s_depth > 0, the lock class of the new
* inode's mutex is set to default_group_class[s_depth - 1].
*/
/*
 * Lockdep build: give a freshly allocated dirent the "no depth" value -1.
 */
static void configfs_init_dirent_depth(struct configfs_dirent *sd)
{
sd->s_depth = -1;
}
/*
 * Lockdep build: derive the depth of a new directory dirent from its parent.
 * When the parent's s_depth is non-negative the child gets parent depth + 1;
 * otherwise the child's s_depth is left untouched.
 */
static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
					  struct configfs_dirent *sd)
{
	const int depth_of_parent = parent_sd->s_depth;

	if (depth_of_parent < 0)
		return;

	sd->s_depth = depth_of_parent + 1;
}
/*
 * Lockdep build: prepare a directory dirent for the creation of its default
 * groups by moving s_depth from -1 to 0 when needed.
 *
 * The item's i_mutex class is already set up at this point, so s_depth is
 * only used to compute the s_depth of new sub-directories, which is always
 * done with the item's i_mutex locked.
 */
static void
configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
{
	/*
	 * s_depth == -1 iff we are a non default group; a default group has
	 * s_depth > 0 (see create_dir()) and needs no adjustment.
	 */
	if (sd->s_depth != -1)
		return;

	/* Non default group about to create default groups. */
	sd->s_depth = 0;
}
/*
 * Lockdep build: reset s_depth to -1 once the dirent's default groups have
 * been created.
 */
static void
configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
{
/* We will not create default groups anymore. */
sd->s_depth = -1;
}
#else /* CONFIG_LOCKDEP */
/* !CONFIG_LOCKDEP: depth tracking is compiled out, nothing to initialize. */
static void configfs_init_dirent_depth(struct configfs_dirent *sd)
{
}
/* !CONFIG_LOCKDEP: depth tracking is compiled out, nothing to propagate. */
static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
struct configfs_dirent *sd)
{
}
/* !CONFIG_LOCKDEP: depth tracking is compiled out, nothing to adjust. */
static void
configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
{
}
/* !CONFIG_LOCKDEP: depth tracking is compiled out, nothing to reset. */
static void
configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
{
}
#endif /* CONFIG_LOCKDEP */
/*
* Allocates a new configfs_dirent and links it to the parent configfs_dirent
*/
static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd,
void *element, int type)
{
struct configfs_dirent * sd;
sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL);
if (!sd)
return ERR_PTR(-ENOMEM);
atomic_set(&sd->s_count, 1);
INIT_LIST_HEAD(&sd->s_links);
INIT_LIST_HEAD(&sd->s_children);
sd->s_element = element;
sd->s_type = type;
configfs: Silence lockdep on mkdir() and rmdir() When attaching default groups (subdirs) of a new group (in mkdir() or in configfs_register()), configfs recursively takes inode&#39;s mutexes along the path from the parent of the new group to the default subdirs. This is needed to ensure that the VFS will not race with operations on these sub-dirs. This is safe for the following reasons: - the VFS allows one to lock first an inode and second one of its children (The lock subclasses for this pattern are respectively I_MUTEX_PARENT and I_MUTEX_CHILD); - from this rule any inode path can be recursively locked in descending order as long as it stays under a single mountpoint and does not follow symlinks. Unfortunately lockdep does not know (yet?) how to handle such recursion. I&#39;ve tried to use Peter Zijlstra&#39;s lock_set_subclass() helper to upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know that we might recursively lock some of their descendant, but this usage does not seem to fit the purpose of lock_set_subclass() because it leads to several i_mutex locked with subclass I_MUTEX_PARENT by the same task. &gt;From inside configfs it is not possible to serialize those recursive locking with a top-level one, because mkdir() and rmdir() are already called with inodes locked by the VFS. So using some mutex_lock_nest_lock() is not an option. I am proposing two solutions: 1) one that wraps recursive mutex_lock()s with lockdep_off()/lockdep_on(). 2) (as suggested earlier by Peter Zijlstra) one that puts the i_mutexes recursively locked in different classes based on their depth from the top-level config_group created. This induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the nesting of configfs default groups whenever lockdep is activated but this limit looks reasonably high. Unfortunately, this also isolates VFS operations on configfs default groups from the others and thus lowers the chances to detect locking issues. 
Nobody likes solution 1), which I can understand. This patch implements solution 2). However lockdep is still not happy with configfs_depend_item(). Next patch reworks the locking of configfs_depend_item() and finally makes lockdep happy. [ Note: This hides a few locking interactions with the VFS from lockdep. That was my big concern, because we like lockdep&#39;s protection. However, the current state always dumps a spurious warning. The locking is correct, so I tell people to ignore the warning and that we&#39;ll keep our eyes on the locking to make sure it stays correct. With this patch, we eliminate the warning. We do lose some of the lockdep protections, but this only means that we still have to keep our eyes on the locking. We&#39;re going to do that anyway. -- Joel ] Signed-off-by: Louis Rilling &lt;louis.rilling@kerlabs.com&gt; Signed-off-by: Joel Becker &lt;joel.becker@oracle.com&gt;
13 years ago
configfs_init_dirent_depth(sd);
spin_lock(&configfs_dirent_lock);
if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
spin_unlock(&configfs_dirent_lock);
kmem_cache_free(configfs_dir_cachep, sd);
return ERR_PTR(-ENOENT);
}
list_add(&sd->s_sibling, &parent_sd->s_children);
spin_unlock(&configfs_dirent_lock);
return sd;
}
/*
 * Look for a child of @parent_sd that already carries the name @new.
 *
 * Children without an s_element are skipped. Returns -EEXIST if there is
 * already a configfs element with the same name for the same parent,
 * 0 otherwise.
 *
 * Called with parent inode's i_mutex held.
 */
static int configfs_dirent_exists(struct configfs_dirent *parent_sd,
				  const unsigned char *new)
{
	struct configfs_dirent *child;

	list_for_each_entry(child, &parent_sd->s_children, s_sibling) {
		const unsigned char *child_name;

		if (!child->s_element)
			continue;

		child_name = configfs_get_name(child);
		if (strcmp(child_name, new) == 0)
			return -EEXIST;
	}

	return 0;
}
/*
 * Create a configfs_dirent under @parent_sd via configfs_new_dirent() and
 * bind it to @dentry.
 *
 * The new dirent records @mode and @dentry; when @dentry is non-NULL its
 * d_fsdata is pointed at the dirent through configfs_get().
 *
 * Returns 0 on success or the error encoded by configfs_new_dirent().
 */
int configfs_make_dirent(struct configfs_dirent *parent_sd,
			 struct dentry *dentry, void *element,
			 umode_t mode, int type)
{
	struct configfs_dirent *new_sd;

	new_sd = configfs_new_dirent(parent_sd, element, type);
	if (IS_ERR(new_sd))
		return PTR_ERR(new_sd);

	new_sd->s_dentry = dentry;
	new_sd->s_mode = mode;

	if (dentry != NULL)
		dentry->d_fsdata = configfs_get(new_sd);

	return 0;
}
/*
 * Inode initializer for directories: bumps the link count and installs the
 * configfs directory inode and file operations. Always returns 0.
 */
static int init_dir(struct inode *inode)
{
	/* directory inodes start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);

	inode->i_fop = &configfs_dir_operations;
	inode->i_op = &configfs_dir_inode_operations;

	return 0;
}
/*
 * [blame annotation - commit message, 14 years ago]
 *
 * r/o bind mounts: filesystem helpers for custom 'struct file's
 *
 * Why do we need r/o bind mounts?
 *
 * This feature allows a read-only view into a read-write filesystem. In the
 * process of doing that, it also provides infrastructure for keeping track of
 * the number of writers to any given mount.
 *
 * This has a number of uses. It allows chroots to have parts of filesystems
 * writable. It will be useful for containers in the future because users may
 * have root inside a container, but should not be allowed to write to some
 * filesystems. This also replaces patches that vserver has had out of the
 * tree for several years.
 *
 * It allows security enhancement by making sure that parts of your filesystem
 * are read-only (such as when you don't trust your FTP server), when you
 * don't want to have entire new filesystems mounted, or when you want atime
 * selectively updated.
 *
 * I've been using the following script to test that the feature is working as
 * desired. It takes a directory and makes a regular bind and a r/o bind mount
 * of it. It then performs some normal filesystem operations on the three
 * directories, including ones that are expected to fail, like creating a file
 * on the r/o mount.
 *
 * This patch:
 *
 * Some filesystems forego the vfs and may_open() and create their own
 * 'struct file's.
 *
 * This patch creates a couple of helper functions which can be used by these
 * filesystems, and will provide a unified place which the r/o bind mount code
 * may patch.
 *
 * Also, rename an existing, static-scope init_file() to a less generic name.
 *
 * Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
 * Cc: Christoph Hellwig <hch@lst.de>
 * Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 * Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 */
/*
 * Inode initializer for attribute files: installs the configfs file
 * operations and sets the inode size to PAGE_SIZE. Always returns 0.
 */
static int configfs_init_file(struct inode *inode)
{
	inode->i_fop = &configfs_file_operations;
	inode->i_size = PAGE_SIZE;

	return 0;
}
/*
 * Inode initializer for symlinks: installs the configfs symlink inode
 * operations. Always returns 0.
 */
static int init_symlink(struct inode * inode)
{
inode->i_op = &configfs_symlink_inode_operations;
return 0;
}
static int create_dir(struct config_item * k, struct dentry * p,
struct dentry * d)
{
int error;
umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
if (!error)
error = configfs_make_dirent(p->d_fsdata, d, k, mode,
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories process 1: process 2: configfs_mkdir(&#34;A&#34;) attach_group(&#34;A&#34;) attach_item(&#34;A&#34;) d_instantiate(&#34;A&#34;) populate_groups(&#34;A&#34;) mutex_lock(&#34;A&#34;) attach_group(&#34;A/B&#34;) attach_item(&#34;A&#34;) d_instantiate(&#34;A/B&#34;) mkdir(&#34;A/B/C&#34;) do_path_lookup(&#34;A/B/C&#34;, LOOKUP_PARENT) ok lookup_create(&#34;A/B/C&#34;) mutex_lock(&#34;A/B&#34;) ok configfs_mkdir(&#34;A/B/C&#34;) ok attach_group(&#34;A/C&#34;) attach_item(&#34;A/C&#34;) d_instantiate(&#34;A/C&#34;) populate_groups(&#34;A/C&#34;) mutex_lock(&#34;A/C&#34;) attach_group(&#34;A/C/D&#34;) attach_item(&#34;A/C/D&#34;) failure mutex_unlock(&#34;A/C&#34;) detach_groups(&#34;A/C&#34;) nothing to do mkdir(&#34;A/C/E&#34;) do_path_lookup(&#34;A/C/E&#34;, LOOKUP_PARENT) ok lookup_create(&#34;A/C/E&#34;) mutex_lock(&#34;A/C&#34;) ok configfs_mkdir(&#34;A/C/E&#34;) ok detach_item(&#34;A/C&#34;) d_delete(&#34;A/C&#34;) mutex_unlock(&#34;A&#34;) detach_groups(&#34;A&#34;) mutex_lock(&#34;A/B&#34;) detach_group(&#34;A/B&#34;) detach_groups(&#34;A/B&#34;) nothing since no _default_ group detach_item(&#34;A/B&#34;) mutex_unlock(&#34;A/B&#34;) d_delete(&#34;A/B&#34;) detach_item(&#34;A&#34;) d_delete(&#34;A&#34;) Two bugs: 1/ &#34;A/B/C&#34; and &#34;A/C/E&#34; are created, but never removed while their parent are removed in the end. The same could happen with symlink() instead of mkdir(). 2/ &#34;A&#34; and &#34;A/C&#34; inodes are not locked while detach_item() is called on them, which may probably confuse VFS. This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before building the inode and instantiating the dentry, and validating the whole group+default groups hierarchy in a second pass by clearing CONFIGFS_USET_CREATING. 
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This does not prevent userspace from calling stat() successfuly on such directories, but this prevents userspace from adding (children to | symlinking from/to | read/write attributes of | listing the contents of) not validated items. In other words, userspace will not interact with the subsystem on a new item until the new item creation completes correctly. It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the target of a new symlink: a valid target directory in the middle of attaching a new user-created child item could be wrongly detected as being attached. 2/ is fixed by next commit. Signed-off-by: Louis Rilling &lt;louis.rilling@kerlabs.com&gt; Signed-off-by: Joel Becker &lt;joel.becker@oracle.com&gt; Signed-off-by: Mark Fasheh &lt;mfasheh@suse.com&gt;
14 years ago
CONFIGFS_DIR | CONFIGFS_USET_CREATING);
if (!error) {
configfs: Silence lockdep on mkdir() and rmdir() When attaching default groups (subdirs) of a new group (in mkdir() or in configfs_register()), configfs recursively takes inode&#39;s mutexes along the path from the parent of the new group to the default subdirs. This is needed to ensure that the VFS will not race with operations on these sub-dirs. This is safe for the following reasons: - the VFS allows one to lock first an inode and second one of its children (The lock subclasses for this pattern are respectively I_MUTEX_PARENT and I_MUTEX_CHILD); - from this rule any inode path can be recursively locked in descending order as long as it stays under a single mountpoint and does not follow symlinks. Unfortunately lockdep does not know (yet?) how to handle such recursion. I&#39;ve tried to use Peter Zijlstra&#39;s lock_set_subclass() helper to upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know that we might recursively lock some of their descendant, but this usage does not seem to fit the purpose of lock_set_subclass() because it leads to several i_mutex locked with subclass I_MUTEX_PARENT by the same task. &gt;From inside configfs it is not possible to serialize those recursive locking with a top-level one, because mkdir() and rmdir() are already called with inodes locked by the VFS. So using some mutex_lock_nest_lock() is not an option. I am proposing two solutions: 1) one that wraps recursive mutex_lock()s with lockdep_off()/lockdep_on(). 2) (as suggested earlier by Peter Zijlstra) one that puts the i_mutexes recursively locked in different classes based on their depth from the top-level config_group created. This induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the nesting of configfs default groups whenever lockdep is activated but this limit looks reasonably high. Unfortunately, this also isolates VFS operations on configfs default groups from the others and thus lowers the chances to detect locking issues. 
Nobody likes solution 1), which I can understand. This patch implements solution 2). However lockdep is still not happy with configfs_depend_item(). Next patch reworks the locking of configfs_depend_item() and finally makes lockdep happy. [ Note: This hides a few locking interactions with the VFS from lockdep. That was my big concern, because we like lockdep&#39;s protection. However, the current state always dumps a spurious warning. The locking is correct, so I tell people to ignore the warning and that we&#39;ll keep our eyes on the locking to make sure it stays correct. With this patch, we eliminate the warning. We do lose some of the lockdep protections, but this only means that we still have to keep our eyes on the locking. We&#39;re going to do that anyway. -- Joel ] Signed-off-by: Louis Rilling &lt;louis.rilling@kerlabs.com&gt; Signed-off-by: Joel Becker &lt;joel.becker@oracle.com&gt;
13 years ago
configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
error = configfs_create(d, mode, init_dir);
if (!error) {
inc_nlink(p->d_inode);
} else {
struct configfs_dirent *sd = d->d_fsdata;
if (sd) {
spin_lock(&configfs_dirent_lock);
list_del_init(&sd->s_sibling);
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
}
}
return error;
}
/**
 * configfs_create_dir - create a directory for a config_item.
 * @item: config_item we're creating a directory for.
 * @dentry: config_item's dentry.
 *
 * The parent dentry is item->ci_parent's dentry when the item has a parent,
 * otherwise the root of configfs_mount. Returns -EFAULT when neither is
 * available, else the result of create_dir(). On success item->ci_dentry is
 * set to @dentry.
 *
 * Note: user-created entries won't be allowed under this new directory
 * until it is validated by configfs_dir_set_ready()
 */
static int configfs_create_dir(struct config_item *item, struct dentry *dentry)
{
	struct dentry *parent_dentry;
	int rc;

	BUG_ON(!item);

	if (item->ci_parent) {
		parent_dentry = item->ci_parent->ci_dentry;
	} else if (configfs_mount) {
		parent_dentry = configfs_mount->mnt_root;
	} else {
		return -EFAULT;
	}

	rc = create_dir(item, parent_dentry, dentry);
	if (rc)
		return rc;

	item->ci_dentry = dentry;
	return 0;
}
/*
 * [blame annotation - commit message, 14 years ago]
 *
 * [PATCH] configfs: Prevent userspace from creating new entries under
 * attaching directories
 *
 * (The two-column "process 1 / process 2" trace below was flattened by the
 * page extraction; the original column layout is not recoverable here.)
 *
 * process 1: process 2: configfs_mkdir("A") attach_group("A")
 * attach_item("A") d_instantiate("A") populate_groups("A") mutex_lock("A")
 * attach_group("A/B") attach_item("A") d_instantiate("A/B") mkdir("A/B/C")
 * do_path_lookup("A/B/C", LOOKUP_PARENT) ok lookup_create("A/B/C")
 * mutex_lock("A/B") ok configfs_mkdir("A/B/C") ok attach_group("A/C")
 * attach_item("A/C") d_instantiate("A/C") populate_groups("A/C")
 * mutex_lock("A/C") attach_group("A/C/D") attach_item("A/C/D") failure
 * mutex_unlock("A/C") detach_groups("A/C") nothing to do mkdir("A/C/E")
 * do_path_lookup("A/C/E", LOOKUP_PARENT) ok lookup_create("A/C/E")
 * mutex_lock("A/C") ok configfs_mkdir("A/C/E") ok detach_item("A/C")
 * d_delete("A/C") mutex_unlock("A") detach_groups("A") mutex_lock("A/B")
 * detach_group("A/B") detach_groups("A/B") nothing since no _default_ group
 * detach_item("A/B") mutex_unlock("A/B") d_delete("A/B") detach_item("A")
 * d_delete("A")
 *
 * Two bugs:
 *
 * 1/ "A/B/C" and "A/C/E" are created, but never removed while their parent
 * are removed in the end. The same could happen with symlink() instead of
 * mkdir().
 *
 * 2/ "A" and "A/C" inodes are not locked while detach_item() is called on
 * them, which may probably confuse VFS.
 *
 * This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING
 * before building the inode and instantiating the dentry, and validating the
 * whole group+default groups hierarchy in a second pass by clearing
 * CONFIGFS_USET_CREATING.
 *
 * mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
 * called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING.
 * This does not prevent userspace from calling stat() successfully on such
 * directories, but this prevents userspace from adding (children to |
 * symlinking from/to | read/write attributes of | listing the contents of)
 * not validated items. In other words, userspace will not interact with the
 * subsystem on a new item until the new item creation completes correctly.
 *
 * It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
 * flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
 * target of a new symlink: a valid target directory in the middle of
 * attaching a new user-created child item could be wrongly detected as being
 * attached.
 *
 * 2/ is fixed by next commit.
 *
 * Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
 * Signed-off-by: Joel Becker <joel.becker@oracle.com>
 * Signed-off-by: Mark Fasheh <mfasheh@suse.com>
 */
/*
* Allow userspace to create new entries under a new directory created with
* configfs_create_dir(), and under all of its chidlren directories recursively.
* @sd configfs_dirent of the new directory to validate
*
* Caller must hold configfs_dirent_lock.
*/
static void configfs_dir_set_ready(struct configfs_dirent *sd)
{
struct configfs_dirent *child_sd;
sd->s_type &= ~CONFIGFS_USET_CREATING;
list_for_each_entry(child_sd, &sd->s_children, s_sibling)
if (child_sd->s_type & CONFIGFS_USET_CREATING)
configfs_dir_set_ready(child_sd);
}
/*
 * Check that a directory does not belong to a directory hierarchy being
 * attached and not validated yet, i.e. that CONFIGFS_USET_CREATING is clear.
 * @sd configfs_dirent of the directory to check
 *
 * @return non-zero iff the directory was validated
 *
 * Note: the flag is sampled under configfs_dirent_lock, so the result may
 * change from false to true in two consecutive calls, but never from true
 * to false.
 */
int configfs_dirent_is_ready(struct configfs_dirent *sd)
{
	int still_creating;

	spin_lock(&configfs_dirent_lock);
	still_creating = sd->s_type & CONFIGFS_USET_CREATING;
	spin_unlock(&configfs_dirent_lock);

	return !still_creating;
}
/*
 * Create the symlink entry @dentry under @parent for @sl.
 *
 * A CONFIGFS_ITEM_LINK dirent is made first, then the inode is created with
 * init_symlink(). If inode creation fails, the dirent is unlinked from its
 * siblings and its reference dropped.
 *
 * Returns 0 on success or a negative errno.
 */
int configfs_create_link(struct configfs_symlink *sl,
			 struct dentry *parent,
			 struct dentry *dentry)
{
	umode_t mode = S_IFLNK | S_IRWXUGO;
	struct configfs_dirent *link_sd;
	int err;

	err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode,
				   CONFIGFS_ITEM_LINK);
	if (err)
		return err;

	err = configfs_create(dentry, mode, init_symlink);
	if (!err)
		return 0;

	/* Inode creation failed: undo configfs_make_dirent(). */
	link_sd = dentry->d_fsdata;
	if (link_sd) {
		spin_lock(&configfs_dirent_lock);
		list_del_init(&link_sd->s_sibling);
		spin_unlock(&configfs_dirent_lock);
		configfs_put(link_sd);
	}

	return err;
}
/*
 * Tear down directory dentry @d: unlink its configfs_dirent from the sibling
 * list, drop a dirent reference, and remove the directory from the parent
 * with simple_rmdir() when @d has an inode. A reference on the parent dentry
 * is held for the duration of the call.
 */
static void remove_dir(struct dentry *d)
{
	struct dentry *parent_dentry = dget(d->d_parent);
	struct configfs_dirent *dirent = d->d_fsdata;

	spin_lock(&configfs_dirent_lock);
	list_del_init(&dirent->s_sibling);
	spin_unlock(&configfs_dirent_lock);
	configfs_put(dirent);

	if (d->d_inode)
		simple_rmdir(parent_dentry->d_inode, d);

	pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);

	dput(parent_dentry);
}
/**
 * configfs_remove_dir - remove a config_item's directory.
 * @item: config_item we're removing.
 *
 * Takes a reference on the item's dentry, hands it to remove_dir(), and
 * drops the reference again. Does nothing when the item has no dentry.
 *
 * NOTE(review): earlier documentation said files in the directory are
 * removed first; the body here only calls remove_dir() - confirm where the
 * files are removed.
 *
 * Caller holds the mutex of the item's inode
 */
static void configfs_remove_dir(struct config_item *item)
{
	struct dentry *dir = dget(item->ci_dentry);

	if (dir) {
		remove_dir(dir);

		/* Drop reference from dget() on entrance. */
		dput(dir);
	}
}
/* attaches attribute's configfs_dirent to the dentry corresponding to the
* attribute file
*/
static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * dentry)
{
struct configfs_attribute * attr = sd->s_element;
int error;
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
r/o bind mounts: filesystem helpers for custom &#39;struct file&#39;s Why do we need r/o bind mounts? This feature allows a read-only view into a read-write filesystem. In the process of doing that, it also provides infrastructure for keeping track of the number of writers to any given mount. This has a number of uses. It allows chroots to have parts of filesystems writable. It will be useful for containers in the future because users may have root inside a container, but should not be allowed to write to somefilesystems. This also replaces patches that vserver has had out of the tree for several years. It allows security enhancement by making sure that parts of your filesystem read-only (such as when you don&#39;t trust your FTP server), when you don&#39;t want to have entire new filesystems mounted, or when you want atime selectively updated. I&#39;ve been using the following script to test that the feature is working as desired. It takes a directory and makes a regular bind and a r/o bind mount of it. It then performs some normal filesystem operations on the three directories, including ones that are expected to fail, like creating a file on the r/o mount. This patch: Some filesystems forego the vfs and may_open() and create their own &#39;struct file&#39;s. This patch creates a couple of helper functions which can be used by these filesystems, and will provide a unified place which the r/o bind mount code may patch. Also, rename an existing, static-scope init_file() to a less generic name. Signed-off-by: Dave Hansen &lt;haveblue@us.ibm.com&gt; Cc: Christoph Hellwig &lt;hch@lst.de&gt; Signed-off-by: Andrew Morton &lt;akpm@linux-foundation.org&gt; Signed-off-by: Linus Torvalds &lt;torvalds@linux-foundation.org&gt;
14 years ago
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
configfs_put(sd);
return error;
}
d_rehash(dentry);
return 0;
}
static struct dentry * configfs_lookup(struct inode *dir,
struct dentry *dentry,
struct nameidata *nd)
{
struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
struct configfs_dirent * sd;
int found = 0;
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories process 1: process 2: configfs_mkdir(&#34;A&#34;) attach_group(&#34;A&#34;) attach_item(&#34;A&#34;) d_instantiate(&#34;A&#34;) populate_groups(&#34;A&#34;) mutex_lock(&#34;A&#34;) attach_group(&#34;A/B&#34;) attach_item(&#34;A&#34;) d_instantiate(&#34;A/B&#34;) mkdir(&#34;A/B/C&#34;) do_path_lookup(&#34;A/B/C&#34;, LOOKUP_PARENT) ok lookup_create(&#34;A/B/C&#34;) mutex_lock(&#34;A/B&#34;) ok configfs_mkdir(&#34;A/B/C&#34;) ok attach_group(&#34;A/C&#34;) attach_item(&#34;A/C&#34;) d_instantiate(&#34;A/C&#34;) populate_groups(&#34;A/C&#34;) mutex_lock(&#34;A/C&#34;) attach_group(&#34;A/C/D&#34;) attach_item(&#34;A/C/D&#34;) failure mutex_unlock(&#34;A/C&#34;) detach_groups(&#34;A/C&#34;) nothing to do mkdir(&#34;A/C/E&#34;) do_path_lookup(&#34;A/C/E&#34;, LOOKUP_PARENT) ok lookup_create(&#34;A/C/E&#34;) mutex_lock(&#34;A/C&#34;) ok configfs_mkdir(&#34;A/C/E&#34;) ok detach_item(&#34;A/C&#34;) d_delete(&#34;A/C&#34;) mutex_unlock(&#34;A&#34;) detach_groups(&#34;A&#34;) mutex_lock(&#34;A/B&#34;) detach_group(&#34;A/B&#34;) detach_groups(&#34;A/B&#34;) nothing since no _default_ group detach_item(&#34;A/B&#34;) mutex_unlock(&#34;A/B&#34;) d_delete(&#34;A/B&#34;) detach_item(&#34;A&#34;) d_delete(&#34;A&#34;) Two bugs: 1/ &#34;A/B/C&#34; and &#34;A/C/E&#34; are created, but never removed while their parent are removed in the end. The same could happen with symlink() instead of mkdir(). 2/ &#34;A&#34; and &#34;A/C&#34; inodes are not locked while detach_item() is called on them, which may probably confuse VFS. This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before building the inode and instantiating the dentry, and validating the whole group+default groups hierarchy in a second pass by clearing CONFIGFS_USET_CREATING. 
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This does not prevent userspace from calling stat() successfuly on such directories, but this prevents userspace from adding (children to | symlinking from/to | read/write attributes of | listing the contents of) not validated items. In other words, userspace will not interact with the subsystem on a new item until the new item creation completes correctly. It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the target of a new symlink: a valid target directory in the middle of attaching a new user-created child item could be wrongly detected as being attached. 2/ is fixed by next commit. Signed-off-by: Louis Rilling &lt;louis.rilling@kerlabs.com&gt; Signed-off-by: Joel Becker &lt;joel.becker@oracle.com&gt; Signed-off-by: Mark Fasheh &lt;mfasheh@suse.com&gt;
14 years ago
int err;
/*
* Fake invisibility if dir belongs to a group/default groups hierarchy
* being attached
*
* This forbids userspace to read/write attributes of items which may
* not complete their initialization, since the dentries of the
* attributes won't be instantiated.
*/
err = -ENOENT;
if (!configfs_dirent_is_ready(parent_sd))
goto out;
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
if (sd->s_type & CONFIGFS_NOT_PINNED) {
const unsigned char * name = configfs_get_name(sd);
if (strcmp(name, dentry->d_name.name))
continue;
found = 1;
err = configfs_attach_attr(sd, dentry);
break;
}
}
if (!found) {
/*
* If it doesn't exist and it isn't a NOT_PINNED item,
* it must be negative.
*/
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
d_add(dentry, NULL);
return NULL;
}
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories process 1: process 2: configfs_mkdir(&#34;A&#34;) attach_group(&#34;A&#34;) attach_item(&#34;A&#34;) d_instantiate(&#34;A&#34;) populate_groups(&#34;A&#34;) mutex_lock(&#34;A&#34;) attach_group(&#34;A/B&#34;) attach_item(&#34;A&#34;) d_instantiate(&#34;A/B&#34;) mkdir(&#34;A/B/C&#34;) do_path_lookup(&#34;A/B/C&#34;, LOOKUP_PARENT) ok lookup_create(&#34;A/B/C&#34;) mutex_lock(&#34;A/B&#34;) ok configfs_mkdir(&#34;A/B/C&#34;) ok attach_group(&#34;A/C&#34;) attach_item(&#34;A/C&#34;) d_instantiate(&#34;A/C&#34;) populate_groups(&#34;A/C&#34;) mutex_lock(&#34;A/C&#34;) attach_group(&#34;A/C/D&#34;) attach_item(&#34;A/C/D&#34;) failure mutex_unlock(&#34;A/C&#34;) detach_groups(&#34;A/C&#34;) nothing to do mkdir(&#34;A/C/E&#34;) do_path_lookup(&#34;A/C/E&#34;, LOOKUP_PARENT) ok lookup_create(&#34;A/C/E&#34;) mutex_lock(&#34;A/C&#34;) ok configfs_mkdir(&#34;A/C/E&#34;) ok detach_item(&#34;A/C&#34;) d_delete(&#34;A/C&#34;) mutex_unlock(&#34;A&#34;) detach_groups(&#34;A&#34;) mutex_lock(&#34;A/B&#34;) detach_group(&#34;A/B&#34;) detach_groups(&#34;A/B&#34;) nothing since no _default_ group detach_item(&#34;A/B&#34;) mutex_unlock(&#34;A/B&#34;) d_delete(&#34;A/B&#34;) detach_item(&#34;A&#34;) d_delete(&#34;A&#34;) Two bugs: 1/ &#34;A/B/C&#34; and &#34;A/C/E&#34; are created, but never removed while their parent are removed in the end. The same could happen with symlink() instead of mkdir(). 2/ &#34;A&#34; and &#34;A/C&#34; inodes are not locked while detach_item() is called on them, which may probably confuse VFS. This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before building the inode and instantiating the dentry, and validating the whole group+default groups hierarchy in a second pass by clearing CONFIGFS_USET_CREATING. 
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This does not prevent userspace from calling stat() successfuly on such directories, but this prevents userspace from adding (children to | symlinking from/to | read/write attributes of | listing the contents of) not validated items. In other words, userspace will not interact with the subsystem on a new item until the new item creation completes correctly. It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the target of a new symlink: a valid target directory in the middle of attaching a new user-created child item could be wrongly detected as being attached. 2/ is fixed by next commit. Signed-off-by: Louis Rilling &lt;louis.rilling@kerlabs.com&gt; Signed-off-by: Joel Becker &lt;joel.becker@oracle.com&gt; Signed-off-by: Mark Fasheh &lt;mfasheh@suse.com&gt;
14 years ago
out:
return ERR_PTR(err);
}
/*
 * Prepare the directory behind @dentry for removal.
 *
 * CONFIGFS_USET_DROPPING is set on the directory's dirent first.  The
 * children are then walked; entries without an s_element and
 * CONFIGFS_NOT_PINNED entries (attribute files) are ignored.  Every other
 * child must be a CONFIGFS_USET_DEFAULT directory, which is prepared
 * recursively.
 *
 * Returns 0 on success, -EBUSY if the dirent's s_links list is not empty,
 * -ENOTEMPTY if a non-default child is found, and -EAGAIN if a default
 * child has CONFIGFS_USET_IN_MKDIR set; in the latter case *@wait_mutex
 * (when @wait_mutex is non-NULL) is pointed at that child's inode mutex.
 *
 * If there is an error, the caller will reset the flags via
 * configfs_detach_rollback().
 */
static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex)
{
	struct configfs_dirent *parent_sd = dentry->d_fsdata;
	struct configfs_dirent *sd;
	int ret;

	/* Mark that we're trying to drop the group */
	parent_sd->s_type |= CONFIGFS_USET_DROPPING;

	if (!list_empty(&parent_sd->s_links))
		return -EBUSY;

	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
		if (!sd->s_element || (sd->s_type & CONFIGFS_NOT_PINNED))
			continue;

		/* Anything that is not a default group blocks the removal. */
		if (!(sd->s_type & CONFIGFS_USET_DEFAULT))
			return -ENOTEMPTY;

		/* Abort if racing with mkdir() */
		if (sd->s_type & CONFIGFS_USET_IN_MKDIR) {
			if (wait_mutex)
				*wait_mutex = &sd->s_dentry->d_inode->i_mutex;
			return -EAGAIN;
		}

		/*
		 * Yup, recursive.  If there's a problem, blame
		 * deep nesting of default_groups
		 */
		ret = configfs_detach_prep(sd->s_dentry, wait_mutex);
		if (ret)
			return ret;
	}

	return 0;
}
/*
* Walk the tree, resetting CONFIGFS_USET_DROPPING wherever it was
* set.
*/
static void configfs_detach_rollback(struct dentry *dentry)
{
struct configfs_dirent *parent_sd = dentry->d_fsdata;
struct configfs_dirent *sd;
parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
if (sd->s_type & CONFIGFS_USET_DEFAULT)
configfs_detach_rollback(sd->s_dentry);
}
/*
 * Remove every attribute entry (CONFIGFS_NOT_PINNED child with an
 * s_element) from the directory of @item.  Nothing is done when the item
 * has no dentry.
 */
static void detach_attrs(struct config_item * item)
{
	struct dentry *dir = dget(item->ci_dentry);
	struct configfs_dirent *dir_sd;
	struct configfs_dirent *sd, *next;

	if (!dir)
		return;

	pr_debug("configfs %s: dropping attrs for dir\n",
		 dir->d_name.name);

	dir_sd = dir->d_fsdata;
	list_for_each_entry_safe(sd, next, &dir_sd->s_children, s_sibling) {
		if (sd->s_element && (sd->s_type & CONFIGFS_NOT_PINNED)) {
			/* Unlink under the dirent lock, then release it. */
			spin_lock(&configfs_dirent_lock);
			list_del_init(&sd->s_sibling);
			spin_unlock(&configfs_dirent_lock);
			configfs_drop_dentry(sd, dir);
			configfs_put(sd);
		}
	}

	/* Balance the dget() taken at the top of the function. */
	dput(dir);
}
static int populate_attrs(struct config_item *item)
{
struct config_item_type *t = item->ci_type;
struct configfs_attribute *attr;
int error = 0;
int i;
if (!t)
return -EINVAL;
if (t->ct_attrs) {
for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) {
if ((error = configfs_create_file(item, attr)))
break;
}
}
if (error)
detach_attrs(item);
return error;
}
/*
 * Forward declarations: create_default_group() calls
 * configfs_attach_group() and detach_groups() calls
 * configfs_detach_group() before either definition has been seen.
 */
static int configfs_attach_group(struct config_item *parent_item,
struct config_item *item,
struct dentry *dentry);
static void configfs_detach_group(struct config_item *item);
/*
 * Detach every CONFIGFS_USET_DEFAULT child of @group.
 *
 * For each such child, with the child's inode mutex held, the group is
 * detached with configfs_detach_group(), the inode is flagged S_DEAD and
 * dont_mount() is called on the dentry.  After unlocking, the child dentry
 * is deleted and a reference on it is dropped.  Nothing is done when the
 * group has no dentry.
 */
static void detach_groups(struct config_group *group)
{
	struct dentry *group_dentry = dget(group->cg_item.ci_dentry);
	struct dentry *child;
	struct configfs_dirent *group_sd;
	struct configfs_dirent *sd, *next;

	if (!group_dentry)
		return;

	group_sd = group_dentry->d_fsdata;
	list_for_each_entry_safe(sd, next, &group_sd->s_children, s_sibling) {
		if (sd->s_element && (sd->s_type & CONFIGFS_USET_DEFAULT)) {
			child = sd->s_dentry;

			mutex_lock(&child->d_inode->i_mutex);

			configfs_detach_group(sd->s_element);
			child->d_inode->i_flags |= S_DEAD;
			dont_mount(child);

			mutex_unlock(&child->d_inode->i_mutex);

			d_delete(child);
			dput(child);
		}
	}

	/* Balance the dget() taken at the top of the function. */
	dput(group_dentry);
}
/*
 * This fakes mkdir(2) on a default_groups[] entry.  It
 * creates a dentry, attaches it, and then does fixup
 * on the sd->s_type.
 *
 * Returns 0 on success, -ENOMEM if the dentry cannot be allocated, or the
 * error from configfs_attach_group(); in the latter case the new dentry is
 * dropped and released again.
 *
 * We could, perhaps, tweak our parent's ->mkdir for a minute and
 * try using vfs_mkdir.  Just a thought.
 */
static int create_default_group(struct config_group *parent_group,
				struct config_group *group)
{
	/* We trust the caller holds a reference to parent */
	struct dentry *parent = parent_group->cg_item.ci_dentry;
	struct dentry *child;
	struct configfs_dirent *sd;
	struct qstr name;
	int ret;

	/* Fall back to the embedded name buffer when no name is set. */
	if (!group->cg_item.ci_name)
		group->cg_item.ci_name = group->cg_item.ci_namebuf;

	name.name = group->cg_item.ci_name;
	name.len = strlen(name.name);
	name.hash = full_name_hash(name.name, name.len);

	child = d_alloc(parent, &name);
	if (!child)
		return -ENOMEM;

	d_add(child, NULL);

	ret = configfs_attach_group(&parent_group->cg_item,
				    &group->cg_item, child);
	if (ret) {
		/* A failed attach must not leave an inode on the dentry. */
		BUG_ON(child->d_inode);
		d_drop(child);
		dput(child);
		return ret;
	}

	sd = child->d_fsdata;
	sd->s_type |= CONFIGFS_USET_DEFAULT;

	return 0;
}
/*
 * Create a default group for each entry of the NULL-terminated
 * default_groups array of @group.
 *
 * Returns 0 on success (including when there is no default_groups array).
 * On the first create_default_group() failure, detach_groups() is called
 * on @group and that error is returned.
 */
static int populate_groups(struct config_group *group)
{
	int ret;
	int i;

	if (!group->default_groups)
		return 0;

	for (i = 0; group->default_groups[i]; i++) {
		ret = create_default_group(group, group->default_groups[i]);
		if (ret) {
			detach_groups(group);
			return ret;
		}
	}

	return 0;
}
/*
* All of link_obj/unlink_obj/link_group/unlink_group require that
* subsys->su_mutex is held.
*/
/*
 * Detach @item from the group recorded in ci_group, if any: remove it from
 * the list it is linked on through ci_entry, clear ci_group and ci_parent,
 * and drop one reference on the item and one on the group.
 */
static void unlink_obj(struct config_item *item)
{
	struct config_group *group = item->ci_group;

	if (!group)
		return;

	list_del_init(&item->ci_entry);

	item->ci_group = NULL;
	item->ci_parent = NULL;

	/* Drop the reference for ci_entry */
	config_item_put(item);

	/* Drop the reference for ci_parent */
	config_group_put(group);
}
static void link_obj(struct config_item *parent_item, struct config_item *item)
{
/*
* Parent seems redundant with group, but it makes certain
* traversals much nicer.
*/
item->ci_parent = parent_item;
/*
* We hold a reference on the parent for the child's ci_parent