From 3e25eb9c4bb649acdddb333d10774b640190f727 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 10:20:35 -0500 Subject: [PATCH 1/9] securityfs: fix object creation races inode needs to be fully set up before we feed it to d_instantiate(). securityfs_create_file() does *not* do so; it sets ->i_fop and ->i_private only after we'd exposed the inode. Unfortunately, that's done fairly deep in call chain, so the amount of churn is considerable. Helper functions killed by substituting into their solitary call sites, dead code removed. We finally can bury default_file_ops, now that the final value of ->i_fop is available (and assigned) at the point where inode is allocated. Reviewed-by: James Morris Signed-off-by: Al Viro --- security/inode.c | 193 +++++++++++++---------------------------------- 1 file changed, 51 insertions(+), 142 deletions(-) diff --git a/security/inode.c b/security/inode.c index 90a70a67d835..43ce6e19015f 100644 --- a/security/inode.c +++ b/security/inode.c @@ -25,100 +25,6 @@ static struct vfsmount *mount; static int mount_count; -/* - * TODO: - * I think I can get rid of these default_file_ops, but not quite sure... - */ -static ssize_t default_read_file(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - return 0; -} - -static ssize_t default_write_file(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - return count; -} - -static int default_open(struct inode *inode, struct file *file) -{ - if (inode->i_private) - file->private_data = inode->i_private; - - return 0; -} - -static const struct file_operations default_file_ops = { - .read = default_read_file, - .write = default_write_file, - .open = default_open, - .llseek = noop_llseek, -}; - -static struct inode *get_inode(struct super_block *sb, umode_t mode, dev_t dev) -{ - struct inode *inode = new_inode(sb); - - if (inode) { - inode->i_ino = get_next_ino(); - inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - switch (mode & S_IFMT) { - default: - init_special_inode(inode, mode, dev); - break; - case S_IFREG: - inode->i_fop = &default_file_ops; - break; - case S_IFDIR: - inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); - break; - } - } - return inode; -} - -/* SMP-safe */ -static int mknod(struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t dev) -{ - struct inode *inode; - int error = -ENOMEM; - - if (dentry->d_inode) - return -EEXIST; - - inode = get_inode(dir->i_sb, mode, dev); - if (inode) { - d_instantiate(dentry, inode); - dget(dentry); - error = 0; - } - return error; -} - -static int mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - int res; - - mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; - res = mknod(dir, dentry, mode, 0); - if (!res) - inc_nlink(dir); - return res; -} - -static int create(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - mode = (mode & S_IALLUGO) | S_IFREG; - return mknod(dir, dentry, mode, 0); -} - static inline int positive(struct dentry *dentry) { return dentry->d_inode && !d_unhashed(dentry); @@ -145,38 +51,6 @@ static struct file_system_type fs_type = { .kill_sb = kill_litter_super, }; -static int create_by_name(const char *name, umode_t mode, - struct dentry *parent, - struct dentry **dentry) -{ - int error = 0; - - *dentry = NULL; - - /* If the parent is not specified, we create it in the root. - * We need the root dentry to do this, which is in the super - * block. A pointer to that is in the struct vfsmount that we - * have around. - */ - if (!parent) - parent = mount->mnt_root; - - mutex_lock(&parent->d_inode->i_mutex); - *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(*dentry)) { - if (S_ISDIR(mode)) - error = mkdir(parent->d_inode, *dentry, mode); - else - error = create(parent->d_inode, *dentry, mode); - if (error) - dput(*dentry); - } else - error = PTR_ERR(*dentry); - mutex_unlock(&parent->d_inode->i_mutex); - - return error; -} - /** * securityfs_create_file - create a file in the securityfs filesystem * @@ -209,31 +83,66 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { - struct dentry *dentry = NULL; + struct dentry *dentry; + int is_dir = S_ISDIR(mode); + struct inode *dir, *inode; int error; + if (!is_dir) { + BUG_ON(!fops); + mode = (mode & S_IALLUGO) | S_IFREG; + } + pr_debug("securityfs: creating file '%s'\n",name); error = simple_pin_fs(&fs_type, &mount, &mount_count); - if (error) { - dentry = ERR_PTR(error); - goto exit; - } + if (error) + return ERR_PTR(error); - error = create_by_name(name, mode, parent, &dentry); - if (error) { - dentry = ERR_PTR(error); - simple_release_fs(&mount, &mount_count); - goto exit; - } + if (!parent) + parent = mount->mnt_root; + + dir = parent->d_inode; + + mutex_lock(&dir->i_mutex); + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) + goto out; if (dentry->d_inode) { - if (fops) - dentry->d_inode->i_fop = fops; - if (data) - dentry->d_inode->i_private = data; + error = -EEXIST; + goto out1; } -exit: + + inode = new_inode(dir->i_sb); + if (!inode) { + error = -ENOMEM; + goto out1; + } + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_private = data; + if (is_dir) { + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inc_nlink(inode); + inc_nlink(dir); + } else { + inode->i_fop = fops; + } + d_instantiate(dentry, inode); + dget(dentry); + mutex_unlock(&dir->i_mutex); + return dentry; + +out1: + dput(dentry); + dentry = ERR_PTR(error); +out: + mutex_unlock(&dir->i_mutex); + simple_release_fs(&mount, &mount_count); return dentry; } EXPORT_SYMBOL_GPL(securityfs_create_file); From 0b2c4e39c014219ef73f05ab580c284bf8e6af0a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 10:46:03 -0500 Subject: [PATCH 2/9] coda: deal correctly with allocation failure from coda_cnode_makectl() lookup should fail with ENOMEM, not silently make dentry negative. Switched to saner calling conventions, while we are at it. Signed-off-by: Al Viro --- fs/coda/cnode.c | 21 +++++++++------------ fs/coda/coda_fs_i.h | 2 +- fs/coda/dir.c | 4 ++-- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 6475877b0763..8af67c9c47dd 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -156,19 +156,16 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) } /* the CONTROL inode is made without asking attributes from Venus */ -int coda_cnode_makectl(struct inode **inode, struct super_block *sb) +struct inode *coda_cnode_makectl(struct super_block *sb) { - int error = -ENOMEM; - - *inode = new_inode(sb); - if (*inode) { - (*inode)->i_ino = CTL_INO; - (*inode)->i_op = &coda_ioctl_inode_operations; - (*inode)->i_fop = &coda_ioctl_operations; - (*inode)->i_mode = 0444; - error = 0; + struct inode *inode = new_inode(sb); + if (inode) { + inode->i_ino = CTL_INO; + inode->i_op = &coda_ioctl_inode_operations; + inode->i_fop = &coda_ioctl_operations; + inode->i_mode = 0444; + return inode; } - - return error; + return ERR_PTR(-ENOMEM); } diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index e35071b1de0e..1c17446f1afe 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h @@ -51,7 +51,7 @@ struct coda_file_info { int coda_cnode_make(struct inode **, struct CodaFid *, struct super_block *); struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); -int coda_cnode_makectl(struct inode **inode, struct super_block *sb); +struct inode *coda_cnode_makectl(struct super_block *sb); struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 83d2fd8ec24b..df0f9c1b01d3 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -111,7 +111,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc /* control object, create inode on the fly */ if (coda_isroot(dir) && coda_iscontrol(name, length)) { - error = coda_cnode_makectl(&inode, dir->i_sb); + inode = coda_cnode_makectl(dir->i_sb); type = CODA_NOCACHE; goto exit; } @@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc return ERR_PTR(error); exit: - if (inode && (type & CODA_NOCACHE)) + if (inode && !IS_ERR(inode) && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); return d_splice_alias(inode, entry); From f4947fbce208990266920d51837e4e7ba9779db1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 11:11:49 -0500 Subject: [PATCH 3/9] coda: switch coda_cnode_make() to sane API as well, clean coda_lookup() Signed-off-by: Al Viro --- fs/coda/cnode.c | 17 +++++++---------- fs/coda/coda_fs_i.h | 2 +- fs/coda/dir.c | 29 +++++++++++++---------------- fs/coda/inode.c | 10 ++++++---- 4 files changed, 27 insertions(+), 31 deletions(-) diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 8af67c9c47dd..911cf30d057d 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -88,24 +88,21 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, - link the two up if this is needed - fill in the attributes */ -int coda_cnode_make(struct inode **inode, struct CodaFid *fid, struct super_block *sb) +struct inode *coda_cnode_make(struct CodaFid *fid, struct super_block *sb) { struct coda_vattr attr; + struct inode *inode; int error; /* We get inode numbers from Venus -- see venus source */ error = venus_getattr(sb, fid, &attr); - if ( error ) { - *inode = NULL; - return error; - } + if (error) + return ERR_PTR(error); - *inode = coda_iget(sb, fid, &attr); - if ( IS_ERR(*inode) ) { + inode = coda_iget(sb, fid, &attr); + if (IS_ERR(inode)) printk("coda_cnode_make: coda_iget failed\n"); - return PTR_ERR(*inode); - } - return 0; + return inode; } diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index 1c17446f1afe..b24fdfd8a3f0 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h @@ -49,7 +49,7 @@ struct coda_file_info { #define C_DYING 0x4 /* from venus (which died) */ #define C_PURGE 0x8 -int coda_cnode_make(struct inode **, struct CodaFid *, struct super_block *); +struct inode *coda_cnode_make(struct CodaFid *, struct super_block *); struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); struct inode *coda_cnode_makectl(struct super_block *sb); struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index df0f9c1b01d3..177515829062 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -96,12 +96,11 @@ const struct file_operations coda_dir_operations = { /* access routines: lookup, readlink, permission */ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) { - struct inode *inode = NULL; - struct CodaFid resfid = { { 0, } }; - int type = 0; - int error = 0; + struct super_block *sb = dir->i_sb; const char *name = entry->d_name.name; size_t length = entry->d_name.len; + struct inode *inode; + int type = 0; if (length > CODA_MAXNAMLEN) { printk(KERN_ERR "name too long: lookup, %s (%*s)\n", @@ -111,23 +110,21 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc /* control object, create inode on the fly */ if (coda_isroot(dir) && coda_iscontrol(name, length)) { - inode = coda_cnode_makectl(dir->i_sb); + inode = coda_cnode_makectl(sb); type = CODA_NOCACHE; - goto exit; + } else { + struct CodaFid fid = { { 0, } }; + int error = venus_lookup(sb, coda_i2f(dir), name, length, + &type, &fid); + inode = !error ? coda_cnode_make(&fid, sb) : ERR_PTR(error); } - error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, - &type, &resfid); - if (!error) - error = coda_cnode_make(&inode, &resfid, dir->i_sb); - - if (error && error != -ENOENT) - return ERR_PTR(error); - -exit: - if (inode && !IS_ERR(inode) && (type & CODA_NOCACHE)) + if (!IS_ERR(inode) && (type & CODA_NOCACHE)) coda_flag_inode(inode, C_VATTR | C_PURGE); + if (inode == ERR_PTR(-ENOENT)) + inode = NULL; + return d_splice_alias(inode, entry); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 1c08a8cd673a..5e2e1b3f068d 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -204,10 +204,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid)); /* make root inode */ - error = coda_cnode_make(&root, &fid, sb); - if ( error || !root ) { - printk("Failure of coda_cnode_make for root: error %d\n", error); - goto error; + root = coda_cnode_make(&fid, sb); + if (IS_ERR(root)) { + error = PTR_ERR(root); + printk("Failure of coda_cnode_make for root: error %d\n", error); + root = NULL; + goto error; } printk("coda_read_super: rootinode is %ld dev %s\n", From eaf5f9073533cde21c7121c136f1c3f072d9cf59 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 10 Jan 2012 18:22:25 +0100 Subject: [PATCH 4/9] fix shrink_dcache_parent() livelock Two (or more) concurrent calls of shrink_dcache_parent() on the same dentry may cause shrink_dcache_parent() to loop forever. Here's what appears to happen: 1 - CPU0: select_parent(P) finds C and puts it on dispose list, returns 1 2 - CPU1: select_parent(P) locks P->d_lock 3 - CPU0: shrink_dentry_list() locks C->d_lock dentry_kill(C) tries to lock P->d_lock but fails, unlocks C->d_lock 4 - CPU1: select_parent(P) locks C->d_lock, moves C from dispose list being processed on CPU0 to the new dispose list, returns 1 5 - CPU0: shrink_dentry_list() finds dispose list empty, returns 6 - Goto 2 with CPU0 and CPU1 switched Basically select_parent() steals the dentry from shrink_dentry_list() and thinks it found a new one, causing shrink_dentry_list() to think it's making progress and loop over and over. One way to trigger this is to make udev calls stat() on the sysfs file while it is going away. Having a file in /lib/udev/rules.d/ with only this one rule seems to the trick: ATTR{vendor}=="0x8086", ATTR{device}=="0x10ca", ENV{PCI_SLOT_NAME}="%k", ENV{MATCHADDR}="$attr{address}", RUN+="/bin/true" Then execute the following loop: while true; do echo -bond0 > /sys/class/net/bonding_masters echo +bond0 > /sys/class/net/bonding_masters echo -bond1 > /sys/class/net/bonding_masters echo +bond1 > /sys/class/net/bonding_masters done One fix would be to check all callers and prevent concurrent calls to shrink_dcache_parent(). But I think a better solution is to stop the stealing behavior. This patch adds a new dentry flag that is set when the dentry is added to the dispose list. The flag is cleared in dentry_lru_del() in case the dentry gets a new reference just before being pruned. If the dentry has this flag, select_parent() will skip it and let shrink_dentry_list() retry pruning it. With select_parent() skipping those dentries there will not be the appearance of progress (new dentries found) when there is none, hence shrink_dcache_parent() will not loop forever. Set the flag is also set in prune_dcache_sb() for consistency as suggested by Linus. Signed-off-by: Miklos Szeredi CC: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/dcache.c | 17 ++++++++++++----- include/linux/dcache.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 3c6d3113a255..616fedff011a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -243,6 +243,7 @@ static void dentry_lru_add(struct dentry *dentry) static void __dentry_lru_del(struct dentry *dentry) { list_del_init(&dentry->d_lru); + dentry->d_flags &= ~DCACHE_SHRINK_LIST; dentry->d_sb->s_nr_dentry_unused--; dentry_stat.nr_unused--; } @@ -806,6 +807,7 @@ void prune_dcache_sb(struct super_block *sb, int count) spin_unlock(&dentry->d_lock); } else { list_move_tail(&dentry->d_lru, &tmp); + dentry->d_flags |= DCACHE_SHRINK_LIST; spin_unlock(&dentry->d_lock); if (!--count) break; @@ -1097,14 +1099,19 @@ static int select_parent(struct dentry *parent, struct list_head *dispose) /* * move only zero ref count dentries to the dispose list. + * + * Those which are presently on the shrink list, being processed + * by shrink_dentry_list(), shouldn't be moved. Otherwise the + * loop in shrink_dcache_parent() might not make any progress + * and loop forever. */ - if (!dentry->d_count) { - dentry_lru_move_list(dentry, dispose); - found++; - } else { + if (dentry->d_count) { dentry_lru_del(dentry); + } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { + dentry_lru_move_list(dentry, dispose); + dentry->d_flags |= DCACHE_SHRINK_LIST; + found++; } - /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find diff --git a/include/linux/dcache.h b/include/linux/dcache.h index a47bda5f76db..31f73220e7d7 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -203,6 +203,7 @@ struct dentry_operations { #define DCACHE_CANT_MOUNT 0x0100 #define DCACHE_GENOCIDE 0x0200 +#define DCACHE_SHRINK_LIST 0x0400 #define DCACHE_NFSFS_RENAMED 0x1000 /* this dentry has been "silly renamed" and has to be deleted on the last From ace8577aeb438025ecf642f5eda3aa551d251951 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 10 Jan 2012 02:43:59 +0300 Subject: [PATCH 5/9] block_dev: Suppress bdev_cache_init() kmemleak warninig Kmemleak reports the following warning in bdev_cache_init() [ 0.003738] kmemleak: Object 0xffff880153035200 (size 256): [ 0.003823] kmemleak: comm "swapper/0", pid 0, jiffies 4294667299 [ 0.003909] kmemleak: min_count = 1 [ 0.003988] kmemleak: count = 0 [ 0.004066] kmemleak: flags = 0x1 [ 0.004144] kmemleak: checksum = 0 [ 0.004224] kmemleak: backtrace: [ 0.004303] [] kmemleak_alloc+0x21/0x3e [ 0.004446] [] kmem_cache_alloc+0xca/0x1dc [ 0.004592] [] alloc_vfsmnt+0x1f/0x198 [ 0.004736] [] vfs_kern_mount+0x36/0xd2 [ 0.004879] [] kern_mount_data+0x18/0x32 [ 0.005025] [] bdev_cache_init+0x51/0x81 [ 0.005169] [] vfs_caches_init+0x101/0x10d [ 0.005313] [] start_kernel+0x344/0x383 [ 0.005456] [] x86_64_start_reservations+0xae/0xb2 [ 0.005602] [] x86_64_start_kernel+0x102/0x111 [ 0.005747] [] 0xffffffffffffffff [ 0.008653] kmemleak: Trying to color unknown object at 0xffff880153035220 as Grey [ 0.008754] Pid: 0, comm: swapper/0 Not tainted 3.3.0-rc0-dbg-04200-g8180888-dirty #888 [ 0.008856] Call Trace: [ 0.008934] [] ? find_and_get_object+0x44/0x118 [ 0.009023] [] paint_ptr+0x57/0x8f [ 0.009109] [] kmemleak_not_leak+0x23/0x42 [ 0.009195] [] bdev_cache_init+0x72/0x81 [ 0.009282] [] vfs_caches_init+0x101/0x10d [ 0.009368] [] start_kernel+0x344/0x383 [ 0.009466] [] x86_64_start_reservations+0xae/0xb2 [ 0.009555] [] ? early_idt_handlers+0x140/0x140 [ 0.009643] [] x86_64_start_kernel+0x102/0x111 due to attempt to mark pointer to `struct vfsmount' as a gray object, which is embedded into `struct mount' returned from alloc_vfsmnt(). Make `bd_mnt' static, avoiding need to tell kmemleak to mark it gray, as suggested by Al Viro. Signed-off-by: Sergey Senozhatsky Signed-off-by: Al Viro --- fs/block_dev.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 69a5b6fbee2b..afe74dda632b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include "internal.h" @@ -521,7 +520,7 @@ static struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; - struct vfsmount *bd_mnt; + static struct vfsmount *bd_mnt; bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| @@ -533,12 +532,7 @@ void __init bdev_cache_init(void) bd_mnt = kern_mount(&bd_type); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); - /* - * This vfsmount structure is only used to obtain the - * blockdev_superblock, so tell kmemleak not to report it. - */ - kmemleak_not_leak(bd_mnt); - blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ + blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } /* From b3f2a92447b8443360ac117a3d7c06689562a70c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 17:48:52 -0500 Subject: [PATCH 6/9] hfsplus: creation of hidden dir on mount can fail Signed-off-by: Al Viro --- fs/hfsplus/super.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index edf0a801446b..427682ca9e48 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -499,9 +499,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->hidden_dir) { mutex_lock(&sbi->vh_mutex); sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); - hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, - sbi->hidden_dir); + if (!sbi->hidden_dir) { + mutex_unlock(&sbi->vh_mutex); + err = -ENOMEM; + goto out_put_root; + } + err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, + &str, sbi->hidden_dir); mutex_unlock(&sbi->vh_mutex); + if (err) + goto out_put_hidden_dir; hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); From 4041bcdc7bef06a2fb29c57394c713a74bd13b08 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 22:20:12 -0500 Subject: [PATCH 7/9] autofs4: autofs4_wait() vs. autofs4_catatonic_mode() race We need to recheck ->catatonic after autofs4_wait() got ->wq_mutex for good, or we might end up with wq inserted into queue after autofs4_catatonic_mode() had done its thing. It will stick there forever, since there won't be anything to clear its ->name.name. A bit of a complication: validate_request() drops and regains ->wq_mutex. It actually ends up the most convenient place to stick the check into... Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/waitq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index e1fbdeef85db..c13273afd546 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -257,6 +257,9 @@ static int validate_request(struct autofs_wait_queue **wait, struct autofs_wait_queue *wq; struct autofs_info *ino; + if (sbi->catatonic) + return -ENOENT; + /* Wait in progress, continue; */ wq = autofs4_find_wait(sbi, qstr); if (wq) { @@ -289,6 +292,9 @@ static int validate_request(struct autofs_wait_queue **wait, if (mutex_lock_interruptible(&sbi->wq_mutex)) return -EINTR; + if (sbi->catatonic) + return -ENOENT; + wq = autofs4_find_wait(sbi, qstr); if (wq) { *wait = wq; @@ -389,7 +395,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, ret = validate_request(&wq, sbi, &qstr, dentry, notify); if (ret <= 0) { - if (ret == 0) + if (ret != -EINTR) mutex_unlock(&sbi->wq_mutex); kfree(qstr.name); return ret; From 8753333266be67ff3a984ac1f6566d31c260bee4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 22:24:48 -0500 Subject: [PATCH 8/9] autofs4: catatonic_mode vs. notify_daemon race we need to hold ->wq_mutex while we are forming the packet to send, lest we have autofs4_catatonic_mode() setting wq->name.name to NULL just as autofs4_notify_daemon() decides to memcpy() from it... We do have check for catatonic mode immediately after that (under ->wq_mutex, as it ought to be) and packet won't be actually sent, but it'll be too late for us if we oops on that memcpy() from NULL... Fix is obvious - just extend the area covered by ->wq_mutex over that switch and check whether it's catatonic *before* doing anything else. Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/waitq.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index c13273afd546..9a0256da5d56 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -110,6 +110,13 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; + mutex_lock(&sbi->wq_mutex); + + /* Check if we have become catatonic */ + if (sbi->catatonic) { + mutex_unlock(&sbi->wq_mutex); + return; + } switch (type) { /* Kernel protocol v4 missing and expire packets */ case autofs_ptype_missing: @@ -163,22 +170,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, } default: printk("autofs4_notify_daemon: bad type %d!\n", type); + mutex_unlock(&sbi->wq_mutex); return; } - /* Check if we have become catatonic */ - mutex_lock(&sbi->wq_mutex); - if (!sbi->catatonic) { - pipe = sbi->pipe; - get_file(pipe); - } + pipe = sbi->pipe; + get_file(pipe); + mutex_unlock(&sbi->wq_mutex); - if (pipe) { - if (autofs4_write(pipe, &pkt, pktsz)) - autofs4_catatonic_mode(sbi); - fput(pipe); - } + if (autofs4_write(pipe, &pkt, pktsz)) + autofs4_catatonic_mode(sbi); + fput(pipe); } static int autofs4_getpath(struct autofs_sb_info *sbi, From d668dc56631da067540b2494d2a1f29ff7b5f15a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 Jan 2012 22:35:38 -0500 Subject: [PATCH 9/9] autofs4: deal with autofs4_write/autofs4_write races Just serialize the actual writing of packets into pipe on a new mutex, independent from everything else in the locking hierarchy. As soon as something has started feeding a piece of packet into the pipe to daemon, we *want* everything else about to try the same to wait until we are done. Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs4/autofs_i.h | 1 + fs/autofs4/inode.c | 1 + fs/autofs4/waitq.c | 9 +++++---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 5869d4e974a9..d8d8e7ba6a1e 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -116,6 +116,7 @@ struct autofs_sb_info { int needs_reghost; struct super_block *sb; struct mutex wq_mutex; + struct mutex pipe_mutex; spinlock_t fs_lock; struct autofs_wait_queue *queues; /* Wait queue pointer */ spinlock_t lookup_lock; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 2ba44c79d548..e16980b00b8d 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -225,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->min_proto = 0; sbi->max_proto = 0; mutex_init(&sbi->wq_mutex); + mutex_init(&sbi->pipe_mutex); spin_lock_init(&sbi->fs_lock); sbi->queues = NULL; spin_lock_init(&sbi->lookup_lock); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 9a0256da5d56..9ef5b2914407 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -56,26 +56,27 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) mutex_unlock(&sbi->wq_mutex); } -static int autofs4_write(struct file *file, const void *addr, int bytes) +static int autofs4_write(struct autofs_sb_info *sbi, + struct file *file, const void *addr, int bytes) { unsigned long sigpipe, flags; mm_segment_t fs; const char *data = (const char *)addr; ssize_t wr = 0; - /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/ - sigpipe = sigismember(¤t->pending.signal, SIGPIPE); /* Save pointer to user space and point back to kernel space */ fs = get_fs(); set_fs(KERNEL_DS); + mutex_lock(&sbi->pipe_mutex); while (bytes && (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { data += wr; bytes -= wr; } + mutex_lock(&sbi->pipe_mutex); set_fs(fs); @@ -179,7 +180,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mutex_unlock(&sbi->wq_mutex); - if (autofs4_write(pipe, &pkt, pktsz)) + if (autofs4_write(sbi, pipe, &pkt, pktsz)) autofs4_catatonic_mode(sbi); fput(pipe); }