From 7e71c55ee73988d0cb61045660b899eaac23bf8f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 22 Sep 2009 10:56:16 +0100 Subject: [PATCH 01/31] GFS2: Fix potential race in glock code We need to be careful of the ordering between clearing the GLF_LOCK bit and scheduling the workqueue. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8b674b1f3a55..a3f90ad2af80 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -672,12 +672,17 @@ out: return; out_sched: + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_clear_bit(); gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put_nolock(gl); + return; + out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - goto out; + smp_mb__after_clear_bit(); + return; } static void delete_work_func(struct work_struct *work) @@ -1375,10 +1380,11 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) handle_callback(gl, LM_ST_UNLOCKED, 0); nr--; } + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_clear_bit(); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put_nolock(gl); spin_unlock(&gl->gl_spin); - clear_bit(GLF_LOCK, &gl->gl_flags); spin_lock(&lru_lock); continue; } From f55073ff1eaf99f6b3bc62134a456638bca043a3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 10:30:49 +0100 Subject: [PATCH 02/31] GFS2: Fix -o meta mounts for subsequent mounts (i.e. all but the first one) We have a long term plan to use the "-o meta" flag to GFS2 mounts to access the alternate root which is used to store metadata for a GFS2 filesystem. This will allow us to eventually remove support for the gfs2meta filesystem type (which is in any case just a "front end" to the gfs2 filesystem type with the meta/master root). Currently the "-o meta" option is only taken into account on the initial mount of the filesystem. Subsequent mounts of the same filesystem (i.e. on the same device) result in basically the same as bind mounting the root of the original mount. This patch changes that by using what is more or less a copy of get_sb_bdev() and extending it so that it will take into account the alternate root in all cases. The main difference is that we have to parse the mount options a bit earlier. We can then use them to select the appropriate root towards the end of the function. In addition this also fixes a bug where it was possible (but certainly not desirable) to set different ro/rw options for the meta root when mounted via the gfs2meta fs compared with the original mount. Signed-off-by: Steven Whitehouse Cc: Alexander Viro --- fs/gfs2/ops_fstype.c | 135 +++++++++++++++++++++++++++++++++++++------ fs/gfs2/super.c | 16 ++--- fs/gfs2/super.h | 2 +- 3 files changed, 127 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 52fb6c048981..e5ee06231ae5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1114,7 +1114,7 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp) * Returns: errno */ -static int fill_super(struct super_block *sb, void *data, int silent) +static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent) { struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; @@ -1125,17 +1125,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); return -ENOMEM; } - - sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; - sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; - sdp->sd_args.ar_commit = 60; - sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT; - - error = gfs2_mount_args(sdp, &sdp->sd_args, data); - if (error) { - printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); - goto fail; - } + sdp->sd_args = *args; if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; @@ -1243,18 +1233,125 @@ fail: return error; } -static int gfs2_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static int set_gfs2_super(struct super_block *s, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); + s->s_bdev = data; + s->s_dev = s->s_bdev->bd_dev; + + /* + * We set the bdi here to the queue backing, file systems can + * overwrite this in ->fill_super() + */ + s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + return 0; } -static int test_meta_super(struct super_block *s, void *ptr) +static int test_gfs2_super(struct super_block *s, void *ptr) { struct block_device *bdev = ptr; return (bdev == s->s_bdev); } +/** + * gfs2_get_sb - Get the GFS2 superblock + * @fs_type: The GFS2 filesystem type + * @flags: Mount flags + * @dev_name: The name of the device + * @data: The mount arguments + * @mnt: The vfsmnt for this mount + * + * Q. Why not use get_sb_bdev() ? + * A. We need to select one of two root directories to mount, independent + * of whether this is the initial, or subsequent, mount of this sb + * + * Returns: 0 or -ve on error + */ + +static int gfs2_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct block_device *bdev; + struct super_block *s; + fmode_t mode = FMODE_READ; + int error; + struct gfs2_args args; + struct gfs2_sbd *sdp; + + if (!(flags & MS_RDONLY)) + mode |= FMODE_WRITE; + + bdev = open_bdev_exclusive(dev_name, mode, fs_type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = -EBUSY; + goto error_bdev; + } + s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = PTR_ERR(s); + if (IS_ERR(s)) + goto error_bdev; + + memset(&args, 0, sizeof(args)); + args.ar_quota = GFS2_QUOTA_DEFAULT; + args.ar_data = GFS2_DATA_DEFAULT; + args.ar_commit = 60; + args.ar_errors = GFS2_ERRORS_DEFAULT; + + error = gfs2_mount_args(&args, data); + if (error) { + printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); + if (s->s_root) + goto error_super; + deactivate_locked_super(s); + return error; + } + + if (s->s_root) { + error = -EBUSY; + if ((flags ^ s->s_flags) & MS_RDONLY) + goto error_super; + close_bdev_exclusive(bdev, mode); + } else { + char b[BDEVNAME_SIZE]; + + s->s_flags = flags; + s->s_mode = mode; + strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); + sb_set_blocksize(s, block_size(bdev)); + error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); + if (error) { + deactivate_locked_super(s); + return error; + } + s->s_flags |= MS_ACTIVE; + bdev->bd_super = s; + } + + sdp = s->s_fs_info; + mnt->mnt_sb = s; + if (args.ar_meta) + mnt->mnt_root = dget(sdp->sd_master_dir); + else + mnt->mnt_root = dget(sdp->sd_root_dir); + return 0; + +error_super: + deactivate_locked_super(s); +error_bdev: + close_bdev_exclusive(bdev, mode); + return error; +} + static int set_meta_super(struct super_block *s, void *ptr) { return -EINVAL; @@ -1274,13 +1371,17 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, dev_name, error); return error; } - s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, + s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); return PTR_ERR(s); } + if ((flags ^ s->s_flags) & MS_RDONLY) { + deactivate_locked_super(s); + return -EBUSY; + } sdp = s->s_fs_info; mnt->mnt_sb = s; mnt->mnt_root = dget(sdp->sd_master_dir); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0ec3ec672de1..42e5458703f0 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -106,13 +106,13 @@ static const match_table_t tokens = { /** * gfs2_mount_args - Parse mount options - * @sdp: - * @data: + * @args: The structure into which the parsed options will be written + * @options: The options to parse * * Return: errno */ -int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) +int gfs2_mount_args(struct gfs2_args *args, char *options) { char *o; int token; @@ -157,7 +157,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_debug: if (args->ar_errors == GFS2_ERRORS_PANIC) { - fs_info(sdp, "-o debug and -o errors=panic " + printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " "are mutually exclusive.\n"); return -EINVAL; } @@ -210,7 +210,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) case Opt_commit: rv = match_int(&tmp[0], &args->ar_commit); if (rv || args->ar_commit <= 0) { - fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n"); return rv ? rv : -EINVAL; } break; @@ -219,7 +219,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_err_panic: if (args->ar_debug) { - fs_info(sdp, "-o debug and -o errors=panic " + printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " "are mutually exclusive.\n"); return -EINVAL; } @@ -227,7 +227,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_error: default: - fs_info(sdp, "invalid mount option: %s\n", o); + printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); return -EINVAL; } } @@ -1062,7 +1062,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) spin_lock(>->gt_spin); args.ar_commit = gt->gt_log_flush_secs; spin_unlock(>->gt_spin); - error = gfs2_mount_args(sdp, &args, data); + error = gfs2_mount_args(&args, data); if (error) return error; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 235db3682885..ed962ea68c3a 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -27,7 +27,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) extern void gfs2_jindex_free(struct gfs2_sbd *sdp); -extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); +extern int gfs2_mount_args(struct gfs2_args *args, char *data); extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); From 2646a1f61a3b5525914757f10fa12b5b94713648 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Oct 2009 11:50:54 +0100 Subject: [PATCH 03/31] GFS2: Fix up system xattrs This code has been shamelessly stolen from XFS at the suggestion of Christoph Hellwig. I've not added support for cached ACLs so far... watch for that in a later patch, although this is designed in such a way that they should be easy to add. Signed-off-by: Steven Whitehouse Cc: Christoph Hellwig --- fs/gfs2/acl.c | 170 ++++++++++++++++++++++++++++++++---------------- fs/gfs2/acl.h | 24 ++----- fs/gfs2/xattr.c | 18 ----- 3 files changed, 120 insertions(+), 92 deletions(-) diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3fc4e3ac7d84..2168da121647 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -26,61 +27,6 @@ #include "trans.h" #include "util.h" -#define ACL_ACCESS 1 -#define ACL_DEFAULT 0 - -int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, - struct gfs2_ea_request *er, int *remove, mode_t *mode) -{ - struct posix_acl *acl; - int error; - - error = gfs2_acl_validate_remove(ip, access); - if (error) - return error; - - if (!er->er_data) - return -EINVAL; - - acl = posix_acl_from_xattr(er->er_data, er->er_data_len); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) { - *remove = 1; - return 0; - } - - error = posix_acl_valid(acl); - if (error) - goto out; - - if (access) { - error = posix_acl_equiv_mode(acl, mode); - if (!error) - *remove = 1; - else if (error > 0) - error = 0; - } - -out: - posix_acl_release(acl); - return error; -} - -int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) -{ - if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl) - return -EOPNOTSUPP; - if (!is_owner_or_cap(&ip->i_inode)) - return -EPERM; - if (S_ISLNK(ip->i_inode.i_mode)) - return -EOPNOTSUPP; - if (!access && !S_ISDIR(ip->i_inode.i_mode)) - return -EACCES; - - return 0; -} - static int acl_get(struct gfs2_inode *ip, const char *name, struct posix_acl **acl, struct gfs2_ea_location *el, char **datap, unsigned int *lenp) @@ -277,3 +223,117 @@ out_brelse: return error; } +static int gfs2_acl_type(const char *name) +{ + if (strcmp(name, GFS2_POSIX_ACL_ACCESS) == 0) + return ACL_TYPE_ACCESS; + if (strcmp(name, GFS2_POSIX_ACL_DEFAULT) == 0) + return ACL_TYPE_DEFAULT; + return -EINVAL; +} + +static int gfs2_xattr_system_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + int type; + + type = gfs2_acl_type(name); + if (type < 0) + return type; + + return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); +} + +static int gfs2_set_mode(struct inode *inode, mode_t mode) +{ + int error = 0; + + if (mode != inode->i_mode) { + struct iattr iattr; + + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = mode; + + error = gfs2_setattr_simple(GFS2_I(inode), &iattr); + } + + return error; +} + +static int gfs2_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct posix_acl *acl = NULL; + int error = 0, type; + + if (!sdp->sd_args.ar_posix_acl) + return -EOPNOTSUPP; + + type = gfs2_acl_type(name); + if (type < 0) + return type; + if (flags & XATTR_CREATE) + return -EINVAL; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return value ? -EACCES : 0; + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!value) + goto set_acl; + + acl = posix_acl_from_xattr(value, size); + if (!acl) { + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + goto out; + } + if (IS_ERR(acl)) { + error = PTR_ERR(acl); + goto out; + } + + error = posix_acl_valid(acl); + if (error) + goto out_release; + + error = -EINVAL; + if (acl->a_count > GFS2_ACL_MAX_ENTRIES) + goto out_release; + + if (type == ACL_TYPE_ACCESS) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + + if (error <= 0) { + posix_acl_release(acl); + acl = NULL; + + if (error < 0) + return error; + } + + error = gfs2_set_mode(inode, mode); + if (error) + goto out_release; + } + +set_acl: + error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); +out_release: + posix_acl_release(acl); +out: + return error; +} + +struct xattr_handler gfs2_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = gfs2_xattr_system_get, + .set = gfs2_xattr_system_set, +}; + diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 6751930bfb64..cc954390b6da 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -13,26 +13,12 @@ #include "incore.h" #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" -#define GFS2_POSIX_ACL_ACCESS_LEN 16 #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" -#define GFS2_POSIX_ACL_DEFAULT_LEN 17 +#define GFS2_ACL_MAX_ENTRIES 25 -#define GFS2_ACL_IS_ACCESS(name, len) \ - ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \ - !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len))) - -#define GFS2_ACL_IS_DEFAULT(name, len) \ - ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \ - !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len))) - -struct gfs2_ea_request; - -int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, - struct gfs2_ea_request *er, - int *remove, mode_t *mode); -int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access); -int gfs2_check_acl(struct inode *inode, int mask); -int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); -int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); +extern int gfs2_check_acl(struct inode *inode, int mask); +extern int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); +extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); +extern struct xattr_handler gfs2_xattr_system_handler; #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 8a0f8ef6ee27..6b803540951e 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1507,18 +1507,6 @@ static int gfs2_xattr_user_set(struct inode *inode, const char *name, return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags); } -static int gfs2_xattr_system_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); -} - -static int gfs2_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags); -} - static int gfs2_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size) { @@ -1543,12 +1531,6 @@ static struct xattr_handler gfs2_xattr_security_handler = { .set = gfs2_xattr_security_set, }; -static struct xattr_handler gfs2_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = gfs2_xattr_system_get, - .set = gfs2_xattr_system_set, -}; - struct xattr_handler *gfs2_xattr_handlers[] = { &gfs2_xattr_user_handler, &gfs2_xattr_security_handler, From 796bd9524731850967d437b7f47a86acc776ea89 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 12:27:23 +0100 Subject: [PATCH 04/31] VFS: Add forget_all_cached_acls() This is required for cluster filesystems which want to use cached ACLs so that they can invalidate the cache when required. Signed-off-by: Steven Whitehouse Cc: Alexander Viro Cc: Christoph Hellwig --- include/linux/posix_acl.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 065a3652a3ea..67608161df6b 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type) if (old != ACL_NOT_CACHED) posix_acl_release(old); } + +static inline void forget_all_cached_acls(struct inode *inode) +{ + struct posix_acl *old_access, *old_default; + spin_lock(&inode->i_lock); + old_access = inode->i_acl; + old_default = inode->i_default_acl; + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; + spin_unlock(&inode->i_lock); + if (old_access != ACL_NOT_CACHED) + posix_acl_release(old_access); + if (old_default != ACL_NOT_CACHED) + posix_acl_release(old_default); +} #endif static inline void cache_no_acl(struct inode *inode) From c65f7fb5342ecb8cb85e9b676327b3a43a5a4735 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Oct 2009 11:54:39 +0100 Subject: [PATCH 05/31] GFS2: Use forget_all_cached_acls() Invalidate all the cached ACLs when we drop the glock. Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6985eef06c39..78554acc0605 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -184,8 +185,10 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) if (flags & DIO_METADATA) { struct address_space *mapping = gl->gl_aspace->i_mapping; truncate_inode_pages(mapping, 0); - if (ip) + if (ip) { set_bit(GIF_INVALID, &ip->i_flags); + forget_all_cached_acls(&ip->i_inode); + } } if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) From 69dca42464962d8d0989b7e09877ba644c9cba66 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 12:40:19 +0100 Subject: [PATCH 06/31] GFS2: Use gfs2_set_mode() instead of munge_mode() These two functions do the same thing, so lets only use one of them. Signed-off-by: Steven Whitehouse --- fs/gfs2/acl.c | 46 +++++++++++----------------------------------- 1 file changed, 11 insertions(+), 35 deletions(-) diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 2168da121647..1be314837d31 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -104,29 +104,20 @@ int gfs2_check_acl(struct inode *inode, int mask) return -EAGAIN; } -static int munge_mode(struct gfs2_inode *ip, mode_t mode) +static int gfs2_set_mode(struct inode *inode, mode_t mode) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head *dibh; - int error; + int error = 0; - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error) - return error; + if (mode != inode->i_mode) { + struct iattr iattr; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_assert_withdraw(sdp, - (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT)); - ip->i_inode.i_mode = mode; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = mode; + + error = gfs2_setattr_simple(GFS2_I(inode), &iattr); } - gfs2_trans_end(sdp); - - return 0; + return error; } int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) @@ -151,7 +142,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) if (!acl) { mode &= ~current_umask(); if (mode != ip->i_inode.i_mode) - error = munge_mode(ip, mode); + error = gfs2_set_mode(&ip->i_inode, mode); return error; } @@ -181,7 +172,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) if (error) goto out; munge: - error = munge_mode(ip, mode); + error = gfs2_set_mode(&ip->i_inode, mode); out: posix_acl_release(acl); kfree(data); @@ -244,21 +235,6 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name, return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); } -static int gfs2_set_mode(struct inode *inode, mode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; - - iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = mode; - - error = gfs2_setattr_simple(GFS2_I(inode), &iattr); - } - - return error; -} static int gfs2_xattr_system_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) From 479c427dd60fe1aadbbf2e6cbf2f84942baeb210 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Oct 2009 12:00:00 +0100 Subject: [PATCH 07/31] GFS2: Clean up ACLs To prepare for support for caching of ACLs, this cleans up the GFS2 ACL support by pushing the xattr code back into xattr.c and changing the acl_get function into one which only returns ACLs so that we can drop the caching function into it shortly. Signed-off-by: Steven Whitehouse --- fs/gfs2/acl.c | 164 ++++++++++++++++++++++++------------------------ fs/gfs2/acl.h | 2 +- fs/gfs2/inode.c | 2 +- fs/gfs2/xattr.c | 56 +++++++++++++---- fs/gfs2/xattr.h | 8 +-- 5 files changed, 132 insertions(+), 100 deletions(-) diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 1be314837d31..bd0fce964c41 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -27,53 +27,40 @@ #include "trans.h" #include "util.h" -static int acl_get(struct gfs2_inode *ip, const char *name, - struct posix_acl **acl, struct gfs2_ea_location *el, - char **datap, unsigned int *lenp) +static const char *gfs2_acl_name(int type) { - char *data; - unsigned int len; - int error; + switch (type) { + case ACL_TYPE_ACCESS: + return GFS2_POSIX_ACL_ACCESS; + case ACL_TYPE_DEFAULT: + return GFS2_POSIX_ACL_DEFAULT; + } + return NULL; +} - el->el_bh = NULL; +static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) +{ + struct posix_acl *acl; + const char *name; + char *data; + int len; if (!ip->i_eattr) - return 0; + return NULL; - error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el); - if (error) - return error; - if (!el->el_ea) - return 0; - if (!GFS2_EA_DATA_LEN(el->el_ea)) - goto out; + name = gfs2_acl_name(type); + if (name == NULL) + return ERR_PTR(-EINVAL); - len = GFS2_EA_DATA_LEN(el->el_ea); - data = kmalloc(len, GFP_NOFS); - error = -ENOMEM; - if (!data) - goto out; + len = gfs2_xattr_acl_get(ip, name, &data); + if (len < 0) + return ERR_PTR(len); + if (len == 0) + return NULL; - error = gfs2_ea_get_copy(ip, el, data, len); - if (error < 0) - goto out_kfree; - error = 0; - - if (acl) { - *acl = posix_acl_from_xattr(data, len); - if (IS_ERR(*acl)) - error = PTR_ERR(*acl); - } - -out_kfree: - if (error || !datap) { - kfree(data); - } else { - *datap = data; - *lenp = len; - } -out: - return error; + acl = posix_acl_from_xattr(data, len); + kfree(data); + return acl; } /** @@ -86,14 +73,12 @@ out: int gfs2_check_acl(struct inode *inode, int mask) { - struct gfs2_ea_location el; - struct posix_acl *acl = NULL; + struct posix_acl *acl; int error; - error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL); - brelse(el.el_bh); - if (error) - return error; + acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (acl) { error = posix_acl_permission(inode, acl, mask); @@ -120,32 +105,57 @@ static int gfs2_set_mode(struct inode *inode, mode_t mode) return error; } -int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) +static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) { - struct gfs2_ea_location el; - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct posix_acl *acl = NULL, *clone; - mode_t mode = ip->i_inode.i_mode; - char *data = NULL; - unsigned int len; int error; + int len; + char *data; + const char *name = gfs2_acl_name(type); + + BUG_ON(name == NULL); + len = posix_acl_to_xattr(acl, NULL, 0); + if (len == 0) + return 0; + data = kmalloc(len, GFP_NOFS); + if (data == NULL) + return -ENOMEM; + error = posix_acl_to_xattr(acl, data, len); + if (error < 0) + goto out; + error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); +out: + kfree(data); + return error; +} + +int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) +{ + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct posix_acl *acl, *clone; + mode_t mode = inode->i_mode; + int error = 0; if (!sdp->sd_args.ar_posix_acl) return 0; - if (S_ISLNK(ip->i_inode.i_mode)) + if (S_ISLNK(inode->i_mode)) return 0; - error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len); - brelse(el.el_bh); - if (error) - return error; + acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (!acl) { mode &= ~current_umask(); - if (mode != ip->i_inode.i_mode) - error = gfs2_set_mode(&ip->i_inode, mode); + if (mode != inode->i_mode) + error = gfs2_set_mode(inode, mode); return error; } + if (S_ISDIR(inode->i_mode)) { + error = gfs2_acl_set(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto out; + } + clone = posix_acl_clone(acl, GFP_NOFS); error = -ENOMEM; if (!clone) @@ -153,43 +163,32 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) posix_acl_release(acl); acl = clone; - if (S_ISDIR(ip->i_inode.i_mode)) { - error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, - GFS2_POSIX_ACL_DEFAULT, data, len, 0); - if (error) - goto out; - } - error = posix_acl_create_masq(acl, &mode); if (error < 0) goto out; if (error == 0) goto munge; - posix_acl_to_xattr(acl, data, len); - error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, - GFS2_POSIX_ACL_ACCESS, data, len, 0); + error = gfs2_acl_set(inode, ACL_TYPE_ACCESS, acl); if (error) goto out; munge: - error = gfs2_set_mode(&ip->i_inode, mode); + error = gfs2_set_mode(inode, mode); out: posix_acl_release(acl); - kfree(data); return error; } int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) { - struct posix_acl *acl = NULL, *clone; - struct gfs2_ea_location el; + struct posix_acl *acl, *clone; char *data; unsigned int len; int error; - error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len); - if (error) - goto out_brelse; + acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (!acl) return gfs2_setattr_simple(ip, attr); @@ -202,15 +201,18 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) error = posix_acl_chmod_masq(acl, attr->ia_mode); if (!error) { + len = posix_acl_to_xattr(acl, NULL, 0); + data = kmalloc(len, GFP_NOFS); + error = -ENOMEM; + if (data == NULL) + goto out; posix_acl_to_xattr(acl, data, len); - error = gfs2_ea_acl_chmod(ip, &el, attr, data); + error = gfs2_xattr_acl_chmod(ip, attr, data); + kfree(data); } out: posix_acl_release(acl); - kfree(data); -out_brelse: - brelse(el.el_bh); return error; } diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index cc954390b6da..9306a2e6620c 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -17,7 +17,7 @@ #define GFS2_ACL_MAX_ENTRIES 25 extern int gfs2_check_acl(struct inode *inode, int mask); -extern int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); +extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); extern struct xattr_handler gfs2_xattr_system_handler; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index fb15d3b1f409..6380cd9314b0 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -871,7 +871,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock2; - error = gfs2_acl_create(dip, GFS2_I(inode)); + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 6b803540951e..912f5cbc4740 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -186,8 +186,8 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, return 0; } -int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, - struct gfs2_ea_location *el) +static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, + struct gfs2_ea_location *el) { struct ea_find ef; int error; @@ -516,8 +516,8 @@ out: return error; } -int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, - char *data, size_t size) +static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, + char *data, size_t size) { int ret; size_t len = GFS2_EA_DATA_LEN(el->el_ea); @@ -534,6 +534,36 @@ int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, return len; } +int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata) +{ + struct gfs2_ea_location el; + int error; + int len; + char *data; + + error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, &el); + if (error) + return error; + if (!el.el_ea) + goto out; + if (!GFS2_EA_DATA_LEN(el.el_ea)) + goto out; + + len = GFS2_EA_DATA_LEN(el.el_ea); + data = kmalloc(len, GFP_NOFS); + error = -ENOMEM; + if (data == NULL) + goto out; + + error = gfs2_ea_get_copy(ip, &el, data, len); + if (error == 0) + error = len; + *ppdata = data; +out: + brelse(el.el_bh); + return error; +} + /** * gfs2_xattr_get - Get a GFS2 extended attribute * @inode: The inode @@ -1259,22 +1289,26 @@ fail: return error; } -int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, - struct iattr *attr, char *data) +int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) { + struct gfs2_ea_location el; struct buffer_head *dibh; int error; - if (GFS2_EA_IS_STUFFED(el->el_ea)) { + error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); + if (error) + return error; + + if (GFS2_EA_IS_STUFFED(el.el_ea)) { error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); if (error) return error; - gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); - memcpy(GFS2_EA2DATA(el->el_ea), data, - GFS2_EA_DATA_LEN(el->el_ea)); + gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); + memcpy(GFS2_EA2DATA(el.el_ea), data, + GFS2_EA_DATA_LEN(el.el_ea)); } else - error = ea_acl_chmod_unstuffed(ip, el->el_ea, data); + error = ea_acl_chmod_unstuffed(ip, el.el_ea, data); if (error) return error; diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index cbdfd7743733..8d6ae5813c4d 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -62,11 +62,7 @@ extern int gfs2_ea_dealloc(struct gfs2_inode *ip); /* Exported to acl.c */ -extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, - struct gfs2_ea_location *el); -extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, - char *data, size_t size); -extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, - struct iattr *attr, char *data); +extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); +extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data); #endif /* __EATTR_DOT_H__ */ From 106381bfba997b83b64f68f2210e154162fc38e6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 16:26:23 +0100 Subject: [PATCH 08/31] GFS2: Add cached ACLs support The other patches in this series have been building towards being able to support cached ACLs like other filesystems. The only real difference with GFS2 is that we have to invalidate the cache when we drop a glock, but that is dealt with in earlier patches. Signed-off-by: Steven Whitehouse --- fs/gfs2/acl.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index bd0fce964c41..3eb1ea846173 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -48,6 +48,10 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) if (!ip->i_eattr) return NULL; + acl = get_cached_acl(&ip->i_inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + name = gfs2_acl_name(type); if (name == NULL) return ERR_PTR(-EINVAL); @@ -123,6 +127,8 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) if (error < 0) goto out; error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); + if (!error) + set_cached_acl(inode, type, acl); out: kfree(data); return error; @@ -209,6 +215,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) posix_acl_to_xattr(acl, data, len); error = gfs2_xattr_acl_chmod(ip, attr, data); kfree(data); + set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); } out: @@ -228,15 +235,25 @@ static int gfs2_acl_type(const char *name) static int gfs2_xattr_system_get(struct inode *inode, const char *name, void *buffer, size_t size) { + struct posix_acl *acl; int type; + int error; type = gfs2_acl_type(name); if (type < 0) return type; - return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); -} + acl = gfs2_acl_get(GFS2_I(inode), type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} static int gfs2_xattr_system_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -303,6 +320,12 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name, set_acl: error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); + if (!error) { + if (acl) + set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); + } out_release: posix_acl_release(acl); out: From ab201832f75f58c8f5093436363f80ffa4a4c9a8 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 16:31:03 +0100 Subject: [PATCH 09/31] VFS: Use GFP_NOFS in posix_acl_from_xattr() GFS2 needs to call this from under a glock, so we need GFP_NOFS and I suspect that other filesystems might require this too. Signed-off-by: Steven Whitehouse --- fs/xattr_acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index c6ad7c7e3ee9..05ac0fe9c4d3 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c @@ -36,7 +36,7 @@ posix_acl_from_xattr(const void *value, size_t size) if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); acl_e = acl->a_entries; From 8c42d637f6f2859e0fb28b78d5add7f0dc6d0973 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Sep 2009 14:36:44 +0100 Subject: [PATCH 10/31] GFS2: Alter arguments of gfs2_quota/statfs_sync These two functions are altered so that gfs2_quota_sync may in future be called directly from the VFS. The GFS2 superblock changes to a VFS super block and there is an addition of an int argument which is currently ignored. Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 7 ++++--- fs/gfs2/quota.h | 2 +- fs/gfs2/super.c | 7 ++++--- fs/gfs2/super.h | 2 +- fs/gfs2/sys.c | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 2e9b9326bfc9..ed9e1971b2cd 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1069,8 +1069,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -int gfs2_quota_sync(struct gfs2_sbd *sdp) +int gfs2_quota_sync(struct super_block *sb, int type) { + struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); unsigned int num_qd; @@ -1298,12 +1299,12 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) } static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, - int (*fxn)(struct gfs2_sbd *sdp), + int (*fxn)(struct super_block *sb, int type), unsigned long t, unsigned long *timeo, unsigned int *new_timeo) { if (t >= *timeo) { - int error = fxn(sdp); + int error = fxn(sdp->sd_vfs, 0); quotad_error(sdp, msg, error); *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; } else { diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 0fa5fa63d0e8..437afa77663f 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -25,7 +25,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid); -extern int gfs2_quota_sync(struct gfs2_sbd *sdp); +extern int gfs2_quota_sync(struct super_block *sb, int type); extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); extern int gfs2_quota_init(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 42e5458703f0..e7b24d59d4ff 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -484,8 +484,9 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); } -int gfs2_statfs_sync(struct gfs2_sbd *sdp) +int gfs2_statfs_sync(struct super_block *sb, int type) { + struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; @@ -712,8 +713,8 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int error; flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp); - gfs2_statfs_sync(sdp); + gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_statfs_sync(sdp->sd_vfs, 0); error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, &t_gh); diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index ed962ea68c3a..3df60f2d84e3 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -44,7 +44,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf); extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct buffer_head *l_bh); -extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); +extern int gfs2_statfs_sync(struct super_block *sb, int type); extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 446329728d52..be1b8aca5906 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -158,7 +158,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_statfs_sync(sdp); + gfs2_statfs_sync(sdp->sd_vfs, 0); return len; } @@ -171,7 +171,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_quota_sync(sdp); + gfs2_quota_sync(sdp->sd_vfs, 0); return len; } From cc632e7f93465597896862cf9e50baefb1999215 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 09:59:02 +0100 Subject: [PATCH 11/31] GFS2: Hook gfs2_quota_sync into VFS via gfs2_quotactl_ops The plan is to add further operations to the gfs2_quotactl_ops in future patches. The sync operation is easy, so we start with that one. We plan to use the XFS quota control functions because they more closely match the GFS2 ones. Signed-off-by: Steven Whitehouse --- fs/gfs2/Kconfig | 2 ++ fs/gfs2/ops_fstype.c | 3 +++ fs/gfs2/quota.c | 4 ++++ fs/gfs2/quota.h | 1 + 4 files changed, 10 insertions(+) diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 5971359d2090..4dcddf83326f 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -8,6 +8,8 @@ config GFS2_FS select FS_POSIX_ACL select CRC32 select SLOW_WORK + select QUOTA + select QUOTACTL help A cluster filesystem. diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5ee06231ae5..36b11cba57e3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1138,6 +1139,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sb->s_op = &gfs2_super_ops; sb->s_export_op = &gfs2_export_ops; sb->s_xattr = gfs2_xattr_handlers; + sb->s_qcop = &gfs2_quotactl_ops; + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; sb->s_time_gran = 1; sb->s_maxbytes = MAX_LFS_FILESIZE; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ed9e1971b2cd..73a43cee0ea3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1378,3 +1378,7 @@ int gfs2_quotad(void *data) return 0; } +const struct quotactl_ops gfs2_quotactl_ops = { + .quota_sync = gfs2_quota_sync, +}; + diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 437afa77663f..025d15ba24c0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -50,5 +50,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) } extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); +extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ From 91094d0fb650decd8bf48b85d86c892d7ca913ee Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Sep 2009 15:21:56 +0100 Subject: [PATCH 12/31] GFS2: Remove obsolete code in quota.c There is no point in testing for GLF_DEMOTE here, we might as well always release the glock at that point. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.h | 9 --------- fs/gfs2/quota.c | 13 +++++-------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c609894ec0d0..13f0bd228132 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -180,15 +180,6 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) return gl->gl_state == LM_ST_SHARED; } -static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) -{ - int ret; - spin_lock(&gl->gl_spin); - ret = test_bit(GLF_DEMOTE, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - return ret; -} - int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 73a43cee0ea3..6aaa6c5e21bc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -843,9 +843,8 @@ restart: if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { loff_t pos; gfs2_glock_dq_uninit(q_gh); - error = gfs2_glock_nq_init(qd->qd_gl, - LM_ST_EXCLUSIVE, GL_NOCACHE, - q_gh); + error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, q_gh); if (error) return error; @@ -871,11 +870,9 @@ restart: qlvb->qb_value = cpu_to_be64(q.qu_value); qd->qd_qb = *qlvb; - if (gfs2_glock_is_blocking(qd->qd_gl)) { - gfs2_glock_dq_uninit(q_gh); - force_refresh = 0; - goto restart; - } + gfs2_glock_dq_uninit(q_gh); + force_refresh = 0; + goto restart; } return 0; From 1d371b5e179d943491a5fddad211cb317f38a142 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Sep 2009 15:57:27 +0100 Subject: [PATCH 13/31] GFS2: Add get_xstate quota function This allows querying of the quota state via the XFS quota API. Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6aaa6c5e21bc..e7114be7b449 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1375,7 +1376,29 @@ int gfs2_quotad(void *data) return 0; } +static int gfs2_quota_get_xstate(struct super_block *sb, + struct fs_quota_stat *fqs) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + + memset(fqs, 0, sizeof(struct fs_quota_stat)); + fqs->qs_version = FS_QSTAT_VERSION; + if (sdp->sd_args.ar_quota == GFS2_QUOTA_ON) + fqs->qs_flags = (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); + else if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) + fqs->qs_flags = (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); + if (sdp->sd_quota_inode) { + fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr; + fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks; + } + fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ + fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ + fqs->qs_incoredqs = atomic_read(&qd_lru_count); + return 0; +} + const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, + .get_xstate = gfs2_quota_get_xstate, }; From ea7623385930c63e2a35749cff9db72094cd06ad Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 16:20:30 +0100 Subject: [PATCH 14/31] GFS2: Add proper error reporting to quota sync via sysfs For some reason, the errors were not making it to userspace. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index be1b8aca5906..c5dad1eb7b91 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -178,6 +178,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + int error; u32 id; if (!capable(CAP_SYS_ADMIN)) @@ -185,13 +186,14 @@ static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, id = simple_strtoul(buf, NULL, 0); - gfs2_quota_refresh(sdp, 1, id); - return len; + error = gfs2_quota_refresh(sdp, 1, id); + return error ? error : len; } static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + int error; u32 id; if (!capable(CAP_SYS_ADMIN)) @@ -199,8 +201,8 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, id = simple_strtoul(buf, NULL, 0); - gfs2_quota_refresh(sdp, 0, id); - return len; + error = gfs2_quota_refresh(sdp, 0, id); + return error ? error : len; } static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len) From 33a82529e7007ed7beceebc6b3f3cddadb5b67f0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 16:25:40 +0100 Subject: [PATCH 15/31] GFS2: Remove constant argument from qdsb_get() The "create" argument to qdsb_get() was only ever set to true, so this patch removes that argument. Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e7114be7b449..f790f5af74e3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -462,12 +462,12 @@ static void qd_unlock(struct gfs2_quota_data *qd) qd_put(qd); } -static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create, +static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, struct gfs2_quota_data **qdp) { int error; - error = qd_get(sdp, user, id, create, qdp); + error = qd_get(sdp, user, id, CREATE, qdp); if (error) return error; @@ -509,20 +509,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); if (error) goto out; al->al_qd_num++; qd++; - error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); if (error) goto out; al->al_qd_num++; qd++; if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { - error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, uid, qd); if (error) goto out; al->al_qd_num++; @@ -530,7 +530,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) } if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { - error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); if (error) goto out; al->al_qd_num++; From 6a6ada81e4ffc222bf7e54ea7503c7cc98b4f0d8 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 16:30:38 +0100 Subject: [PATCH 16/31] GFS2: Remove constant argument from qd_get() This function was only ever called with the "create" argument set to true, so we can remove it. Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index f790f5af74e3..db124af8998e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -165,7 +165,7 @@ fail: return error; } -static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, +static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL, *new_qd = NULL; @@ -203,7 +203,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, spin_unlock(&qd_lru_lock); - if (qd || !create) { + if (qd) { if (new_qd) { gfs2_glock_put(new_qd->qd_gl); kmem_cache_free(gfs2_quotad_cachep, new_qd); @@ -467,7 +467,7 @@ static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, { int error; - error = qd_get(sdp, user, id, CREATE, qdp); + error = qd_get(sdp, user, id, qdp); if (error) return error; @@ -1117,7 +1117,7 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) struct gfs2_holder q_gh; int error; - error = qd_get(sdp, user, id, CREATE, &qd); + error = qd_get(sdp, user, id, &qd); if (error) return error; From 1e72c0f7c40e665d2ed40014750fdd2fa9968bcf Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 20:42:56 +0100 Subject: [PATCH 17/31] GFS2: Clean up gfs2_adjust_quota() and do_glock() Both of these functions contained confusing and in one case duplicate code. This patch adds a new check in do_glock() so that we report -ENOENT if we are asked to sync a quota entry which doesn't exist. Due to the previous patch this is now reported correctly to userspace. Also there are a few new comments, and I hope that the code is easier to understand now. Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 82 ++++++++++++++++--------------------------------- 1 file changed, 26 insertions(+), 56 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index db124af8998e..33e369f108b3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -15,7 +15,7 @@ * fuzziness in the current usage value of IDs that are being used on different * nodes in the cluster simultaneously. So, it is possible for a user on * multiple nodes to overrun their quota, but that overrun is controlable. - * Since quota tags are part of transactions, there is no need to a quota check + * Since quota tags are part of transactions, there is no need for a quota check * program to be run on node crashes or anything like that. * * There are couple of knobs that let the administrator manage the quota @@ -66,13 +66,6 @@ #define QUOTA_USER 1 #define QUOTA_GROUP 0 -struct gfs2_quota_host { - u64 qu_limit; - u64 qu_warn; - s64 qu_value; - u32 qu_ll_next; -}; - struct gfs2_quota_change_host { u64 qc_change; u32 qc_flags; /* GFS2_QCF_... */ @@ -618,33 +611,19 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) mutex_unlock(&sdp->sd_quota_mutex); } -static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) -{ - const struct gfs2_quota *str = buf; - - qu->qu_limit = be64_to_cpu(str->qu_limit); - qu->qu_warn = be64_to_cpu(str->qu_warn); - qu->qu_value = be64_to_cpu(str->qu_value); - qu->qu_ll_next = be32_to_cpu(str->qu_ll_next); -} - -static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) -{ - struct gfs2_quota *str = buf; - - str->qu_limit = cpu_to_be64(qu->qu_limit); - str->qu_warn = cpu_to_be64(qu->qu_warn); - str->qu_value = cpu_to_be64(qu->qu_value); - str->qu_ll_next = cpu_to_be32(qu->qu_ll_next); - memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); -} - /** - * gfs2_adjust_quota + * gfs2_adjust_quota - adjust record of current block usage + * @ip: The quota inode + * @loc: Offset of the entry in the quota file + * @change: The amount of change to record + * @qd: The quota data * * This function was mostly borrowed from gfs2_block_truncate_page which was * in turn mostly borrowed from ext3 + * + * Returns: 0 or -ve on error */ + static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, s64 change, struct gfs2_quota_data *qd) { @@ -656,8 +635,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr; - char *ptr; - struct gfs2_quota_host qp; + struct gfs2_quota *qp; s64 value; int err = -EIO; @@ -701,18 +679,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, gfs2_trans_add_bh(ip->i_gl, bh, 0); kaddr = kmap_atomic(page, KM_USER0); - ptr = kaddr + offset; - gfs2_quota_in(&qp, ptr); - qp.qu_value += change; - value = qp.qu_value; - gfs2_quota_out(&qp, ptr); + qp = kaddr + offset; + value = (s64)be64_to_cpu(qp->qu_value) + change; + qp->qu_value = cpu_to_be64(value); + qd->qd_qb.qb_value = qp->qu_value; flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); err = 0; - qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); - qd->qd_qb.qb_value = cpu_to_be64(value); - ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC); - ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value); unlock: unlock_page(page); page_cache_release(page); @@ -741,8 +714,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); for (qx = 0; qx < num_qd; qx++) { - error = gfs2_glock_nq_init(qda[qx]->qd_gl, - LM_ST_EXCLUSIVE, + error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); if (error) goto out; @@ -797,8 +769,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) qd = qda[x]; offset = qd2offset(qd); error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, - (struct gfs2_quota_data *) - qd); + (struct gfs2_quota_data *)qd); if (error) goto out_end_trans; @@ -829,8 +800,7 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; - struct gfs2_quota_host q; - char buf[sizeof(struct gfs2_quota)]; + struct gfs2_quota q; int error; struct gfs2_quota_lvb *qlvb; @@ -853,22 +823,23 @@ restart: if (error) goto fail; - memset(buf, 0, sizeof(struct gfs2_quota)); + memset(&q, 0, sizeof(struct gfs2_quota)); pos = qd2offset(qd); - error = gfs2_internal_read(ip, NULL, buf, &pos, - sizeof(struct gfs2_quota)); + error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); if (error < 0) goto fail_gunlock; - + if ((error < sizeof(q)) && force_refresh) { + error = -ENOENT; + goto fail_gunlock; + } gfs2_glock_dq_uninit(&i_gh); - gfs2_quota_in(&q, buf); qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); qlvb->__pad = 0; - qlvb->qb_limit = cpu_to_be64(q.qu_limit); - qlvb->qb_warn = cpu_to_be64(q.qu_warn); - qlvb->qb_value = cpu_to_be64(q.qu_value); + qlvb->qb_limit = q.qu_limit; + qlvb->qb_warn = q.qu_warn; + qlvb->qb_value = q.qu_value; qd->qd_qb = *qlvb; gfs2_glock_dq_uninit(q_gh); @@ -1126,7 +1097,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) gfs2_glock_dq_uninit(&q_gh); qd_put(qd); - return error; } From 113d6b3c99bf30d8083068d00e3c7304d91d4845 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 11:52:16 +0100 Subject: [PATCH 18/31] GFS2: Add get_xquota support This adds support for viewing the current GFS2 quota settings via the XFS quota API. The setting of quotas will be addressed in a later patch. Fields which are not supported here are left set to zero. Signed-off-by: Steven Whitehouse Reviewed-by: Bob Peterson --- fs/gfs2/quota.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 33e369f108b3..6c5d6aa7d532 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1367,8 +1367,51 @@ static int gfs2_quota_get_xstate(struct super_block *sb, return 0; } +static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_quota_lvb *qlvb; + struct gfs2_quota_data *qd; + struct gfs2_holder q_gh; + int error; + + memset(fdq, 0, sizeof(struct fs_disk_quota)); + + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return -ESRCH; /* Crazy XFS error code */ + + if (type == USRQUOTA) + type = QUOTA_USER; + else if (type == GRPQUOTA) + type = QUOTA_GROUP; + else + return -EINVAL; + + error = qd_get(sdp, type, id, &qd); + if (error) + return error; + error = do_glock(qd, FORCE, &q_gh); + if (error) + goto out; + + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + fdq->d_version = FS_DQUOT_VERSION; + fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA; + fdq->d_id = id; + fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); + fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); + fdq->d_bcount = be64_to_cpu(qlvb->qb_value); + + gfs2_glock_dq_uninit(&q_gh); +out: + qd_put(qd); + return error; +} + const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, + .get_xquota = gfs2_xquota_get, }; From e285c100362762f7440643be637dd332460fdc75 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 23 Sep 2009 13:50:49 +0100 Subject: [PATCH 19/31] GFS2: Add set_xquota support This patch adds the ability to set GFS2 quota limit and warning levels via the XFS quota API. Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 198 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 172 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6c5d6aa7d532..e8db5346a942 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -615,8 +615,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) * gfs2_adjust_quota - adjust record of current block usage * @ip: The quota inode * @loc: Offset of the entry in the quota file - * @change: The amount of change to record + * @change: The amount of usage change to record * @qd: The quota data + * @fdq: The updated limits to record * * This function was mostly borrowed from gfs2_block_truncate_page which was * in turn mostly borrowed from ext3 @@ -625,19 +626,21 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) */ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, - s64 change, struct gfs2_quota_data *qd) + s64 change, struct gfs2_quota_data *qd, + struct fs_disk_quota *fdq) { struct inode *inode = &ip->i_inode; struct address_space *mapping = inode->i_mapping; unsigned long index = loc >> PAGE_CACHE_SHIFT; unsigned offset = loc & (PAGE_CACHE_SIZE - 1); unsigned blocksize, iblock, pos; - struct buffer_head *bh; + struct buffer_head *bh, *dibh; struct page *page; void *kaddr; struct gfs2_quota *qp; s64 value; int err = -EIO; + u64 size; if (gfs2_is_stuffed(ip)) gfs2_unstuff_dinode(ip, NULL); @@ -683,9 +686,34 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, value = (s64)be64_to_cpu(qp->qu_value) + change; qp->qu_value = cpu_to_be64(value); qd->qd_qb.qb_value = qp->qu_value; + if (fdq) { + if (fdq->d_fieldmask & FS_DQ_BSOFT) { + qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); + qd->qd_qb.qb_warn = qp->qu_warn; + } + if (fdq->d_fieldmask & FS_DQ_BHARD) { + qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); + qd->qd_qb.qb_limit = qp->qu_limit; + } + } flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - err = 0; + + err = gfs2_meta_inode_buffer(ip, &dibh); + if (err) + goto unlock; + + size = loc + sizeof(struct gfs2_quota); + if (size > inode->i_size) { + ip->i_disksize = size; + i_size_write(inode, size); + } + inode->i_mtime = inode->i_atime = CURRENT_TIME; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + mark_inode_dirty(inode); + unlock: unlock_page(page); page_cache_release(page); @@ -713,6 +741,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); + mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); for (qx = 0; qx < num_qd; qx++) { error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); @@ -768,8 +797,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) for (x = 0; x < num_qd; x++) { qd = qda[x]; offset = qd2offset(qd); - error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, - (struct gfs2_quota_data *)qd); + error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL); if (error) goto out_end_trans; @@ -789,20 +817,44 @@ out_gunlock: out: while (qx--) gfs2_glock_dq_uninit(&ghs[qx]); + mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); return error; } +static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) +{ + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_quota q; + struct gfs2_quota_lvb *qlvb; + loff_t pos; + int error; + + memset(&q, 0, sizeof(struct gfs2_quota)); + pos = qd2offset(qd); + error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); + if (error < 0) + return error; + + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); + qlvb->__pad = 0; + qlvb->qb_limit = q.qu_limit; + qlvb->qb_warn = q.qu_warn; + qlvb->qb_value = q.qu_value; + qd->qd_qb = *qlvb; + + return 0; +} + static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_holder *q_gh) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; - struct gfs2_quota q; int error; - struct gfs2_quota_lvb *qlvb; restart: error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); @@ -812,7 +864,6 @@ restart: qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { - loff_t pos; gfs2_glock_dq_uninit(q_gh); error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, q_gh); @@ -823,25 +874,11 @@ restart: if (error) goto fail; - memset(&q, 0, sizeof(struct gfs2_quota)); - pos = qd2offset(qd); - error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); - if (error < 0) + error = update_qd(sdp, qd); + if (error) goto fail_gunlock; - if ((error < sizeof(q)) && force_refresh) { - error = -ENOENT; - goto fail_gunlock; - } + gfs2_glock_dq_uninit(&i_gh); - - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; - qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); - qlvb->__pad = 0; - qlvb->qb_limit = q.qu_limit; - qlvb->qb_warn = q.qu_warn; - qlvb->qb_value = q.qu_value; - qd->qd_qb = *qlvb; - gfs2_glock_dq_uninit(q_gh); force_refresh = 0; goto restart; @@ -1409,9 +1446,118 @@ out: return error; } +/* GFS2 only supports a subset of the XFS fields */ +#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) + +static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_quota_data *qd; + struct gfs2_holder q_gh, i_gh; + unsigned int data_blocks, ind_blocks; + unsigned int blocks = 0; + int alloc_required; + struct gfs2_alloc *al; + loff_t offset; + int error; + + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return -ESRCH; /* Crazy XFS error code */ + + switch(type) { + case USRQUOTA: + type = QUOTA_USER; + if (fdq->d_flags != XFS_USER_QUOTA) + return -EINVAL; + break; + case GRPQUOTA: + type = QUOTA_GROUP; + if (fdq->d_flags != XFS_GROUP_QUOTA) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if (fdq->d_fieldmask & ~GFS2_FIELDMASK) + return -EINVAL; + if (fdq->d_id != id) + return -EINVAL; + + error = qd_get(sdp, type, id, &qd); + if (error) + return error; + + mutex_lock(&ip->i_inode.i_mutex); + error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh); + if (error) + goto out_put; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + goto out_q; + + /* Check for existing entry, if none then alloc new blocks */ + error = update_qd(sdp, qd); + if (error) + goto out_i; + + /* If nothing has changed, this is a no-op */ + if ((fdq->d_fieldmask & FS_DQ_BSOFT) && + (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) + fdq->d_fieldmask ^= FS_DQ_BSOFT; + if ((fdq->d_fieldmask & FS_DQ_BHARD) && + (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) + fdq->d_fieldmask ^= FS_DQ_BHARD; + if (fdq->d_fieldmask == 0) + goto out_i; + + offset = qd2offset(qd); + error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota), + &alloc_required); + if (error) + goto out_i; + if (alloc_required) { + al = gfs2_alloc_get(ip); + if (al == NULL) + goto out_i; + gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), + &data_blocks, &ind_blocks); + blocks = al->al_requested = 1 + data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) + goto out_alloc; + } + + error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); + if (error) + goto out_release; + + /* Apply changes */ + error = gfs2_adjust_quota(ip, offset, 0, qd, fdq); + + gfs2_trans_end(sdp); +out_release: + if (alloc_required) { + gfs2_inplace_release(ip); +out_alloc: + gfs2_alloc_put(ip); + } +out_i: + gfs2_glock_dq_uninit(&i_gh); +out_q: + gfs2_glock_dq_uninit(&q_gh); +out_put: + mutex_unlock(&ip->i_inode.i_mutex); + qd_put(qd); + return error; +} + const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, .get_xquota = gfs2_xquota_get, + .set_xquota = gfs2_xquota_set, }; From 86e931a35e93d94e6e91b57cc76456e16d188ea9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 12:35:17 +0100 Subject: [PATCH 20/31] VFS: Export dquot_send_warning Sending a message to userspace in a generic format to warn of events (e.g. quota exceeded) in the quota subsystem is a generically useful feature. This patch makes some minor changes to the send_message function from dquot.c renaming it quota_send_message, moving it to quota.c and exporting it for use by filesystems which do not use the dquot code. Signed-off-by: Steven Whitehouse --- fs/quota/Kconfig | 2 +- fs/quota/dquot.c | 93 +++++-------------------------------------- fs/quota/quota.c | 93 +++++++++++++++++++++++++++++++++++++++++++ include/linux/quota.h | 11 +++++ 4 files changed, 114 insertions(+), 85 deletions(-) diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index 8047e01ef46b..353e78a9ebee 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -17,7 +17,7 @@ config QUOTA config QUOTA_NETLINK_INTERFACE bool "Report quota messages through netlink interface" - depends on QUOTA && NET + depends on QUOTACTL && NET help If you say Y here, quota warnings (about exceeding softlimit, reaching hardlimit, etc.) will be reported through netlink interface. If unsure, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39b49c42a7ed..9b6ad908dcb2 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -77,10 +77,6 @@ #include #include #include /* for inode_lock, oddly enough.. */ -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE -#include -#include -#endif #include @@ -1071,73 +1067,6 @@ static void print_warning(struct dquot *dquot, const int warntype) } #endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - -/* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "VFS_DQUOT", - .version = 1, - .maxattr = QUOTA_NL_A_MAX, -}; - -/* Send warning to userspace about user which exceeded quota */ -static void send_warning(const struct dquot *dquot, const char warntype) -{ - static atomic_t seq; - struct sk_buff *skb; - void *msg_head; - int ret; - int msg_size = 4 * nla_total_size(sizeof(u32)) + - 2 * nla_total_size(sizeof(u64)); - - /* We have to allocate using GFP_NOFS as we are called from a - * filesystem performing write and thus further recursion into - * the fs to free some data could cause deadlocks. */ - skb = genlmsg_new(msg_size, GFP_NOFS); - if (!skb) { - printk(KERN_ERR - "VFS: Not enough memory to send quota warning.\n"); - return; - } - msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), - "a_genl_family, 0, QUOTA_NL_C_WARNING); - if (!msg_head) { - printk(KERN_ERR - "VFS: Cannot store netlink header in quota warning.\n"); - goto err_out; - } - ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, - MAJOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, - MINOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); - if (ret) - goto attr_err_out; - genlmsg_end(skb, msg_head); - - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); - return; -attr_err_out: - printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); -err_out: - kfree_skb(skb); -} -#endif /* * Write warnings to the console and send warning messages over netlink. * @@ -1145,18 +1074,20 @@ err_out: */ static void flush_warnings(struct dquot *const *dquots, char *warntype) { + struct dquot *dq; int i; - for (i = 0; i < MAXQUOTAS; i++) - if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN && - !warning_issued(dquots[i], warntype[i])) { + for (i = 0; i < MAXQUOTAS; i++) { + dq = dquots[i]; + if (dq && warntype[i] != QUOTA_NL_NOWARN && + !warning_issued(dq, warntype[i])) { #ifdef CONFIG_PRINT_QUOTA_WARNING - print_warning(dquots[i], warntype[i]); -#endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - send_warning(dquots[i], warntype[i]); + print_warning(dq, warntype[i]); #endif + quota_send_warning(dq->dq_type, dq->dq_id, + dq->dq_sb->s_dev, warntype[i]); } + } } static int ignore_hardlimit(struct dquot *dquot) @@ -2607,12 +2538,6 @@ static int __init dquot_init(void) register_shrinker(&dqcache_shrinker); -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - if (genl_register_family("a_genl_family) != 0) - printk(KERN_ERR - "VFS: Failed to create quota netlink interface.\n"); -#endif - return 0; } module_init(dquot_init); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 95c5b42384b2..ee91e2756950 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include /* Check validity of generic quotactl commands */ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, @@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, return ret; } #endif + + +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE + +/* Netlink family structure for quota */ +static struct genl_family quota_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "VFS_DQUOT", + .version = 1, + .maxattr = QUOTA_NL_A_MAX, +}; + +/** + * quota_send_warning - Send warning to userspace about exceeded quota + * @type: The quota type: USRQQUOTA, GRPQUOTA,... + * @id: The user or group id of the quota that was exceeded + * @dev: The device on which the fs is mounted (sb->s_dev) + * @warntype: The type of the warning: QUOTA_NL_... + * + * This can be used by filesystems (including those which don't use + * dquot) to send a message to userspace relating to quota limits. + * + */ + +void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + static atomic_t seq; + struct sk_buff *skb; + void *msg_head; + int ret; + int msg_size = 4 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u64)); + + /* We have to allocate using GFP_NOFS as we are called from a + * filesystem performing write and thus further recursion into + * the fs to free some data could cause deadlocks. */ + skb = genlmsg_new(msg_size, GFP_NOFS); + if (!skb) { + printk(KERN_ERR + "VFS: Not enough memory to send quota warning.\n"); + return; + } + msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), + "a_genl_family, 0, QUOTA_NL_C_WARNING); + if (!msg_head) { + printk(KERN_ERR + "VFS: Cannot store netlink header in quota warning.\n"); + goto err_out; + } + ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); + if (ret) + goto attr_err_out; + genlmsg_end(skb, msg_head); + + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + return; +attr_err_out: + printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); +err_out: + kfree_skb(skb); +} +EXPORT_SYMBOL(quota_send_warning); + +static int __init quota_init(void) +{ + if (genl_register_family("a_genl_family) != 0) + printk(KERN_ERR + "VFS: Failed to create quota netlink interface.\n"); + return 0; +}; + +module_init(quota_init); +#endif + diff --git a/include/linux/quota.h b/include/linux/quota.h index 78c48895b12a..ce9a9b2e5cd4 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type) return flags >> _DQUOT_STATE_FLAGS; } +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE +extern void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype); +#else +static inline void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + return; +} +#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */ + struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ From 2ec4650526c5a94d96bb760001fe0685b15988de Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 12:49:15 +0100 Subject: [PATCH 21/31] GFS2: Use dquot_send_warning() This adds support to GFS2 to send quota warnings via netlink. Also it removes a stray \r which was left over from when the code used to print warnings on the console. Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e8db5346a942..1d4fc0413a3f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include "gfs2.h" @@ -1001,7 +1002,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n", + printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", sdp->sd_fsname, type, (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", qd->qd_id); @@ -1038,6 +1039,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); + quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, qd->qd_id, + sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); + error = -EDQUOT; break; } else if (be64_to_cpu(qd->qd_qb.qb_warn) && @@ -1045,6 +1050,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) time_after_eq(jiffies, qd->qd_last_warn + gfs2_tune_get(sdp, gt_quota_warn_period) * HZ)) { + quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, qd->qd_id, + sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; } From 3d3c10f2ce80d2a19e5e02023c2b7ab7086c36d5 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 20 Oct 2009 02:39:44 -0500 Subject: [PATCH 22/31] GFS2: Improve statfs and quota usability GFS2 now has three new mount options, statfs_quantum, quota_quantum and statfs_percent. statfs_quantum and quota_quantum simply allow you to set the tunables of the same name. Setting setting statfs_quantum to 0 will also turn on the statfs_slow tunable. statfs_percent accepts an integer between 0 and 100. Numbers between 1 and 100 will cause GFS2 to do any early sync when the local number of blocks free changes by at least statfs_percent from the totoal number of blocks free. Setting statfs_percent to 0 disables this. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 4 +++ fs/gfs2/ops_fstype.c | 14 +++++++-- fs/gfs2/quota.c | 21 ++++++++++++-- fs/gfs2/quota.h | 2 ++ fs/gfs2/super.c | 69 +++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 100 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 6edb423f90b3..c239b0f969c8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -430,6 +430,9 @@ struct gfs2_args { unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ int ar_commit; /* Commit interval */ + int ar_statfs_quantum; /* The fast statfs interval */ + int ar_quota_quantum; /* The quota interval */ + int ar_statfs_percent; /* The % change to force sync */ }; struct gfs2_tune { @@ -558,6 +561,7 @@ struct gfs2_sbd { spinlock_t sd_statfs_spin; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; + int sd_statfs_force_sync; /* Resource group stuff */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 36b11cba57e3..9744ee920473 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -63,13 +63,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; gt->gt_quota_scale_den = 1; - gt->gt_quota_quantum = 60; gt->gt_new_files_jdata = 0; gt->gt_max_readahead = 1 << 18; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; - gt->gt_statfs_quantum = 30; - gt->gt_statfs_slow = 0; } static struct gfs2_sbd *init_sbd(struct super_block *sb) @@ -1153,6 +1150,15 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; + sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum; + if (sdp->sd_args.ar_statfs_quantum) { + sdp->sd_tune.gt_statfs_slow = 0; + sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; + } + else { + sdp->sd_tune.gt_statfs_slow = 1; + sdp->sd_tune.gt_statfs_quantum = 30; + } error = init_names(sdp, silent); if (error) @@ -1308,6 +1314,8 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags, args.ar_quota = GFS2_QUOTA_DEFAULT; args.ar_data = GFS2_DATA_DEFAULT; args.ar_commit = 60; + args.ar_statfs_quantum = 30; + args.ar_quota_quantum = 60; args.ar_errors = GFS2_ERRORS_DEFAULT; error = gfs2_mount_args(&args, data); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1d4fc0413a3f..e3bf6eab8750 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1344,6 +1344,14 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp) } } +void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { + if (!sdp->sd_statfs_force_sync) { + sdp->sd_statfs_force_sync = 1; + wake_up(&sdp->sd_quota_wait); + } +} + + /** * gfs2_quotad - Write cached quota changes into the quota file * @sdp: Pointer to GFS2 superblock @@ -1363,8 +1371,15 @@ int gfs2_quotad(void *data) while (!kthread_should_stop()) { /* Update the master statfs file */ - quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, - &statfs_timeo, &tune->gt_statfs_quantum); + if (sdp->sd_statfs_force_sync) { + int error = gfs2_statfs_sync(sdp->sd_vfs, 0); + quotad_error(sdp, "statfs", error); + statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; + } + else + quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, + &statfs_timeo, + &tune->gt_statfs_quantum); /* Update quota file */ quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, @@ -1381,7 +1396,7 @@ int gfs2_quotad(void *data) spin_lock(&sdp->sd_trunc_lock); empty = list_empty(&sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); - if (empty) + if (empty && !sdp->sd_statfs_force_sync) t -= schedule_timeout(t); else t = 0; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 025d15ba24c0..e271fa07ad02 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -32,6 +32,8 @@ extern int gfs2_quota_init(struct gfs2_sbd *sdp); extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); extern int gfs2_quotad(void *data); +extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); + static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e7b24d59d4ff..3fee2fd3ae43 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -70,6 +70,9 @@ enum { Opt_commit, Opt_err_withdraw, Opt_err_panic, + Opt_statfs_quantum, + Opt_statfs_percent, + Opt_quota_quantum, Opt_error, }; @@ -101,6 +104,9 @@ static const match_table_t tokens = { {Opt_commit, "commit=%d"}, {Opt_err_withdraw, "errors=withdraw"}, {Opt_err_panic, "errors=panic"}, + {Opt_statfs_quantum, "statfs_quantum=%d"}, + {Opt_statfs_percent, "statfs_percent=%d"}, + {Opt_quota_quantum, "quota_quantum=%d"}, {Opt_error, NULL} }; @@ -214,6 +220,28 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) return rv ? rv : -EINVAL; } break; + case Opt_statfs_quantum: + rv = match_int(&tmp[0], &args->ar_statfs_quantum); + if (rv || args->ar_statfs_quantum < 0) { + printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_quota_quantum: + rv = match_int(&tmp[0], &args->ar_quota_quantum); + if (rv || args->ar_quota_quantum <= 0) { + printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_statfs_percent: + rv = match_int(&tmp[0], &args->ar_statfs_percent); + if (rv || args->ar_statfs_percent < 0 || + args->ar_statfs_percent > 100) { + printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n"); + return rv ? rv : -EINVAL; + } + break; case Opt_err_withdraw: args->ar_errors = GFS2_ERRORS_WITHDRAW; break; @@ -442,7 +470,9 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, { struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct buffer_head *l_bh; + int percent, sync_percent; int error; error = gfs2_meta_inode_buffer(l_ip, &l_bh); @@ -456,9 +486,17 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, l_sc->sc_free += free; l_sc->sc_dinodes += dinodes; gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); + if (m_sc->sc_free) + percent = (100 * l_sc->sc_free) / m_sc->sc_free; + else + percent = 100; spin_unlock(&sdp->sd_statfs_spin); brelse(l_bh); + sync_percent = sdp->sd_args.ar_statfs_percent; + if (sync_percent && (percent >= sync_percent || + percent <= -sync_percent)) + gfs2_wake_up_statfs(sdp); } void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, @@ -522,6 +560,7 @@ int gfs2_statfs_sync(struct super_block *sb, int type) goto out_bh2; update_statfs(sdp, m_bh, l_bh); + sdp->sd_statfs_force_sync = 0; gfs2_trans_end(sdp); @@ -1062,6 +1101,11 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) spin_lock(>->gt_spin); args.ar_commit = gt->gt_log_flush_secs; + args.ar_quota_quantum = gt->gt_quota_quantum; + if (gt->gt_statfs_slow) + args.ar_statfs_quantum = 0; + else + args.ar_statfs_quantum = gt->gt_statfs_quantum; spin_unlock(>->gt_spin); error = gfs2_mount_args(&args, data); if (error) @@ -1100,6 +1144,15 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_POSIXACL; spin_lock(>->gt_spin); gt->gt_log_flush_secs = args.ar_commit; + gt->gt_quota_quantum = args.ar_quota_quantum; + if (args.ar_statfs_quantum) { + gt->gt_statfs_slow = 0; + gt->gt_statfs_quantum = args.ar_statfs_quantum; + } + else { + gt->gt_statfs_slow = 1; + gt->gt_statfs_quantum = 30; + } spin_unlock(>->gt_spin); gfs2_online_uevent(sdp); @@ -1180,7 +1233,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) { struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; - int lfsecs; + int val; if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) seq_printf(s, ",meta"); @@ -1241,9 +1294,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } if (args->ar_discard) seq_printf(s, ",discard"); - lfsecs = sdp->sd_tune.gt_log_flush_secs; - if (lfsecs != 60) - seq_printf(s, ",commit=%d", lfsecs); + val = sdp->sd_tune.gt_log_flush_secs; + if (val != 60) + seq_printf(s, ",commit=%d", val); + val = sdp->sd_tune.gt_statfs_quantum; + if (val != 30) + seq_printf(s, ",statfs_quantum=%d", val); + val = sdp->sd_tune.gt_quota_quantum; + if (val != 60) + seq_printf(s, ",quota_quantum=%d", val); + if (args->ar_statfs_percent) + seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent); if (args->ar_errors != GFS2_ERRORS_DEFAULT) { const char *state; From c14f5735e724cb5338ca8298d42b1658008a10d7 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Mon, 26 Oct 2009 13:29:47 -0500 Subject: [PATCH 23/31] GFS2: remove division from new statfs code It's not necessary to do any 64bit division for the statfs sync code, so remove it. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/super.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3fee2fd3ae43..b1dcfab36465 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -472,7 +472,8 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct buffer_head *l_bh; - int percent, sync_percent; + s64 x, y; + int need_sync = 0; int error; error = gfs2_meta_inode_buffer(l_ip, &l_bh); @@ -486,16 +487,16 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, l_sc->sc_free += free; l_sc->sc_dinodes += dinodes; gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); - if (m_sc->sc_free) - percent = (100 * l_sc->sc_free) / m_sc->sc_free; - else - percent = 100; + if (sdp->sd_args.ar_statfs_percent) { + x = 100 * l_sc->sc_free; + y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent; + if (x >= y || x <= -y) + need_sync = 1; + } spin_unlock(&sdp->sd_statfs_spin); brelse(l_bh); - sync_percent = sdp->sd_args.ar_statfs_percent; - if (sync_percent && (percent >= sync_percent || - percent <= -sync_percent)) + if (need_sync) gfs2_wake_up_statfs(sdp); } From f25934c5f88655a8d5c3c40a540daed1f0e6dedc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Oct 2009 08:03:27 +0100 Subject: [PATCH 24/31] GFS2: add barrier/nobarrier mount options Currently gfs2 issues barrier unconditionally. There are various reasons to disable them, be that just for testing or for stupid devices flushing large battert backed caches. Add a nobarrier option that matches xfs and btrfs for this. Also add a symmetric barrier option to turn it back on at remount time. Signed-off-by: Christoph Hellwig Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/ops_fstype.c | 2 ++ fs/gfs2/super.c | 14 ++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c239b0f969c8..4792200978c8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -429,6 +429,7 @@ struct gfs2_args { unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ + unsigned int ar_nobarrier:1; /* do not send barriers */ int ar_commit; /* Commit interval */ int ar_statfs_quantum; /* The fast statfs interval */ int ar_quota_quantum; /* The quota interval */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9744ee920473..edfee24f3636 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1131,6 +1131,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; + if (sdp->sd_args.ar_nobarrier) + set_bit(SDF_NOBARRIERS, &sdp->sd_flags); sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b1dcfab36465..5e4b31441a9e 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -73,6 +73,8 @@ enum { Opt_statfs_quantum, Opt_statfs_percent, Opt_quota_quantum, + Opt_barrier, + Opt_nobarrier, Opt_error, }; @@ -107,6 +109,8 @@ static const match_table_t tokens = { {Opt_statfs_quantum, "statfs_quantum=%d"}, {Opt_statfs_percent, "statfs_percent=%d"}, {Opt_quota_quantum, "quota_quantum=%d"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, {Opt_error, NULL} }; @@ -253,6 +257,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) } args->ar_errors = GFS2_ERRORS_PANIC; break; + case Opt_barrier: + args->ar_nobarrier = 0; + break; + case Opt_nobarrier: + args->ar_nobarrier = 1; + break; case Opt_error: default: printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); @@ -1143,6 +1153,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags |= MS_POSIXACL; else sb->s_flags &= ~MS_POSIXACL; + if (sdp->sd_args.ar_nobarrier) + set_bit(SDF_NOBARRIERS, &sdp->sd_flags); + else + clear_bit(SDF_NOBARRIERS, &sdp->sd_flags); spin_lock(>->gt_spin); gt->gt_log_flush_secs = args.ar_commit; gt->gt_quota_quantum = args.ar_quota_quantum; From cdcfde62dac64c86ff34e483c595d568a252c433 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 30 Oct 2009 10:48:53 +0000 Subject: [PATCH 25/31] GFS2: Display nobarrier option in /proc/mounts Since the default is barriers on, this only displays the nobarrier option when that is active. Signed-off-by: Steven Whitehouse --- fs/gfs2/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5e4b31441a9e..c282ad41f3d1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1336,6 +1336,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } seq_printf(s, ",errors=%s", state); } + if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) + seq_printf(s, ",nobarrier"); + return 0; } From 1579343a73e32b5886e186e8f3e4db85e420ed3f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 11:06:37 +0000 Subject: [PATCH 26/31] GFS2: Remove dirent_first() function This function only had one caller left, and that caller only called it for leaf blocks, hence one branch of the "if" was never taken. In addition the call to get_left had already verified the metadata type, so the function can be reduced to a single line of code in its caller. Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 297d7e5cebad..25fddc100f18 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -525,38 +525,6 @@ consist_inode: return ERR_PTR(-EIO); } - -/** - * dirent_first - Return the first dirent - * @dip: the directory - * @bh: The buffer - * @dent: Pointer to list of dirents - * - * return first dirent whether bh points to leaf or stuffed dinode - * - * Returns: IS_LEAF, IS_DINODE, or -errno - */ - -static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh, - struct gfs2_dirent **dent) -{ - struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; - - if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { - if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh)) - return -EIO; - *dent = (struct gfs2_dirent *)(bh->b_data + - sizeof(struct gfs2_leaf)); - return IS_LEAF; - } else { - if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI)) - return -EIO; - *dent = (struct gfs2_dirent *)(bh->b_data + - sizeof(struct gfs2_dinode)); - return IS_DINODE; - } -} - static int dirent_check_reclen(struct gfs2_inode *dip, const struct gfs2_dirent *d, const void *end_p) { @@ -1006,7 +974,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) divider = (start + half_len) << (32 - dip->i_depth); /* Copy the entries */ - dirent_first(dip, obh, &dent); + dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf)); do { next = dent; From 2c77634965ee28c8b4790ffb5e83dd5ff7ac8988 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 11:10:51 +0000 Subject: [PATCH 27/31] GFS2: Locking order fix in gfs2_check_blk_state In some cases we already have the rindex lock when we enter this function. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8f1cfb02a6cb..0608f490c295 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1710,11 +1710,16 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder ri_gh, rgd_gh; + struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); + int ri_locked = 0; int error; - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto fail; + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto fail; + ri_locked = 1; + } error = -EINVAL; rgd = gfs2_blk2rgrpd(sdp, no_addr); @@ -1730,7 +1735,8 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) gfs2_glock_dq_uninit(&rgd_gh); fail_rindex: - gfs2_glock_dq_uninit(&ri_gh); + if (ri_locked) + gfs2_glock_dq_uninit(&ri_gh); fail: return error; } From 0ab7d13fcbd7ce1658c563e345990ba453719deb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 16:20:51 +0000 Subject: [PATCH 28/31] GFS2: Tag all metadata with jid There are two spare field in the header common to all GFS2 metadata. One is just the right size to fit a journal id in it, and this patch updates the journal code so that each time a metadata block is modified, we tag it with the journal id of the node which is performing the modification. The reason for this is that it should make it much easier to debug issues which arise if we can tell which node was the last to modify a particular metadata block. Since the field is updated before the block is written into the journal, each journal should only contain metadata which is tagged with its own journal id. The one exception to this is the journal header block, which might have a different node's id in it, if that journal was recovered by another node in the cluster. Thus each journal will contain a record of which nodes recovered it, via the journal header. The other field in the metadata header could potentially be used to hold information about what kind of operation was performed, but for the time being we just zero it on each transaction so that if we use it for that in future, we'll know that the information (where it exists) is reliable. I did consider using the other field to hold the journal sequence number, however since in GFS2's journaling we write the modified data into the journal and not the original data, this gives no information as to what action caused the modification, so I think we can probably come up with a better use for those 64 bits in the future. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 2 -- fs/gfs2/log.c | 2 ++ fs/gfs2/lops.c | 4 ++++ fs/gfs2/recovery.c | 2 ++ include/linux/gfs2_ondisk.h | 6 +++++- 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6380cd9314b0..26ba2a4c4a2d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -947,9 +947,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); - str->di_header.__pad0 = 0; str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); - str->di_header.__pad1 = 0; str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); str->di_mode = cpu_to_be32(ip->i_inode.i_mode); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 13c6237c5f67..4511b08fc451 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); lh->lh_flags = cpu_to_be32(flags); lh->lh_tail = cpu_to_be32(tail); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 9969ff062c5b..de97632ba32f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_meta_header *mh; struct gfs2_trans *tr; lock_buffer(bd->bd_bh); @@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); + mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + mh->__pad0 = cpu_to_be64(0); + mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 09fa31965576..4b9bece3d437 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); lh->lh_blkno = cpu_to_be32(lblock); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b80c88dedbbb..81f90a59cda6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -81,7 +81,11 @@ struct gfs2_meta_header { __be32 mh_type; __be64 __pad0; /* Was generation number in gfs1 */ __be32 mh_format; - __be32 __pad1; /* Was incarnation number in gfs1 */ + /* This union is to keep userspace happy */ + union { + __be32 mh_jid; /* Was incarnation number in gfs1 */ + __be32 __pad1; + }; }; /* From 9ae3c6de6981a1e8765b5d029f94555fc0f0fea0 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 10 Nov 2009 12:54:56 -0600 Subject: [PATCH 29/31] GFS2: drop rindex glock to refresh rindex list When a gfs2 filesystem is grown, it needs to rebuild the rindex list to be able to use the new space. gfs2 does this when the rindex is marked not uptodate, which happens when the rindex glock is dropped. However, on a single node setup, there is never any reason to drop the rindex glock, so gfs2 never invalidates the the rindex. This patch makes gfs2 automatically drop the rindex glock after filesystem grows, so it can refresh the rindex list. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 694b5d48f036..dce062a0b02a 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -819,8 +819,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, mark_inode_dirty(inode); } - if (inode == sdp->sd_rindex) + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); + ip->i_gh.gh_flags |= GL_NOCACHE; + } brelse(dibh); gfs2_trans_end(sdp); @@ -889,8 +891,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, mark_inode_dirty(inode); } - if (inode == sdp->sd_rindex) + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); + ip->i_gh.gh_flags |= GL_NOCACHE; + } brelse(dibh); gfs2_trans_end(sdp); From c29cd9004e72acb5a6cb8caf08508f1c5edee686 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 18 Nov 2009 18:09:41 +0800 Subject: [PATCH 30/31] writeback: remove unused nonblocking and congestion checks (gfs2) No one is calling wb_writeback and write_cache_pages with wbc.nonblocking=1 any more. And lumpy pageout will want to do nonblocking writeback without the congestion wait. Signed-off-by: Wu Fengguang Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index dce062a0b02a..7b8da9415267 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -269,7 +269,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset = i_size & (PAGE_CACHE_SIZE-1); unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); - struct backing_dev_info *bdi = mapping->backing_dev_info; int i; int ret; @@ -313,11 +312,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, if (ret || (--(wbc->nr_to_write) <= 0)) ret = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - ret = 1; - } - } gfs2_trans_end(sdp); return ret; @@ -338,7 +332,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, static int gfs2_write_cache_jdata(struct address_space *mapping, struct writeback_control *wbc) { - struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; int done = 0; struct pagevec pvec; @@ -348,11 +341,6 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, int scanned = 0; int range_whole = 0; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ From 26bb7505cf7db3560286be9f6384b6d3911f78b5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 27 Nov 2009 10:31:11 +0000 Subject: [PATCH 31/31] GFS2: Fix glock refcount issues This patch fixes some ref counting issues. Firstly by moving the point at which we drop the ref count after a dlm lock operation has completed we ensure that we never call gfs2_glock_hold() on a lock with a zero ref count. Secondly, by using atomic_dec_and_lock() in gfs2_glock_put() we ensure that at no time will a glock with zero ref count appear on the lru_list. That means that we can remove the check for this in our shrinker (which was racy). Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a3f90ad2af80..f455a03a09e2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -241,15 +241,14 @@ int gfs2_glock_put(struct gfs2_glock *gl) int rv = 0; write_lock(gl_lock_addr(gl->gl_hash)); - if (atomic_dec_and_test(&gl->gl_ref)) { + if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { hlist_del(&gl->gl_list); - write_unlock(gl_lock_addr(gl->gl_hash)); - spin_lock(&lru_lock); if (!list_empty(&gl->gl_lru)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); } spin_unlock(&lru_lock); + write_unlock(gl_lock_addr(gl->gl_hash)); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); glock_free(gl); rv = 1; @@ -513,7 +512,6 @@ retry: GLOCK_BUG_ON(gl, 1); } spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); return; } @@ -524,8 +522,6 @@ retry: if (glops->go_xmote_bh) { spin_unlock(&gl->gl_spin); rv = glops->go_xmote_bh(gl, gh); - if (rv == -EAGAIN) - return; spin_lock(&gl->gl_spin); if (rv) { do_error(gl, rv); @@ -540,7 +536,6 @@ out: clear_bit(GLF_LOCK, &gl->gl_flags); out_locked: spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); } static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, @@ -600,7 +595,6 @@ __acquires(&gl->gl_spin) if (!(ret & LM_OUT_ASYNC)) { finish_xmote(gl, ret); - gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } else { @@ -712,9 +706,12 @@ static void glock_work_func(struct work_struct *work) { unsigned long delay = 0; struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + int drop_ref = 0; - if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { finish_xmote(gl, gl->gl_reply); + drop_ref = 1; + } down_read(&gfs2_umount_flush_sem); spin_lock(&gl->gl_spin); if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && @@ -732,6 +729,8 @@ static void glock_work_func(struct work_struct *work) if (!delay || queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); + if (drop_ref) + gfs2_glock_put(gl); } /** @@ -1366,10 +1365,6 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) list_del_init(&gl->gl_lru); atomic_dec(&lru_count); - /* Check if glock is about to be freed */ - if (atomic_read(&gl->gl_ref) == 0) - continue; - /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { gfs2_glock_hold(gl);