quota: remove dqptr_sem

Remove dqptr_sem to make quota code scalable: Remove the dqptr_sem,
accessing inode->i_dquot now protected by dquot_srcu, and changing
inode->i_dquot is now serialized by dq_data_lock.

Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Signed-off-by: Niu Yawei <yawei.niu@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
Niu Yawei 2014-06-04 12:23:19 +08:00 committed by Jan Kara
parent 9eb6463f31
commit b9ba6f94b2
3 changed files with 49 additions and 67 deletions

View file

@ -96,13 +96,16 @@
* Note that some things (eg. sb pointer, type, id) doesn't change during
* the life of the dquot structure and so needn't to be protected by a lock
*
* Any operation working on dquots via inode pointers must hold dqptr_sem. If
* operation is just reading pointers from inode (or not using them at all) the
* read lock is enough. If pointers are altered function must hold write lock.
* Operation accessing dquots via inode pointers are protected by dquot_srcu.
* Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and
* synchronize_srcu(&dquot_srcu) is called after clearing pointers from
* inode and before dropping dquot references to avoid use of dquots after
* they are freed. dq_data_lock is used to serialize the pointer setting and
* clearing operations.
* Special care needs to be taken about S_NOQUOTA inode flag (marking that
* inode is a quota file). Functions adding pointers from inode to dquots have
* to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
* have to do all pointer modifications before dropping dqptr_sem. This makes
* to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they
* have to do all pointer modifications before dropping dq_data_lock. This makes
* sure they cannot race with quotaon which first sets S_NOQUOTA flag and
* then drops all pointers to dquots from an inode.
*
@ -116,21 +119,15 @@
* spinlock to internal buffers before writing.
*
* Lock ordering (including related VFS locks) is the following:
* dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock >
* dqio_mutex
* dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
* dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
* The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
* dqptr_sem. But filesystem has to count with the fact that functions such as
* dquot_alloc_space() acquire dqptr_sem and they usually have to be called
* from inside a transaction to keep filesystem consistency after a crash. Also
* filesystems usually want to do some IO on dquot from ->mark_dirty which is
* called with dqptr_sem held.
*/
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
DEFINE_STATIC_SRCU(dquot_srcu);
void __quota_error(struct super_block *sb, const char *func,
const char *fmt, ...)
@ -964,7 +961,6 @@ static void add_dquot_ref(struct super_block *sb, int type)
/*
* Remove references to dquots from inode and add dquot to list for freeing
* if we have the last reference to dquot
* We can't race with anybody because we hold dqptr_sem for writing...
*/
static void remove_inode_dquot_ref(struct inode *inode, int type,
struct list_head *tofree_head)
@ -1024,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type,
* We have to scan also I_NEW inodes because they can already
* have quota pointer initialized. Luckily, we need to touch
* only quota pointers and these have separate locking
* (dqptr_sem).
* (dq_data_lock).
*/
spin_lock(&dq_data_lock);
if (!IS_NOQUOTA(inode)) {
if (unlikely(inode_get_rsv_space(inode) > 0))
reserved = 1;
remove_inode_dquot_ref(inode, type, tofree_head);
}
spin_unlock(&dq_data_lock);
}
spin_unlock(&inode_sb_list_lock);
#ifdef CONFIG_QUOTA_DEBUG
@ -1048,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type)
LIST_HEAD(tofree_head);
if (sb->dq_op) {
down_write(&sb_dqopt(sb)->dqptr_sem);
remove_dquot_ref(sb, type, &tofree_head);
up_write(&sb_dqopt(sb)->dqptr_sem);
synchronize_srcu(&dquot_srcu);
put_dquot_list(&tofree_head);
}
}
@ -1381,9 +1378,6 @@ static int dquot_active(const struct inode *inode)
/*
* Initialize quota pointers in inode
*
* We do things in a bit complicated way but by that we avoid calling
* dqget() and thus filesystem callbacks under dqptr_sem.
*
* It is better to call this function outside of any transaction as it
* might need a lot of space in journal for dquot structure allocation.
*/
@ -1394,8 +1388,6 @@ static void __dquot_initialize(struct inode *inode, int type)
struct super_block *sb = inode->i_sb;
qsize_t rsv;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode))
return;
@ -1429,7 +1421,7 @@ static void __dquot_initialize(struct inode *inode, int type)
if (!init_needed)
return;
down_write(&sb_dqopt(sb)->dqptr_sem);
spin_lock(&dq_data_lock);
if (IS_NOQUOTA(inode))
goto out_err;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type)
* did a write before quota was turned on
*/
rsv = inode_get_rsv_space(inode);
if (unlikely(rsv)) {
spin_lock(&dq_data_lock);
if (unlikely(rsv))
dquot_resv_space(inode->i_dquot[cnt], rsv);
spin_unlock(&dq_data_lock);
}
}
}
out_err:
up_write(&sb_dqopt(sb)->dqptr_sem);
spin_unlock(&dq_data_lock);
/* Drop unused references */
dqput_all(got);
}
@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode)
EXPORT_SYMBOL(dquot_initialize);
/*
* Release all quotas referenced by inode
* Release all quotas referenced by inode.
*
* This function only be called on inode free or converting
* a file to quota file, no other users for the i_dquot in
* both cases, so we needn't call synchronize_srcu() after
* clearing i_dquot.
*/
static void __dquot_drop(struct inode *inode)
{
int cnt;
struct dquot *put[MAXQUOTAS];
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
put[cnt] = inode->i_dquot[cnt];
inode->i_dquot[cnt] = NULL;
}
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_unlock(&dq_data_lock);
dqput_all(put);
}
@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
*/
int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
int cnt, ret = 0;
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
struct dquot **dquots = inode->i_dquot;
int reserve = flags & DQUOT_SPACE_RESERVE;
/*
* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex
*/
if (!dquot_active(inode)) {
inode_incr_space(inode, number, reserve);
goto out;
@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
goto out_flush_warn;
mark_all_dquot_dirty(dquots);
out_flush_warn:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
out:
return ret;
@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space);
*/
int dquot_alloc_inode(const struct inode *inode)
{
int cnt, ret = 0;
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
struct dquot * const *dquots = inode->i_dquot;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode))
return 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
@ -1685,7 +1674,7 @@ int dquot_alloc_inode(const struct inode *inode)
spin_unlock(&dq_data_lock);
if (ret == 0)
mark_all_dquot_dirty(dquots);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
return ret;
}
@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode);
*/
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
int cnt;
int cnt, index;
if (!dquot_active(inode)) {
inode_claim_rsv_space(inode, number);
return 0;
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
inode_claim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(inode->i_dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
srcu_read_unlock(&dquot_srcu, index);
return 0;
}
EXPORT_SYMBOL(dquot_claim_space_nodirty);
@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
*/
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
{
int cnt;
int cnt, index;
if (!dquot_active(inode)) {
inode_reclaim_rsv_space(inode, number);
return;
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
inode_reclaim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(inode->i_dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
srcu_read_unlock(&dquot_srcu, index);
return;
}
EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
struct dquot **dquots = inode->i_dquot;
int reserve = flags & DQUOT_SPACE_RESERVE;
int reserve = flags & DQUOT_SPACE_RESERVE, index;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode)) {
inode_decr_space(inode, number, reserve);
return;
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
goto out_unlock;
mark_all_dquot_dirty(dquots);
out_unlock:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
}
EXPORT_SYMBOL(__dquot_free_space);
@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode)
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
struct dquot * const *dquots = inode->i_dquot;
int index;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (!dquot_active(inode))
return;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode)
}
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(dquots);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
}
EXPORT_SYMBOL(dquot_free_inode);
@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode);
* This operation can block, but only after everything is updated
* A transaction must be started when entering this function.
*
* We are holding reference on transfer_from & transfer_to, no need to
* protect them by srcu_read_lock().
*/
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
{
@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
struct dquot_warn warn_from_inodes[MAXQUOTAS];
struct dquot_warn warn_from_space[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return 0;
/* Initialize the arrays */
@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN;
warn_from_space[cnt].w_type = QUOTA_NL_NOWARN;
}
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_unlock(&dq_data_lock);
return 0;
}
spin_lock(&dq_data_lock);
cur_space = inode_get_bytes(inode);
rsv_space = inode_get_rsv_space(inode);
space = cur_space + rsv_space;
@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
inode->i_dquot[cnt] = transfer_to[cnt];
}
spin_unlock(&dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
mark_all_dquot_dirty(transfer_from);
mark_all_dquot_dirty(transfer_to);
@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
return 0;
over_quota:
spin_unlock(&dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
flush_warnings(warn_to);
return ret;
}

View file

@ -218,7 +218,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
mutex_init(&s->s_dquot.dqio_mutex);
mutex_init(&s->s_dquot.dqonoff_mutex);
init_rwsem(&s->s_dquot.dqptr_sem);
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;

View file

@ -390,7 +390,6 @@ struct quota_info {
unsigned int flags; /* Flags for diskquotas on this device */
struct mutex dqio_mutex; /* lock device while I/O in progress */
struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */
struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */
struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */