gfs2 fixes

- Revert a change to delete_work_func() that has gone wrong in commit
   c412a97cf6 ("gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED
   inodes").
 
 - Avoid dequeuing GL_ASYNC glock holders twice by first checking if the
   holder is still queued.
 
 - gfs2: Always check the inode size of inline inodes when reading in
   inodes to prevent corrupt filesystem images from causing weid errors.
 
 - Properly handle a race between gfs2_create_inode() and
   gfs2_inode_lookup() that causes insert_inode_locked4() to return
   -EBUSY.
 
 - Fix and clean up the interaction between gfs2_create_inode() and
   gfs2_evict_inode() by completely handling the inode deallocation and
  destruction in gfs2_evict_inode().
 
 - Remove support for glock holder auto-demotion as we have no current
   plans of using this feature again.
 
 - And a few more minor cleanups and clarifications.
 -----BEGIN PGP SIGNATURE-----
 
 iQJIBAABCAAyFiEEJZs3krPW0xkhLMTc1b+f6wMTZToFAmOcXbEUHGFncnVlbmJh
 QHJlZGhhdC5jb20ACgkQ1b+f6wMTZToVmA/5AQ8BkPBTmQmwpP1Nlox21Gf1Pf8e
 8Nne19X85ZEkSSRU+2xzF9TetRzBM/LrdV1x0hjzUCveNFsiKBGer/kObT3gh8ST
 HqXRkJz96lHvcQMbNH1JFgYwz9tdxgbCc3xVBAWKeXgy+hrQsiJAnYvlRJpc5T67
 +sGAPcCoVXxmkHhW0STLKFY2jNUem6hxox6wDpEK8JEcMAQJa9s9RCiPlWVKUV/p
 hD9T0Hh336sRIVOOPLqY71tA2cgy4/d95zVo61h5vGpAwVkGkFnHtyMUAbwfJncf
 KljV8y8lLxFoxOcwLJ0Z9bbjM2+fHzOCUiSt245lup3+diTdjr/WN0bn68/wRLfd
 ktylQZdvbPO3q44LeQDQIlPT1xH/Srdm9tZbSyn6p4aRc9s07nVdqBHZ9b4TkREo
 4ZdeSu/OG0+h/kIn9HCPfrmxUKN3a9RMI4cXesLu7WmuNZylpHynVrX78K8TAFfq
 yfTsqjCIe84xppW3Rg2vS3DfAuLwE+QzeYzd9vT1zAKn7krS/f5IXVawG5Tj0K6y
 83eeGuw1BeAH6jNO7ZhomC5Gea/PPn02RmFXhlG1uKMHBMYMI0MBcYmUbp9lweCG
 2jiT43D3fTLMreaTiZUsOC1Qn7HPEb2SKm9YFXM2e5cQh2iLfpg9q0aKRYSYmwbC
 u/JixreXHb+HfkE=
 =mwok
 -----END PGP SIGNATURE-----

Merge tag 'gfs2-v6.1-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2

Pull gfs2 updtaes from Andreas Gruenbacher:

 - Revert a change to delete_work_func() that has gone wrong in commit
   c412a97cf6 ("gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED
   inodes").

 - Avoid dequeuing GL_ASYNC glock holders twice by first checking if the
   holder is still queued.

 - gfs2: Always check the inode size of inline inodes when reading in
   inodes to prevent corrupt filesystem images from causing weid errors.

 - Properly handle a race between gfs2_create_inode() and
   gfs2_inode_lookup() that causes insert_inode_locked4() to return
   -EBUSY.

 - Fix and clean up the interaction between gfs2_create_inode() and
   gfs2_evict_inode() by completely handling the inode deallocation and
   destruction in gfs2_evict_inode().

 - Remove support for glock holder auto-demotion as we have no current
   plans of using this feature again.

 - And a few more minor cleanups and clarifications.

* tag 'gfs2-v6.1-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
  gfs2: Remove support for glock holder auto-demotion (2)
  gfs2: Remove support for glock holder auto-demotion
  gfs2: Minor gfs2_try_evict cleanup
  gfs2: Partially revert gfs2_inode_lookup change
  gfs2: Add gfs2_inode_lookup comment
  gfs2: Uninline and improve glock_{set,clear}_object
  gfs2: Simply dequeue iopen glock in gfs2_evict_inode
  gfs2: Clean up after gfs2_create_inode rework
  gfs2: Avoid dequeuing GL_ASYNC glock holders twice
  gfs2: Make gfs2_glock_hold return its glock argument
  gfs2: Always check inode size of inline inodes
  gfs2: Cosmetic gfs2_dinode_{in,out} cleanup
  gfs2: Handle -EBUSY result of insert_inode_locked4
  gfs2: Fix and clean up create / evict interaction
  gfs2: Clean up initialization of "ip" in gfs2_create_inode
  gfs2: Get rid of ghs[] in gfs2_create_inode
  gfs2: Add extra error check in alloc_dinode
This commit is contained in:
Linus Torvalds 2022-12-17 08:18:04 -06:00
commit 6830d50325
11 changed files with 234 additions and 333 deletions

View File

@ -427,8 +427,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
return error;
kaddr = kmap_atomic(page);
if (dsize > gfs2_max_stuffed_size(ip))
dsize = gfs2_max_stuffed_size(ip);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
kunmap_atomic(kaddr);

View File

@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
void *kaddr = kmap(page);
u64 dsize = i_size_read(inode);
if (dsize > gfs2_max_stuffed_size(ip))
dsize = gfs2_max_stuffed_size(ip);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
kunmap(page);

View File

@ -1445,14 +1445,13 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh)
{
struct gfs2_glock *gl = fl_gh->gh_gl;
struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl);
/*
* Make sure gfs2_glock_put() won't sleep under the file->f_lock
* spinlock.
*/
gfs2_glock_hold(gl);
spin_lock(&file->f_lock);
gfs2_holder_uninit(fl_gh);
spin_unlock(&file->f_lock);

View File

@ -186,10 +186,11 @@ void gfs2_glock_free(struct gfs2_glock *gl)
*
*/
void gfs2_glock_hold(struct gfs2_glock *gl)
struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl)
{
GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
lockref_get(&gl->gl_lockref);
return gl;
}
/**
@ -205,12 +206,6 @@ static int demote_ok(const struct gfs2_glock *gl)
if (gl->gl_state == LM_ST_UNLOCKED)
return 0;
/*
* Note that demote_ok is used for the lru process of disposing of
* glocks. For this purpose, we don't care if the glock's holders
* have the HIF_MAY_DEMOTE flag set or not. If someone is using
* them, don't demote.
*/
if (!list_empty(&gl->gl_holders))
return 0;
if (glops->go_demote_ok)
@ -393,7 +388,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
struct gfs2_holder *gh, *tmp;
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_WAIT, &gh->gh_iflags))
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
if (ret & LM_OUT_ERROR)
gh->gh_error = -EIO;
@ -407,45 +402,6 @@ static void do_error(struct gfs2_glock *gl, const int ret)
}
}
/**
* demote_incompat_holders - demote incompatible demoteable holders
* @gl: the glock we want to promote
* @current_gh: the newly promoted holder
*
* We're passing the newly promoted holder in @current_gh, but actually, any of
* the strong holders would do.
*/
static void demote_incompat_holders(struct gfs2_glock *gl,
struct gfs2_holder *current_gh)
{
struct gfs2_holder *gh, *tmp;
/*
* Demote incompatible holders before we make ourselves eligible.
* (This holder may or may not allow auto-demoting, but we don't want
* to demote the new holder before it's even granted.)
*/
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
/*
* Since holders are at the front of the list, we stop when we
* find the first non-holder.
*/
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
return;
if (gh == current_gh)
continue;
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
!may_grant(gl, current_gh, gh)) {
/*
* We should not recurse into do_promote because
* __gfs2_glock_dq only calls handle_callback,
* gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
*/
__gfs2_glock_dq(gh);
}
}
}
/**
* find_first_holder - find the first "holder" gh
* @gl: the glock
@ -464,26 +420,6 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
return NULL;
}
/**
* find_first_strong_holder - find the first non-demoteable holder
* @gl: the glock
*
* Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
*/
static inline struct gfs2_holder *
find_first_strong_holder(struct gfs2_glock *gl)
{
struct gfs2_holder *gh;
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
return NULL;
if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
return gh;
}
return NULL;
}
/*
* gfs2_instantiate - Call the glops instantiate function
* @gh: The glock holder
@ -540,9 +476,8 @@ done:
static int do_promote(struct gfs2_glock *gl)
{
struct gfs2_holder *gh, *current_gh;
bool incompat_holders_demoted = false;
current_gh = find_first_strong_holder(gl);
current_gh = find_first_holder(gl);
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
@ -561,11 +496,8 @@ static int do_promote(struct gfs2_glock *gl)
set_bit(HIF_HOLDER, &gh->gh_iflags);
trace_gfs2_promote(gh);
gfs2_holder_wake(gh);
if (!incompat_holders_demoted) {
if (!current_gh)
current_gh = gh;
demote_incompat_holders(gl, current_gh);
incompat_holders_demoted = true;
}
}
return 0;
}
@ -927,6 +859,48 @@ out_unlock:
return;
}
/**
* glock_set_object - set the gl_object field of a glock
* @gl: the glock
* @object: the object
*/
void glock_set_object(struct gfs2_glock *gl, void *object)
{
void *prev_object;
spin_lock(&gl->gl_lockref.lock);
prev_object = gl->gl_object;
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) {
pr_warn("glock=%u/%llx\n",
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number);
gfs2_dump_glock(NULL, gl, true);
}
}
/**
* glock_clear_object - clear the gl_object field of a glock
* @gl: the glock
*/
void glock_clear_object(struct gfs2_glock *gl, void *object)
{
void *prev_object;
spin_lock(&gl->gl_lockref.lock);
prev_object = gl->gl_object;
gl->gl_object = NULL;
spin_unlock(&gl->gl_lockref.lock);
if (gfs2_assert_warn(gl->gl_name.ln_sbd,
prev_object == object || prev_object == NULL)) {
pr_warn("glock=%u/%llx\n",
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number);
gfs2_dump_glock(NULL, gl, true);
}
}
void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
{
struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
@ -980,8 +954,6 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
ip = NULL;
spin_unlock(&gl->gl_lockref.lock);
if (ip) {
struct gfs2_glock *inode_gl = NULL;
gl->gl_no_formal_ino = ip->i_no_formal_ino;
set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
d_prune_aliases(&ip->i_inode);
@ -991,14 +963,14 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
spin_lock(&gl->gl_lockref.lock);
ip = gl->gl_object;
if (ip) {
inode_gl = ip->i_gl;
lockref_get(&inode_gl->gl_lockref);
clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
if (!igrab(&ip->i_inode))
ip = NULL;
}
spin_unlock(&gl->gl_lockref.lock);
if (inode_gl) {
gfs2_glock_poke(inode_gl);
gfs2_glock_put(inode_gl);
if (ip) {
gfs2_glock_poke(ip->i_gl);
iput(&ip->i_inode);
}
evicted = !ip;
}
@ -1039,6 +1011,7 @@ static void delete_work_func(struct work_struct *work)
if (gfs2_queue_delete_work(gl, 5 * HZ))
return;
}
goto out;
}
inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
@ -1051,6 +1024,7 @@ static void delete_work_func(struct work_struct *work)
d_prune_aliases(inode);
iput(inode);
}
out:
gfs2_glock_put(gl);
}
@ -1256,13 +1230,12 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
struct gfs2_holder *gh, unsigned long ip)
{
INIT_LIST_HEAD(&gh->gh_list);
gh->gh_gl = gl;
gh->gh_gl = gfs2_glock_hold(gl);
gh->gh_ip = ip;
gh->gh_owner_pid = get_pid(task_pid(current));
gh->gh_state = state;
gh->gh_flags = flags;
gh->gh_iflags = 0;
gfs2_glock_hold(gl);
}
/**
@ -1496,7 +1469,7 @@ __acquires(&gl->gl_lockref.lock)
if (test_bit(GLF_LOCK, &gl->gl_flags)) {
struct gfs2_holder *current_gh;
current_gh = find_first_strong_holder(gl);
current_gh = find_first_holder(gl);
try_futile = !may_grant(gl, current_gh, gh);
}
if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
@ -1508,8 +1481,6 @@ __acquires(&gl->gl_lockref.lock)
continue;
if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
continue;
if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags))
continue;
if (!pid_is_meaningful(gh2))
continue;
goto trap_recursive;
@ -1619,69 +1590,28 @@ static inline bool needs_demote(struct gfs2_glock *gl)
static void __gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
unsigned delay = 0;
int fast_path = 0;
/*
* This while loop is similar to function demote_incompat_holders:
* If the glock is due to be demoted (which may be from another node
* or even if this holder is GL_NOCACHE), the weak holders are
* demoted as well, allowing the glock to be demoted.
* This holder should not be cached, so mark it for demote.
* Note: this should be done before the check for needs_demote
* below.
*/
while (gh) {
/*
* If we're in the process of file system withdraw, we cannot
* just dequeue any glocks until our journal is recovered, lest
* we introduce file system corruption. We need two exceptions
* to this rule: We need to allow unlocking of nondisk glocks
* and the glock for our own journal that needs recovery.
*/
if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
glock_blocked_by_withdraw(gl) &&
gh->gh_gl != sdp->sd_jinode_gl) {
sdp->sd_glock_dqs_held++;
spin_unlock(&gl->gl_lockref.lock);
might_sleep();
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
TASK_UNINTERRUPTIBLE);
spin_lock(&gl->gl_lockref.lock);
}
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
/*
* This holder should not be cached, so mark it for demote.
* Note: this should be done before the check for needs_demote
* below.
*/
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
trace_gfs2_glock_queue(gh, 0);
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
trace_gfs2_glock_queue(gh, 0);
/*
* If there hasn't been a demote request we are done.
* (Let the remaining holders, if any, keep holding it.)
*/
if (!needs_demote(gl)) {
if (list_empty(&gl->gl_holders))
fast_path = 1;
break;
}
/*
* If we have another strong holder (we cannot auto-demote)
* we are done. It keeps holding it until it is done.
*/
if (find_first_strong_holder(gl))
break;
/*
* If we have a weak holder at the head of the list, it
* (and all others like it) must be auto-demoted. If there
* are no more weak holders, we exit the while loop.
*/
gh = find_first_holder(gl);
/*
* If there hasn't been a demote request we are done.
* (Let the remaining holders, if any, keep holding it.)
*/
if (!needs_demote(gl)) {
if (list_empty(&gl->gl_holders))
fast_path = 1;
}
if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
@ -1705,8 +1635,17 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
spin_lock(&gl->gl_lockref.lock);
if (!gfs2_holder_queued(gh)) {
/*
* May have already been dequeued because the locking request
* was GL_ASYNC and it has failed in the meantime.
*/
goto out;
}
if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
spin_unlock(&gl->gl_lockref.lock);
@ -1715,7 +1654,26 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
spin_lock(&gl->gl_lockref.lock);
}
/*
* If we're in the process of file system withdraw, we cannot just
* dequeue any glocks until our journal is recovered, lest we introduce
* file system corruption. We need two exceptions to this rule: We need
* to allow unlocking of nondisk glocks and the glock for our own
* journal that needs recovery.
*/
if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
glock_blocked_by_withdraw(gl) &&
gh->gh_gl != sdp->sd_jinode_gl) {
sdp->sd_glock_dqs_held++;
spin_unlock(&gl->gl_lockref.lock);
might_sleep();
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
TASK_UNINTERRUPTIBLE);
spin_lock(&gl->gl_lockref.lock);
}
__gfs2_glock_dq(gh);
out:
spin_unlock(&gl->gl_lockref.lock);
}
@ -1888,33 +1846,6 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
delay = gl->gl_hold_time;
}
/*
* Note 1: We cannot call demote_incompat_holders from handle_callback
* or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
* handle_callback -> demote_incompat_holders -> gfs2_glock_dq
* Plus, we only want to demote the holders if the request comes from
* a remote cluster node because local holder conflicts are resolved
* elsewhere.
*
* Note 2: if a remote node wants this glock in EX mode, lock_dlm will
* request that we set our state to UNLOCKED. Here we mock up a holder
* to make it look like someone wants the lock EX locally. Any SH
* and DF requests should be able to share the lock without demoting.
*
* Note 3: We only want to demote the demoteable holders when there
* are no more strong holders. The demoteable holders might as well
* keep the glock until the last strong holder is done with it.
*/
if (!find_first_strong_holder(gl)) {
struct gfs2_holder mock_gh = {
.gh_gl = gl,
.gh_state = (state == LM_ST_UNLOCKED) ?
LM_ST_EXCLUSIVE : state,
.gh_iflags = BIT(HIF_HOLDER)
};
demote_incompat_holders(gl, &mock_gh);
}
handle_callback(gl, state, delay, true);
__gfs2_glock_queue_work(gl, delay);
spin_unlock(&gl->gl_lockref.lock);
@ -2306,8 +2237,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
*p++ = 'H';
if (test_bit(HIF_WAIT, &iflags))
*p++ = 'W';
if (test_bit(HIF_MAY_DEMOTE, &iflags))
*p++ = 'D';
if (flags & GL_SKIP)
*p++ = 's';
*p = 0;

View File

@ -156,8 +156,6 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
break;
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
continue;
if (gh->gh_owner_pid == pid)
goto out;
}
@ -196,7 +194,7 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
int create, struct gfs2_glock **glp);
extern void gfs2_glock_hold(struct gfs2_glock *gl);
extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl);
extern void gfs2_glock_put(struct gfs2_glock *gl);
extern void gfs2_glock_queue_put(struct gfs2_glock *gl);
@ -288,6 +286,9 @@ extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
extern void gfs2_register_debugfs(void);
extern void gfs2_unregister_debugfs(void);
extern void glock_set_object(struct gfs2_glock *gl, void *object);
extern void glock_clear_object(struct gfs2_glock *gl, void *object);
extern const struct lm_lockops gfs2_dlm_ops;
static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh)
@ -305,64 +306,6 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh)
return !list_empty(&gh->gh_list);
}
/**
* glock_set_object - set the gl_object field of a glock
* @gl: the glock
* @object: the object
*/
static inline void glock_set_object(struct gfs2_glock *gl, void *object)
{
spin_lock(&gl->gl_lockref.lock);
if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
gfs2_dump_glock(NULL, gl, true);
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
}
/**
* glock_clear_object - clear the gl_object field of a glock
* @gl: the glock
* @object: the object
*
* I'd love to similarly add this:
* else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
* gfs2_dump_glock(NULL, gl, true);
* Unfortunately, that's not possible because as soon as gfs2_delete_inode
* frees the block in the rgrp, another process can reassign it for an I_NEW
* inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
* That means gfs2_delete_inode may subsequently try to call this function
* for a glock that's already pointing to a brand new inode. If we clear the
* new inode's gl_object, we'll introduce metadata corruption. Function
* gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also
* tries to clear gl_object, so it's more than just gfs2_delete_inode.
*
*/
static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
{
spin_lock(&gl->gl_lockref.lock);
if (gl->gl_object == object)
gl->gl_object = NULL;
spin_unlock(&gl->gl_lockref.lock);
}
static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
spin_lock(&gl->gl_lockref.lock);
set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
spin_unlock(&gl->gl_lockref.lock);
}
static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
spin_lock(&gl->gl_lockref.lock);
clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
spin_unlock(&gl->gl_lockref.lock);
}
extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);

View File

@ -397,38 +397,39 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
struct timespec64 atime;
u16 height, depth;
umode_t mode = be32_to_cpu(str->di_mode);
bool is_new = ip->i_inode.i_state & I_NEW;
struct inode *inode = &ip->i_inode;
bool is_new = inode->i_state & I_NEW;
if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
goto corrupt;
if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode)))
if (unlikely(!is_new && inode_wrong_type(inode, mode)))
goto corrupt;
ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
ip->i_inode.i_mode = mode;
inode->i_mode = mode;
if (is_new) {
ip->i_inode.i_rdev = 0;
inode->i_rdev = 0;
switch (mode & S_IFMT) {
case S_IFBLK:
case S_IFCHR:
ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
be32_to_cpu(str->di_minor));
inode->i_rdev = MKDEV(be32_to_cpu(str->di_major),
be32_to_cpu(str->di_minor));
break;
}
}
i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
i_uid_write(inode, be32_to_cpu(str->di_uid));
i_gid_write(inode, be32_to_cpu(str->di_gid));
set_nlink(inode, be32_to_cpu(str->di_nlink));
i_size_write(inode, be64_to_cpu(str->di_size));
gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks));
atime.tv_sec = be64_to_cpu(str->di_atime);
atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0)
ip->i_inode.i_atime = atime;
ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
if (timespec64_compare(&inode->i_atime, &atime) < 0)
inode->i_atime = atime;
inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
ip->i_goal = be64_to_cpu(str->di_goal_meta);
ip->i_generation = be64_to_cpu(str->di_generation);
@ -436,7 +437,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
ip->i_diskflags = be32_to_cpu(str->di_flags);
ip->i_eattr = be64_to_cpu(str->di_eattr);
/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
gfs2_set_inode_flags(&ip->i_inode);
gfs2_set_inode_flags(inode);
height = be16_to_cpu(str->di_height);
if (unlikely(height > GFS2_MAX_META_HEIGHT))
goto corrupt;
@ -448,8 +449,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
ip->i_depth = (u8)depth;
ip->i_entries = be32_to_cpu(str->di_entries);
if (S_ISREG(ip->i_inode.i_mode))
gfs2_set_aops(&ip->i_inode);
if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip))
goto corrupt;
if (S_ISREG(inode->i_mode))
gfs2_set_aops(inode);
return 0;
corrupt:

View File

@ -252,7 +252,6 @@ struct gfs2_lkstats {
enum {
/* States */
HIF_MAY_DEMOTE = 1,
HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
HIF_WAIT = 10,
};

View File

@ -142,6 +142,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (unlikely(error))
goto fail;
/*
* The only caller that sets @blktype to GFS2_BLKST_UNLINKED is
* delete_work_func(). Make sure not to cancel the delete work
* from within itself here.
*/
if (blktype == GFS2_BLKST_UNLINKED)
extra_flags |= LM_FLAG_TRY;
else
@ -403,12 +408,17 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
goto out_ipreserv;
error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation);
if (error)
goto out_trans_end;
ip->i_no_formal_ino = ip->i_generation;
ip->i_inode.i_ino = ip->i_no_addr;
ip->i_goal = ip->i_no_addr;
if (*dblocks > 1)
ip->i_eattr = ip->i_no_addr + 1;
out_trans_end:
gfs2_trans_end(sdp);
out_ipreserv:
gfs2_inplace_release(ip);
out_quota:
@ -586,6 +596,12 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
* @size: The initial size of the inode (ignored for directories)
* @excl: Force fail if inode exists
*
* FIXME: Change to allocate the disk blocks and write them out in the same
* transaction. That way, we can no longer end up in a situation in which an
* inode is allocated, the node crashes, and the block looks like a valid
* inode. (With atomic creates in place, we will also no longer need to zero
* the link count and dirty the inode here on failure.)
*
* Returns: 0 on success, or error code
*/
@ -596,12 +612,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
{
const struct qstr *name = &dentry->d_name;
struct posix_acl *default_acl, *acl;
struct gfs2_holder ghs[2];
struct gfs2_holder d_gh, gh;
struct inode *inode = NULL;
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_glock *io_gl;
int error, free_vfs_inode = 1;
int error;
u32 aflags = 0;
unsigned blocks = 1;
struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
@ -617,10 +633,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail;
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
if (error)
goto fail;
gfs2_holder_mark_uninitialized(ghs + 1);
gfs2_holder_mark_uninitialized(&gh);
error = create_ok(dip, name, mode);
if (error)
@ -642,7 +658,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
else
error = finish_no_open(file, NULL);
}
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(&d_gh);
goto fail;
} else if (error != -ENOENT) {
goto fail_gunlock;
@ -656,12 +672,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = -ENOMEM;
if (!inode)
goto fail_gunlock;
ip = GFS2_I(inode);
error = posix_acl_create(dir, &mode, &default_acl, &acl);
if (error)
goto fail_gunlock;
ip = GFS2_I(inode);
error = gfs2_qa_get(ip);
if (error)
goto fail_free_acls;
@ -723,15 +739,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
goto fail_free_inode;
gfs2_cancel_delete_work(io_gl);
retry:
error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
BUG_ON(error);
if (error == -EBUSY)
goto retry;
if (error)
goto fail_gunlock2;
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID,
&ip->i_iopen_gh);
if (error)
goto fail_gunlock2;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (error)
goto fail_gunlock3;
@ -739,10 +759,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock3;
if (blocks > 1) {
ip->i_eattr = ip->i_no_addr + 1;
if (blocks > 1)
gfs2_init_xattr(ip);
}
init_dinode(dip, ip, symname);
gfs2_trans_end(sdp);
@ -750,9 +768,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
glock_set_object(io_gl, ip);
gfs2_set_iop(inode);
free_vfs_inode = 0; /* After this point, the inode is no longer
considered free. Any failures need to undo
the gfs2 structures. */
if (default_acl) {
error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
if (error)
@ -785,9 +800,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
file->f_mode |= FMODE_CREATED;
error = finish_open(file, dentry, gfs2_open_common);
}
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(&d_gh);
gfs2_qa_put(ip);
gfs2_glock_dq_uninit(ghs + 1);
gfs2_glock_dq_uninit(&gh);
gfs2_glock_put(io_gl);
gfs2_qa_put(dip);
unlock_new_inode(inode);
@ -801,10 +816,6 @@ fail_gunlock3:
fail_gunlock2:
gfs2_glock_put(io_gl);
fail_free_inode:
if (ip->i_gl) {
if (free_vfs_inode) /* else evict will do the put for us */
gfs2_glock_put(ip->i_gl);
}
gfs2_rs_deltree(&ip->i_res);
gfs2_qa_put(ip);
fail_free_acls:
@ -812,20 +823,19 @@ fail_free_acls:
posix_acl_release(acl);
fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(&d_gh);
if (!IS_ERR_OR_NULL(inode)) {
set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
clear_nlink(inode);
if (!free_vfs_inode)
if (ip->i_no_addr)
mark_inode_dirty(inode);
set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED,
&GFS2_I(inode)->i_flags);
if (inode->i_state & I_NEW)
iget_failed(inode);
else
iput(inode);
}
if (gfs2_holder_initialized(ghs + 1))
gfs2_glock_dq_uninit(ghs + 1);
if (gfs2_holder_initialized(&gh))
gfs2_glock_dq_uninit(&gh);
fail:
gfs2_qa_put(dip);
return error;

View File

@ -442,6 +442,12 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
struct buffer_head *bh;
int ty;
if (!ip->i_gl) {
/* This can only happen during incomplete inode creation. */
BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
return;
}
gfs2_ail1_wipe(sdp, bstart, blen);
while (blen) {
ty = REMOVE_META;

View File

@ -379,6 +379,7 @@ out:
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
const struct inode *inode = &ip->i_inode;
struct gfs2_dinode *str = buf;
str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@ -386,15 +387,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
str->di_mode = cpu_to_be32(inode->i_mode);
str->di_uid = cpu_to_be32(i_uid_read(inode));
str->di_gid = cpu_to_be32(i_gid_read(inode));
str->di_nlink = cpu_to_be32(inode->i_nlink);
str->di_size = cpu_to_be64(i_size_read(inode));
str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
str->di_goal_meta = cpu_to_be64(ip->i_goal);
str->di_goal_data = cpu_to_be64(ip->i_goal);
@ -402,16 +403,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_flags = cpu_to_be32(ip->i_diskflags);
str->di_height = cpu_to_be16(ip->i_height);
str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) &&
!(ip->i_diskflags & GFS2_DIF_EXHASH) ?
GFS2_FORMAT_DE : 0);
str->di_depth = cpu_to_be16(ip->i_depth);
str->di_entries = cpu_to_be32(ip->i_entries);
str->di_eattr = cpu_to_be64(ip->i_eattr);
str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
}
/**
@ -475,6 +476,12 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
int need_endtrans = 0;
int ret;
if (unlikely(!ip->i_gl)) {
/* This can only happen during incomplete inode creation. */
BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
return;
}
if (unlikely(gfs2_withdrawn(sdp)))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
@ -927,8 +934,7 @@ static int gfs2_drop_inode(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
inode->i_nlink &&
if (inode->i_nlink &&
gfs2_holder_initialized(&ip->i_iopen_gh)) {
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
if (test_bit(GLF_DEMOTE, &gl->gl_flags))
@ -1076,7 +1082,13 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
struct inode *inode = &ip->i_inode;
struct gfs2_glock *gl = ip->i_gl;
truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
if (unlikely(!gl)) {
/* This can only happen during incomplete inode creation. */
BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
return;
}
truncate_inode_pages(gfs2_glock2aspace(gl), 0);
truncate_inode_pages(&inode->i_data, 0);
if (atomic_read(&gl->gl_revokes) == 0) {
@ -1218,10 +1230,8 @@ static enum dinode_demise evict_should_delete(struct inode *inode,
struct gfs2_sbd *sdp = sb->s_fs_info;
int ret;
if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags)))
goto should_delete;
}
if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
return SHOULD_DEFER_EVICTION;
@ -1294,13 +1304,22 @@ static int evict_unlinked_inode(struct inode *inode)
goto out;
}
/* We're about to clear the bitmap for the dinode, but as soon as we
do, gfs2_create_inode can create another inode at the same block
location and try to set gl_object again. We clear gl_object here so
that subsequent inode creates don't see an old gl_object. */
glock_clear_object(ip->i_gl, ip);
if (ip->i_gl)
gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
/*
* As soon as we clear the bitmap for the dinode, gfs2_create_inode()
* can get called to recreate it, or even gfs2_inode_lookup() if the
* inode was recreated on another node in the meantime.
*
* However, inserting the new inode into the inode hash table will not
* succeed until the old inode is removed, and that only happens after
* ->evict_inode() returns. The new inode is attached to its inode and
* iopen glocks after inserting it into the inode hash table, so at
* that point we can be sure that both glocks are unused.
*/
ret = gfs2_dinode_dealloc(ip);
gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
out:
return ret;
}
@ -1367,12 +1386,7 @@ static void gfs2_evict_inode(struct inode *inode)
struct gfs2_holder gh;
int ret;
if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
clear_inode(inode);
return;
}
if (inode->i_nlink || sb_rdonly(sb))
if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr)
goto out;
gfs2_holder_mark_uninitialized(&gh);
@ -1405,12 +1419,9 @@ out:
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
glock_clear_object(gl, ip);
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq(&ip->i_iopen_gh);
}
gfs2_glock_hold(gl);
gfs2_holder_uninit(&ip->i_iopen_gh);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
gfs2_glock_put_eventually(gl);
}
if (ip->i_gl) {
@ -1429,6 +1440,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
if (!ip)
return NULL;
ip->i_no_addr = 0;
ip->i_flags = 0;
ip->i_gl = NULL;
gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);

View File

@ -1412,11 +1412,13 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
ip->i_eattr = 0;
gfs2_add_inode_blocks(&ip->i_inode, -1);
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
}
}
gfs2_trans_end(sdp);
@ -1445,14 +1447,16 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
if (error)
return error;
error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
if (error)
goto out_quota;
if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
error = ea_dealloc_indirect(ip);
if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
if (error)
goto out_quota;
if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
error = ea_dealloc_indirect(ip);
if (error)
goto out_quota;
}
}
error = ea_dealloc_block(ip);