gfs2 fixes

- Revert a change to delete_work_func() that has gone wrong in commit c412a97cf6 ("gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED inodes"). - Avoid dequeuing GL_ASYNC glock holders twice by first checking if the holder is still queued. - gfs2: Always check the inode size of inline inodes when reading in inodes to prevent corrupt filesystem images from causing weird errors. - Properly handle a race between gfs2_create_inode() and gfs2_inode_lookup() that causes insert_inode_locked4() to return -EBUSY. - Fix and clean up the interaction between gfs2_create_inode() and gfs2_evict_inode() by completely handling the inode deallocation and destruction in gfs2_evict_inode(). - Remove support for glock holder auto-demotion as we have no current plans of using this feature again. - And a few more minor cleanups and clarifications. -----BEGIN PGP SIGNATURE----- iQJIBAABCAAyFiEEJZs3krPW0xkhLMTc1b+f6wMTZToFAmOcXbEUHGFncnVlbmJh QHJlZGhhdC5jb20ACgkQ1b+f6wMTZToVmA/5AQ8BkPBTmQmwpP1Nlox21Gf1Pf8e 8Nne19X85ZEkSSRU+2xzF9TetRzBM/LrdV1x0hjzUCveNFsiKBGer/kObT3gh8ST HqXRkJz96lHvcQMbNH1JFgYwz9tdxgbCc3xVBAWKeXgy+hrQsiJAnYvlRJpc5T67 +sGAPcCoVXxmkHhW0STLKFY2jNUem6hxox6wDpEK8JEcMAQJa9s9RCiPlWVKUV/p hD9T0Hh336sRIVOOPLqY71tA2cgy4/d95zVo61h5vGpAwVkGkFnHtyMUAbwfJncf KljV8y8lLxFoxOcwLJ0Z9bbjM2+fHzOCUiSt245lup3+diTdjr/WN0bn68/wRLfd ktylQZdvbPO3q44LeQDQIlPT1xH/Srdm9tZbSyn6p4aRc9s07nVdqBHZ9b4TkREo 4ZdeSu/OG0+h/kIn9HCPfrmxUKN3a9RMI4cXesLu7WmuNZylpHynVrX78K8TAFfq yfTsqjCIe84xppW3Rg2vS3DfAuLwE+QzeYzd9vT1zAKn7krS/f5IXVawG5Tj0K6y 83eeGuw1BeAH6jNO7ZhomC5Gea/PPn02RmFXhlG1uKMHBMYMI0MBcYmUbp9lweCG 2jiT43D3fTLMreaTiZUsOC1Qn7HPEb2SKm9YFXM2e5cQh2iLfpg9q0aKRYSYmwbC u/JixreXHb+HfkE= =mwok -----END PGP SIGNATURE----- Merge tag 'gfs2-v6.1-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2 Pull gfs2 updates from Andreas Gruenbacher: - Revert a change to delete_work_func() that has gone wrong in commit c412a97cf6 ("gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED inodes"). 
- Avoid dequeuing GL_ASYNC glock holders twice by first checking if the holder is still queued. - gfs2: Always check the inode size of inline inodes when reading in inodes to prevent corrupt filesystem images from causing weird errors. - Properly handle a race between gfs2_create_inode() and gfs2_inode_lookup() that causes insert_inode_locked4() to return -EBUSY. - Fix and clean up the interaction between gfs2_create_inode() and gfs2_evict_inode() by completely handling the inode deallocation and destruction in gfs2_evict_inode(). - Remove support for glock holder auto-demotion as we have no current plans of using this feature again. - And a few more minor cleanups and clarifications. * tag 'gfs2-v6.1-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: gfs2: Remove support for glock holder auto-demotion (2) gfs2: Remove support for glock holder auto-demotion gfs2: Minor gfs2_try_evict cleanup gfs2: Partially revert gfs2_inode_lookup change gfs2: Add gfs2_inode_lookup comment gfs2: Uninline and improve glock_{set,clear}_object gfs2: Simply dequeue iopen glock in gfs2_evict_inode gfs2: Clean up after gfs2_create_inode rework gfs2: Avoid dequeuing GL_ASYNC glock holders twice gfs2: Make gfs2_glock_hold return its glock argument gfs2: Always check inode size of inline inodes gfs2: Cosmetic gfs2_dinode_{in,out} cleanup gfs2: Handle -EBUSY result of insert_inode_locked4 gfs2: Fix and clean up create / evict interaction gfs2: Clean up initialization of "ip" in gfs2_create_inode gfs2: Get rid of ghs[] in gfs2_create_inode gfs2: Add extra error check in alloc_dinode
2022-12-17 08:18:04 -06:00 · 2022-12-17 08:18:04 -06:00 · 6830d50325
parent 77856d911a 6b46a06100
commit 6830d50325
11 changed files with 234 additions and 333 deletions
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@ -427,8 +427,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 		return error;

 	kaddr = kmap_atomic(page);
-	if (dsize > gfs2_max_stuffed_size(ip))
-		dsize = gfs2_max_stuffed_size(ip);
 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 	memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 	kunmap_atomic(kaddr);
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 		void *kaddr = kmap(page);
 		u64 dsize = i_size_read(inode);
 
-		if (dsize > gfs2_max_stuffed_size(ip))
-			dsize = gfs2_max_stuffed_size(ip);
-
 		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 		kunmap(page);
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@ -1445,14 +1445,13 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)

 static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh)
 {
-	struct gfs2_glock *gl = fl_gh->gh_gl;
+	struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl);

 	/*
 	 * Make sure gfs2_glock_put() won't sleep under the file->f_lock
 	 * spinlock.
 	 */

-	gfs2_glock_hold(gl);
 	spin_lock(&file->f_lock);
 	gfs2_holder_uninit(fl_gh);
 	spin_unlock(&file->f_lock);
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@ -186,10 +186,11 @@ void gfs2_glock_free(struct gfs2_glock *gl)
 *
 */

-void gfs2_glock_hold(struct gfs2_glock *gl)
+struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl)
 {
 	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
 	lockref_get(&gl->gl_lockref);
+	return gl;
 }

 /**
@ -205,12 +206,6 @@ static int demote_ok(const struct gfs2_glock *gl)

 	if (gl->gl_state == LM_ST_UNLOCKED)
 		return 0;
-	/*
-	 * Note that demote_ok is used for the lru process of disposing of
-	 * glocks. For this purpose, we don't care if the glock's holders
-	 * have the HIF_MAY_DEMOTE flag set or not. If someone is using
-	 * them, don't demote.
-	 */
 	if (!list_empty(&gl->gl_holders))
 		return 0;
 	if (glops->go_demote_ok)
@ -393,7 +388,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
 	struct gfs2_holder *gh, *tmp;

 	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
-		if (!test_bit(HIF_WAIT, &gh->gh_iflags))
+		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
 			continue;
 		if (ret & LM_OUT_ERROR)
 			gh->gh_error = -EIO;
@ -407,45 +402,6 @@ static void do_error(struct gfs2_glock *gl, const int ret)
 	}
 }

-/**
- * demote_incompat_holders - demote incompatible demoteable holders
- * @gl: the glock we want to promote
- * @current_gh: the newly promoted holder
- *
- * We're passing the newly promoted holder in @current_gh, but actually, any of
- * the strong holders would do.
- */
-static void demote_incompat_holders(struct gfs2_glock *gl,
-				    struct gfs2_holder *current_gh)
-{
-	struct gfs2_holder *gh, *tmp;
-
-	/*
-	 * Demote incompatible holders before we make ourselves eligible.
-	 * (This holder may or may not allow auto-demoting, but we don't want
-	 * to demote the new holder before it's even granted.)
-	 */
-	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
-		/*
-		 * Since holders are at the front of the list, we stop when we
-		 * find the first non-holder.
-		 */
-		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
-			return;
-		if (gh == current_gh)
-			continue;
-		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
-		    !may_grant(gl, current_gh, gh)) {
-			/*
-			 * We should not recurse into do_promote because
-			 * __gfs2_glock_dq only calls handle_callback,
-			 * gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
-			 */
-			__gfs2_glock_dq(gh);
-		}
-	}
-}
-
 /**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
@ -464,26 +420,6 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
 	return NULL;
 }

-/**
- * find_first_strong_holder - find the first non-demoteable holder
- * @gl: the glock
- *
- * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
- */
-static inline struct gfs2_holder *
-find_first_strong_holder(struct gfs2_glock *gl)
-{
-	struct gfs2_holder *gh;
-
-	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
-			return NULL;
-		if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
-			return gh;
-	}
-	return NULL;
-}
-
 /*
 * gfs2_instantiate - Call the glops instantiate function
 * @gh: The glock holder
@ -540,9 +476,8 @@ done:
 static int do_promote(struct gfs2_glock *gl)
 {
 	struct gfs2_holder *gh, *current_gh;
-	bool incompat_holders_demoted = false;

-	current_gh = find_first_strong_holder(gl);
+	current_gh = find_first_holder(gl);
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
 		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
 			continue;
@ -561,11 +496,8 @@ static int do_promote(struct gfs2_glock *gl)
 		set_bit(HIF_HOLDER, &gh->gh_iflags);
 		trace_gfs2_promote(gh);
 		gfs2_holder_wake(gh);
-		if (!incompat_holders_demoted) {
+		if (!current_gh)
 			current_gh = gh;
-			demote_incompat_holders(gl, current_gh);
-			incompat_holders_demoted = true;
-		}
 	}
 	return 0;
 }
@ -927,6 +859,48 @@ out_unlock:
 	return;
 }

+/**
+ * glock_set_object - set the gl_object field of a glock
+ * @gl: the glock
+ * @object: the object
+ */
+void glock_set_object(struct gfs2_glock *gl, void *object)
+{
+	void *prev_object;
+
+	spin_lock(&gl->gl_lockref.lock);
+	prev_object = gl->gl_object;
+	gl->gl_object = object;
+	spin_unlock(&gl->gl_lockref.lock);
+	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) {
+		pr_warn("glock=%u/%llx\n",
+			gl->gl_name.ln_type,
+			(unsigned long long)gl->gl_name.ln_number);
+		gfs2_dump_glock(NULL, gl, true);
+	}
+}
+
+/**
+ * glock_clear_object - clear the gl_object field of a glock
+ * @gl: the glock
+ */
+void glock_clear_object(struct gfs2_glock *gl, void *object)
+{
+	void *prev_object;
+
+	spin_lock(&gl->gl_lockref.lock);
+	prev_object = gl->gl_object;
+	gl->gl_object = NULL;
+	spin_unlock(&gl->gl_lockref.lock);
+	if (gfs2_assert_warn(gl->gl_name.ln_sbd,
+			     prev_object == object || prev_object == NULL)) {
+		pr_warn("glock=%u/%llx\n",
+			gl->gl_name.ln_type,
+			(unsigned long long)gl->gl_name.ln_number);
+		gfs2_dump_glock(NULL, gl, true);
+	}
+}
+
 void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
 {
 	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
@ -980,8 +954,6 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
 		ip = NULL;
 	spin_unlock(&gl->gl_lockref.lock);
 	if (ip) {
-		struct gfs2_glock *inode_gl = NULL;
-
 		gl->gl_no_formal_ino = ip->i_no_formal_ino;
 		set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
 		d_prune_aliases(&ip->i_inode);
@ -991,14 +963,14 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
 		spin_lock(&gl->gl_lockref.lock);
 		ip = gl->gl_object;
 		if (ip) {
-			inode_gl = ip->i_gl;
-			lockref_get(&inode_gl->gl_lockref);
 			clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
+			if (!igrab(&ip->i_inode))
+				ip = NULL;
 		}
 		spin_unlock(&gl->gl_lockref.lock);
-		if (inode_gl) {
-			gfs2_glock_poke(inode_gl);
-			gfs2_glock_put(inode_gl);
+		if (ip) {
+			gfs2_glock_poke(ip->i_gl);
+			iput(&ip->i_inode);
 		}
 		evicted = !ip;
 	}
@ -1039,6 +1011,7 @@ static void delete_work_func(struct work_struct *work)
 			if (gfs2_queue_delete_work(gl, 5 * HZ))
 				return;
 		}
+		goto out;
 	}

 	inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
@ -1051,6 +1024,7 @@ static void delete_work_func(struct work_struct *work)
 		d_prune_aliases(inode);
 		iput(inode);
 	}
+out:
 	gfs2_glock_put(gl);
 }

@ -1256,13 +1230,12 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
 			struct gfs2_holder *gh, unsigned long ip)
 {
 	INIT_LIST_HEAD(&gh->gh_list);
-	gh->gh_gl = gl;
+	gh->gh_gl = gfs2_glock_hold(gl);
 	gh->gh_ip = ip;
 	gh->gh_owner_pid = get_pid(task_pid(current));
 	gh->gh_state = state;
 	gh->gh_flags = flags;
 	gh->gh_iflags = 0;
-	gfs2_glock_hold(gl);
 }

 /**
@ -1496,7 +1469,7 @@ __acquires(&gl->gl_lockref.lock)
 		if (test_bit(GLF_LOCK, &gl->gl_flags)) {
 			struct gfs2_holder *current_gh;

-			current_gh = find_first_strong_holder(gl);
+			current_gh = find_first_holder(gl);
 			try_futile = !may_grant(gl, current_gh, gh);
 		}
 		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
@ -1508,8 +1481,6 @@ __acquires(&gl->gl_lockref.lock)
 			continue;
 		if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
 			continue;
-		if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags))
-			continue;
 		if (!pid_is_meaningful(gh2))
 			continue;
 		goto trap_recursive;
@ -1619,69 +1590,28 @@ static inline bool needs_demote(struct gfs2_glock *gl)
 static void __gfs2_glock_dq(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
-	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	unsigned delay = 0;
 	int fast_path = 0;

 	/*
-	 * This while loop is similar to function demote_incompat_holders:
-	 * If the glock is due to be demoted (which may be from another node
-	 * or even if this holder is GL_NOCACHE), the weak holders are
-	 * demoted as well, allowing the glock to be demoted.
+	 * This holder should not be cached, so mark it for demote.
+	 * Note: this should be done before the check for needs_demote
+	 * below.
 	 */
-	while (gh) {
-		/*
-		 * If we're in the process of file system withdraw, we cannot
-		 * just dequeue any glocks until our journal is recovered, lest
-		 * we introduce file system corruption. We need two exceptions
-		 * to this rule: We need to allow unlocking of nondisk glocks
-		 * and the glock for our own journal that needs recovery.
-		 */
-		if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
-		    glock_blocked_by_withdraw(gl) &&
-		    gh->gh_gl != sdp->sd_jinode_gl) {
-			sdp->sd_glock_dqs_held++;
-			spin_unlock(&gl->gl_lockref.lock);
-			might_sleep();
-			wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
-				    TASK_UNINTERRUPTIBLE);
-			spin_lock(&gl->gl_lockref.lock);
-		}
+	if (gh->gh_flags & GL_NOCACHE)
+		handle_callback(gl, LM_ST_UNLOCKED, 0, false);

-		/*
-		 * This holder should not be cached, so mark it for demote.
-		 * Note: this should be done before the check for needs_demote
-		 * below.
-		 */
-		if (gh->gh_flags & GL_NOCACHE)
-			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
+	list_del_init(&gh->gh_list);
+	clear_bit(HIF_HOLDER, &gh->gh_iflags);
+	trace_gfs2_glock_queue(gh, 0);

-		list_del_init(&gh->gh_list);
-		clear_bit(HIF_HOLDER, &gh->gh_iflags);
-		trace_gfs2_glock_queue(gh, 0);
-
-		/*
-		 * If there hasn't been a demote request we are done.
-		 * (Let the remaining holders, if any, keep holding it.)
-		 */
-		if (!needs_demote(gl)) {
-			if (list_empty(&gl->gl_holders))
-				fast_path = 1;
-			break;
-		}
-		/*
-		 * If we have another strong holder (we cannot auto-demote)
-		 * we are done. It keeps holding it until it is done.
-		 */
-		if (find_first_strong_holder(gl))
-			break;
-
-		/*
-		 * If we have a weak holder at the head of the list, it
-		 * (and all others like it) must be auto-demoted. If there
-		 * are no more weak holders, we exit the while loop.
-		 */
-		gh = find_first_holder(gl);
+	/*
+	 * If there hasn't been a demote request we are done.
+	 * (Let the remaining holders, if any, keep holding it.)
+	 */
+	if (!needs_demote(gl)) {
+		if (list_empty(&gl->gl_holders))
+			fast_path = 1;
 	}

 	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
@ -1705,8 +1635,17 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
 void gfs2_glock_dq(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

 	spin_lock(&gl->gl_lockref.lock);
+	if (!gfs2_holder_queued(gh)) {
+		/*
+		 * May have already been dequeued because the locking request
+		 * was GL_ASYNC and it has failed in the meantime.
+		 */
+		goto out;
+	}
+
 	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
 	    !test_bit(HIF_HOLDER, &gh->gh_iflags)) {
 		spin_unlock(&gl->gl_lockref.lock);
@ -1715,7 +1654,26 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		spin_lock(&gl->gl_lockref.lock);
 	}

+	/*
+	 * If we're in the process of file system withdraw, we cannot just
+	 * dequeue any glocks until our journal is recovered, lest we introduce
+	 * file system corruption. We need two exceptions to this rule: We need
+	 * to allow unlocking of nondisk glocks and the glock for our own
+	 * journal that needs recovery.
+	 */
+	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
+	    glock_blocked_by_withdraw(gl) &&
+	    gh->gh_gl != sdp->sd_jinode_gl) {
+		sdp->sd_glock_dqs_held++;
+		spin_unlock(&gl->gl_lockref.lock);
+		might_sleep();
+		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
+			    TASK_UNINTERRUPTIBLE);
+		spin_lock(&gl->gl_lockref.lock);
+	}
+
 	__gfs2_glock_dq(gh);
+out:
 	spin_unlock(&gl->gl_lockref.lock);
 }

@ -1888,33 +1846,6 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
 		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
 			delay = gl->gl_hold_time;
 	}
-	/*
-	 * Note 1: We cannot call demote_incompat_holders from handle_callback
-	 * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
-	 * handle_callback -> demote_incompat_holders -> gfs2_glock_dq
-	 * Plus, we only want to demote the holders if the request comes from
-	 * a remote cluster node because local holder conflicts are resolved
-	 * elsewhere.
-	 *
-	 * Note 2: if a remote node wants this glock in EX mode, lock_dlm will
-	 * request that we set our state to UNLOCKED. Here we mock up a holder
-	 * to make it look like someone wants the lock EX locally. Any SH
-	 * and DF requests should be able to share the lock without demoting.
-	 *
-	 * Note 3: We only want to demote the demoteable holders when there
-	 * are no more strong holders. The demoteable holders might as well
-	 * keep the glock until the last strong holder is done with it.
-	 */
-	if (!find_first_strong_holder(gl)) {
-		struct gfs2_holder mock_gh = {
-			.gh_gl = gl,
-			.gh_state = (state == LM_ST_UNLOCKED) ?
-				    LM_ST_EXCLUSIVE : state,
-			.gh_iflags = BIT(HIF_HOLDER)
-		};
-
-		demote_incompat_holders(gl, &mock_gh);
-	}
 	handle_callback(gl, state, delay, true);
 	__gfs2_glock_queue_work(gl, delay);
 	spin_unlock(&gl->gl_lockref.lock);
@ -2306,8 +2237,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
 		*p++ = 'H';
 	if (test_bit(HIF_WAIT, &iflags))
 		*p++ = 'W';
-	if (test_bit(HIF_MAY_DEMOTE, &iflags))
-		*p++ = 'D';
 	if (flags & GL_SKIP)
 		*p++ = 's';
 	*p = 0;
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@ -156,8 +156,6 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
 		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
 			break;
-		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
-			continue;
 		if (gh->gh_owner_pid == pid)
 			goto out;
 	}
@ -196,7 +194,7 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
 extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 			  const struct gfs2_glock_operations *glops,
 			  int create, struct gfs2_glock **glp);
-extern void gfs2_glock_hold(struct gfs2_glock *gl);
+extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl);
 extern void gfs2_glock_put(struct gfs2_glock *gl);
 extern void gfs2_glock_queue_put(struct gfs2_glock *gl);

@ -288,6 +286,9 @@ extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
 extern void gfs2_register_debugfs(void);
 extern void gfs2_unregister_debugfs(void);

+extern void glock_set_object(struct gfs2_glock *gl, void *object);
+extern void glock_clear_object(struct gfs2_glock *gl, void *object);
+
 extern const struct lm_lockops gfs2_dlm_ops;

 static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh)
@ -305,64 +306,6 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh)
 	return !list_empty(&gh->gh_list);
 }

-/**
- * glock_set_object - set the gl_object field of a glock
- * @gl: the glock
- * @object: the object
- */
-static inline void glock_set_object(struct gfs2_glock *gl, void *object)
-{
-	spin_lock(&gl->gl_lockref.lock);
-	if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
-		gfs2_dump_glock(NULL, gl, true);
-	gl->gl_object = object;
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
-/**
- * glock_clear_object - clear the gl_object field of a glock
- * @gl: the glock
- * @object: the object
- *
- * I'd love to similarly add this:
- *	else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
- *		gfs2_dump_glock(NULL, gl, true);
- * Unfortunately, that's not possible because as soon as gfs2_delete_inode
- * frees the block in the rgrp, another process can reassign it for an I_NEW
- * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
- * That means gfs2_delete_inode may subsequently try to call this function
- * for a glock that's already pointing to a brand new inode. If we clear the
- * new inode's gl_object, we'll introduce metadata corruption. Function
- * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also
- * tries to clear gl_object, so it's more than just gfs2_delete_inode.
- *
- */
-static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
-{
-	spin_lock(&gl->gl_lockref.lock);
-	if (gl->gl_object == object)
-		gl->gl_object = NULL;
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
-static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
-{
-	struct gfs2_glock *gl = gh->gh_gl;
-
-	spin_lock(&gl->gl_lockref.lock);
-	set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
-static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
-{
-	struct gfs2_glock *gl = gh->gh_gl;
-
-	spin_lock(&gl->gl_lockref.lock);
-	clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
 extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
 extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);

--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@ -397,38 +397,39 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	struct timespec64 atime;
 	u16 height, depth;
 	umode_t mode = be32_to_cpu(str->di_mode);
-	bool is_new = ip->i_inode.i_state & I_NEW;
+	struct inode *inode = &ip->i_inode;
+	bool is_new = inode->i_state & I_NEW;

 	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
 		goto corrupt;
-	if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode)))
+	if (unlikely(!is_new && inode_wrong_type(inode, mode)))
 		goto corrupt;
 	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
-	ip->i_inode.i_mode = mode;
+	inode->i_mode = mode;
 	if (is_new) {
-		ip->i_inode.i_rdev = 0;
+		inode->i_rdev = 0;
 		switch (mode & S_IFMT) {
 		case S_IFBLK:
 		case S_IFCHR:
-			ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
-						   be32_to_cpu(str->di_minor));
+			inode->i_rdev = MKDEV(be32_to_cpu(str->di_major),
+					      be32_to_cpu(str->di_minor));
 			break;
 		}
 	}

-	i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
-	i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
-	set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
-	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
-	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
+	i_uid_write(inode, be32_to_cpu(str->di_uid));
+	i_gid_write(inode, be32_to_cpu(str->di_gid));
+	set_nlink(inode, be32_to_cpu(str->di_nlink));
+	i_size_write(inode, be64_to_cpu(str->di_size));
+	gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks));
 	atime.tv_sec = be64_to_cpu(str->di_atime);
 	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
-	if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0)
-		ip->i_inode.i_atime = atime;
-	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
-	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+	if (timespec64_compare(&inode->i_atime, &atime) < 0)
+		inode->i_atime = atime;
+	inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+	inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
+	inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+	inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);

 	ip->i_goal = be64_to_cpu(str->di_goal_meta);
 	ip->i_generation = be64_to_cpu(str->di_generation);
@ -436,7 +437,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_diskflags = be32_to_cpu(str->di_flags);
 	ip->i_eattr = be64_to_cpu(str->di_eattr);
 	/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
-	gfs2_set_inode_flags(&ip->i_inode);
+	gfs2_set_inode_flags(inode);
 	height = be16_to_cpu(str->di_height);
 	if (unlikely(height > GFS2_MAX_META_HEIGHT))
 		goto corrupt;
@ -448,8 +449,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_depth = (u8)depth;
 	ip->i_entries = be32_to_cpu(str->di_entries);

-	if (S_ISREG(ip->i_inode.i_mode))
-		gfs2_set_aops(&ip->i_inode);
+	if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip))
+		goto corrupt;
+
+	if (S_ISREG(inode->i_mode))
+		gfs2_set_aops(inode);

 	return 0;
 corrupt:
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@ -252,7 +252,6 @@ struct gfs2_lkstats {

 enum {
 	/* States */
-	HIF_MAY_DEMOTE		= 1,
 	HIF_HOLDER		= 6,  /* Set for gh that "holds" the glock */
 	HIF_WAIT		= 10,
 };
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@ -142,6 +142,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 		if (unlikely(error))
 			goto fail;

+		/*
+		 * The only caller that sets @blktype to GFS2_BLKST_UNLINKED is
+		 * delete_work_func().  Make sure not to cancel the delete work
+		 * from within itself here.
+		 */
 		if (blktype == GFS2_BLKST_UNLINKED)
 			extra_flags |= LM_FLAG_TRY;
 		else
@ -403,12 +408,17 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
 		goto out_ipreserv;

 	error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation);
+	if (error)
+		goto out_trans_end;
+
 	ip->i_no_formal_ino = ip->i_generation;
 	ip->i_inode.i_ino = ip->i_no_addr;
 	ip->i_goal = ip->i_no_addr;
+	if (*dblocks > 1)
+		ip->i_eattr = ip->i_no_addr + 1;

+out_trans_end:
 	gfs2_trans_end(sdp);
-
 out_ipreserv:
 	gfs2_inplace_release(ip);
 out_quota:
@ -586,6 +596,12 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
 * @size: The initial size of the inode (ignored for directories)
 * @excl: Force fail if inode exists
 *
+ * FIXME: Change to allocate the disk blocks and write them out in the same
+ * transaction.  That way, we can no longer end up in a situation in which an
+ * inode is allocated, the node crashes, and the block looks like a valid
+ * inode.  (With atomic creates in place, we will also no longer need to zero
+ * the link count and dirty the inode here on failure.)
+ *
 * Returns: 0 on success, or error code
 */

@ -596,12 +612,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 {
 	const struct qstr *name = &dentry->d_name;
 	struct posix_acl *default_acl, *acl;
-	struct gfs2_holder ghs[2];
+	struct gfs2_holder d_gh, gh;
 	struct inode *inode = NULL;
 	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_glock *io_gl;
-	int error, free_vfs_inode = 1;
+	int error;
 	u32 aflags = 0;
 	unsigned blocks = 1;
 	struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
@ -617,10 +633,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail;

-	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
 	if (error)
 		goto fail;
-	gfs2_holder_mark_uninitialized(ghs + 1);
+	gfs2_holder_mark_uninitialized(&gh);

 	error = create_ok(dip, name, mode);
 	if (error)
@ -642,7 +658,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 			else
 				error = finish_no_open(file, NULL);
 		}
-		gfs2_glock_dq_uninit(ghs);
+		gfs2_glock_dq_uninit(&d_gh);
 		goto fail;
 	} else if (error != -ENOENT) {
 		goto fail_gunlock;
@ -656,12 +672,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	error = -ENOMEM;
 	if (!inode)
 		goto fail_gunlock;
+	ip = GFS2_I(inode);

 	error = posix_acl_create(dir, &mode, &default_acl, &acl);
 	if (error)
 		goto fail_gunlock;

-	ip = GFS2_I(inode);
 	error = gfs2_qa_get(ip);
 	if (error)
 		goto fail_free_acls;
@ -723,15 +739,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		goto fail_free_inode;
 	gfs2_cancel_delete_work(io_gl);

+retry:
 	error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
-	BUG_ON(error);
+	if (error == -EBUSY)
+		goto retry;
+	if (error)
+		goto fail_gunlock2;

 	error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID,
 				   &ip->i_iopen_gh);
 	if (error)
 		goto fail_gunlock2;

-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
 	if (error)
 		goto fail_gunlock3;

@ -739,10 +759,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_gunlock3;

-	if (blocks > 1) {
-		ip->i_eattr = ip->i_no_addr + 1;
+	if (blocks > 1)
 		gfs2_init_xattr(ip);
-	}
 	init_dinode(dip, ip, symname);
 	gfs2_trans_end(sdp);

@ -750,9 +768,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	glock_set_object(io_gl, ip);
 	gfs2_set_iop(inode);

-	free_vfs_inode = 0; /* After this point, the inode is no longer
-			       considered free. Any failures need to undo
-			       the gfs2 structures. */
 	if (default_acl) {
 		error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
 		if (error)
@ -785,9 +800,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		file->f_mode |= FMODE_CREATED;
 		error = finish_open(file, dentry, gfs2_open_common);
 	}
-	gfs2_glock_dq_uninit(ghs);
+	gfs2_glock_dq_uninit(&d_gh);
 	gfs2_qa_put(ip);
-	gfs2_glock_dq_uninit(ghs + 1);
+	gfs2_glock_dq_uninit(&gh);
 	gfs2_glock_put(io_gl);
 	gfs2_qa_put(dip);
 	unlock_new_inode(inode);
@ -801,10 +816,6 @@ fail_gunlock3:
 fail_gunlock2:
 	gfs2_glock_put(io_gl);
 fail_free_inode:
-	if (ip->i_gl) {
-		if (free_vfs_inode) /* else evict will do the put for us */
-			gfs2_glock_put(ip->i_gl);
-	}
 	gfs2_rs_deltree(&ip->i_res);
 	gfs2_qa_put(ip);
 fail_free_acls:
@ -812,20 +823,19 @@ fail_free_acls:
 	posix_acl_release(acl);
 fail_gunlock:
 	gfs2_dir_no_add(&da);
-	gfs2_glock_dq_uninit(ghs);
+	gfs2_glock_dq_uninit(&d_gh);
 	if (!IS_ERR_OR_NULL(inode)) {
+		set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
 		clear_nlink(inode);
-		if (!free_vfs_inode)
+		if (ip->i_no_addr)
 			mark_inode_dirty(inode);
-		set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED,
-			&GFS2_I(inode)->i_flags);
 		if (inode->i_state & I_NEW)
 			iget_failed(inode);
 		else
 			iput(inode);
 	}
-	if (gfs2_holder_initialized(ghs + 1))
-		gfs2_glock_dq_uninit(ghs + 1);
+	if (gfs2_holder_initialized(&gh))
+		gfs2_glock_dq_uninit(&gh);
 fail:
 	gfs2_qa_put(dip);
 	return error;
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@ -442,6 +442,12 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	struct buffer_head *bh;
 	int ty;

+	if (!ip->i_gl) {
+		/* This can only happen during incomplete inode creation. */
+		BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+		return;
+	}
+
 	gfs2_ail1_wipe(sdp, bstart, blen);
 	while (blen) {
 		ty = REMOVE_META;
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@ -379,6 +379,7 @@ out:

 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
+	const struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode *str = buf;

 	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -386,15 +387,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
 	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
 	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
-	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-	str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
-	str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
-	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
-	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
-	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+	str->di_mode = cpu_to_be32(inode->i_mode);
+	str->di_uid = cpu_to_be32(i_uid_read(inode));
+	str->di_gid = cpu_to_be32(i_gid_read(inode));
+	str->di_nlink = cpu_to_be32(inode->i_nlink);
+	str->di_size = cpu_to_be64(i_size_read(inode));
+	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
+	str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
+	str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
+	str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);

 	str->di_goal_meta = cpu_to_be64(ip->i_goal);
 	str->di_goal_data = cpu_to_be64(ip->i_goal);
@@ -402,16 +403,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)

 	str->di_flags = cpu_to_be32(ip->i_diskflags);
 	str->di_height = cpu_to_be16(ip->i_height);
-	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+	str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) &&
 					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
 					     GFS2_FORMAT_DE : 0);
 	str->di_depth = cpu_to_be16(ip->i_depth);
 	str->di_entries = cpu_to_be32(ip->i_entries);

 	str->di_eattr = cpu_to_be64(ip->i_eattr);
-	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
-	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
-	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+	str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
+	str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
+	str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
 }

 /**
@@ -475,6 +476,12 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
 	int need_endtrans = 0;
 	int ret;

+	if (unlikely(!ip->i_gl)) {
+		/* This can only happen during incomplete inode creation. */
+		BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+		return;
+	}
+
 	if (unlikely(gfs2_withdrawn(sdp)))
 		return;
 	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
@@ -927,8 +934,7 @@ static int gfs2_drop_inode(struct inode *inode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);

-	if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
-	    inode->i_nlink &&
+	if (inode->i_nlink &&
 	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
 		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
 		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1076,7 +1082,13 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_glock *gl = ip->i_gl;

-	truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
+	if (unlikely(!gl)) {
+		/* This can only happen during incomplete inode creation. */
+		BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+		return;
+	}
+
+	truncate_inode_pages(gfs2_glock2aspace(gl), 0);
 	truncate_inode_pages(&inode->i_data, 0);

 	if (atomic_read(&gl->gl_revokes) == 0) {
@@ -1218,10 +1230,8 @@ static enum dinode_demise evict_should_delete(struct inode *inode,
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	int ret;

-	if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
-		BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
+	if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags)))
 		goto should_delete;
-	}

 	if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
 		return SHOULD_DEFER_EVICTION;
@@ -1294,13 +1304,22 @@ static int evict_unlinked_inode(struct inode *inode)
 			goto out;
 	}

-	/* We're about to clear the bitmap for the dinode, but as soon as we
-	   do, gfs2_create_inode can create another inode at the same block
-	   location and try to set gl_object again. We clear gl_object here so
-	   that subsequent inode creates don't see an old gl_object. */
-	glock_clear_object(ip->i_gl, ip);
+	if (ip->i_gl)
+		gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
+
+	/*
+	 * As soon as we clear the bitmap for the dinode, gfs2_create_inode()
+	 * can get called to recreate it, or even gfs2_inode_lookup() if the
+	 * inode was recreated on another node in the meantime.
+	 *
+	 * However, inserting the new inode into the inode hash table will not
+	 * succeed until the old inode is removed, and that only happens after
+	 * ->evict_inode() returns.  The new inode is attached to its inode and
+	 *  iopen glocks after inserting it into the inode hash table, so at
+	 *  that point we can be sure that both glocks are unused.
+	 */
+
 	ret = gfs2_dinode_dealloc(ip);
-	gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
 out:
 	return ret;
 }
@@ -1367,12 +1386,7 @@ static void gfs2_evict_inode(struct inode *inode)
 	struct gfs2_holder gh;
 	int ret;

-	if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
-		clear_inode(inode);
-		return;
-	}
-
-	if (inode->i_nlink || sb_rdonly(sb))
+	if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr)
 		goto out;

 	gfs2_holder_mark_uninitialized(&gh);
@@ -1405,12 +1419,9 @@ out:
 		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

 		glock_clear_object(gl, ip);
-		if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
-			ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
-			gfs2_glock_dq(&ip->i_iopen_gh);
-		}
 		gfs2_glock_hold(gl);
-		gfs2_holder_uninit(&ip->i_iopen_gh);
+		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		gfs2_glock_put_eventually(gl);
 	}
 	if (ip->i_gl) {
@@ -1429,6 +1440,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 	ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
 	if (!ip)
 		return NULL;
+	ip->i_no_addr = 0;
 	ip->i_flags = 0;
 	ip->i_gl = NULL;
 	gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1412,11 +1412,13 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	ip->i_eattr = 0;
 	gfs2_add_inode_blocks(&ip->i_inode, -1);

-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		gfs2_trans_add_meta(ip->i_gl, dibh);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
+	if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (!error) {
+			gfs2_trans_add_meta(ip->i_gl, dibh);
+			gfs2_dinode_out(ip, dibh->b_data);
+			brelse(dibh);
+		}
 	}

 	gfs2_trans_end(sdp);
@@ -1445,14 +1447,16 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 	if (error)
 		return error;

-	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
-	if (error)
-		goto out_quota;
-
-	if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
-		error = ea_dealloc_indirect(ip);
+	if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+		error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
 		if (error)
 			goto out_quota;
+
+		if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
+			error = ea_dealloc_indirect(ip);
+			if (error)
+				goto out_quota;
+		}
 	}

 	error = ea_dealloc_block(ip);