dlm for 6.9

 - Fix mistaken variable assignment that caused a refcounting problem.
 - Revert a recent change that began using atomic counters where they
   were not needed (for lkb wait_count).
 - Add comments around forced state reset for waiting lock operations
   during recovery.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEcGkeEvkvjdvlR90nOBtzx/yAaaoFAmX4raoACgkQOBtzx/yA
 aapyThAAtLcTZXOa9MuZDvLtaQKX4c2MDlqiAhdL0YOYnz3+DAveA8HF1FRbVwL0
 74lA1O/GX0t2TdCrLiq75u+N/Sm2ACtbZEr8z6VeEoxxtOwCVbGKjA0CwDgvhdSe
 hUv5beO4mlguc16l4+u88z1Ta6GylXmWHRL6l2q4dPKmO4qVX6wn9JUT4JHJSQy/
 ACJ3+Lu7ndREBzCmqb4cR4TcHAhBynYmV7IIE3LQprgkCKiX2A3boeOIk+lEhUn5
 aqmwNNF2WDjJ1D5QVKbXu07MraD71rnyZBDuHzjprP01OhgXfUHLIcgdi7GzK8aN
 KnQ9S5hQWHzTiWA/kYgrUq/S5124plm2pMRyh1WDG6g3dhBxh7XsOHUxtgbLaurJ
 LmMxdQgH0lhJ3f+LSm3w8e3m45KxTeCYC2NUVg/icjOGUjAsVx1xMDXzMxoABoWO
 GGVED4i4CesjOyijMuRO9G/0MRb/lIyZkfoZgtHgL20yphmtv0B5XIIz062N28Wf
 PqmsYUz4ESYkxR4u/5VPBey5aYYdhugnOSERC6yH4QQJXyRgGWQn/CSuRrEmJJS2
 CurprPKx99XJZjZE7RJNlvpUrSBcD9Y7R6I3vo6RyrUCNwPJ0Y+Qvydvc9FoMN3R
 tn7fJe7tDfEEsukhGkwp90vK3MLbW5iKv7IaAxyALdSW12A23WM=
 =6RCz
 -----END PGP SIGNATURE-----

Merge tag 'dlm-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:

 - Fix mistaken variable assignment that caused a refcounting problem

 - Revert a recent change that began using atomic counters where they
   were not needed (for lkb wait_count)

 - Add comments around forced state reset for waiting lock operations
   during recovery

* tag 'dlm-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
  dlm: add comments about forced waiters reset
  dlm: revert atomic_t lkb_wait_count
  dlm: fix user space lkb refcounting
This commit is contained in:
Linus Torvalds 2024-03-18 15:39:48 -07:00
commit b3603fcb79
3 changed files with 84 additions and 42 deletions

View File

@ -246,7 +246,7 @@ struct dlm_lkb {
int8_t lkb_highbast; /* highest mode bast sent for */ int8_t lkb_highbast; /* highest mode bast sent for */
int8_t lkb_wait_type; /* type of reply waiting for */ int8_t lkb_wait_type; /* type of reply waiting for */
atomic_t lkb_wait_count; int8_t lkb_wait_count;
int lkb_wait_nodeid; /* for debugging */ int lkb_wait_nodeid; /* for debugging */
struct list_head lkb_statequeue; /* rsb g/c/w list */ struct list_head lkb_statequeue; /* rsb g/c/w list */

View File

@ -1407,7 +1407,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
{ {
struct dlm_ls *ls = lkb->lkb_resource->res_ls; struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int error = 0; int error = 0;
int wc;
mutex_lock(&ls->ls_waiters_mutex); mutex_lock(&ls->ls_waiters_mutex);
@ -1429,17 +1428,20 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
error = -EBUSY; error = -EBUSY;
goto out; goto out;
} }
wc = atomic_inc_return(&lkb->lkb_wait_count); lkb->lkb_wait_count++;
hold_lkb(lkb); hold_lkb(lkb);
log_debug(ls, "addwait %x cur %d overlap %d count %d f %x", log_debug(ls, "addwait %x cur %d overlap %d count %d f %x",
lkb->lkb_id, lkb->lkb_wait_type, mstype, wc, lkb->lkb_id, lkb->lkb_wait_type, mstype,
dlm_iflags_val(lkb)); lkb->lkb_wait_count, dlm_iflags_val(lkb));
goto out; goto out;
} }
wc = atomic_fetch_inc(&lkb->lkb_wait_count); DLM_ASSERT(!lkb->lkb_wait_count,
DLM_ASSERT(!wc, dlm_print_lkb(lkb); printk("wait_count %d\n", wc);); dlm_print_lkb(lkb);
printk("wait_count %d\n", lkb->lkb_wait_count););
lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype; lkb->lkb_wait_type = mstype;
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */ lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb); hold_lkb(lkb);
@ -1502,7 +1504,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
log_debug(ls, "remwait %x convert_reply zap overlap_cancel", log_debug(ls, "remwait %x convert_reply zap overlap_cancel",
lkb->lkb_id); lkb->lkb_id);
lkb->lkb_wait_type = 0; lkb->lkb_wait_type = 0;
atomic_dec(&lkb->lkb_wait_count); lkb->lkb_wait_count--;
unhold_lkb(lkb); unhold_lkb(lkb);
goto out_del; goto out_del;
} }
@ -1529,15 +1531,16 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
if (overlap_done && lkb->lkb_wait_type) { if (overlap_done && lkb->lkb_wait_type) {
log_error(ls, "remwait error %x reply %d wait_type %d overlap", log_error(ls, "remwait error %x reply %d wait_type %d overlap",
lkb->lkb_id, mstype, lkb->lkb_wait_type); lkb->lkb_id, mstype, lkb->lkb_wait_type);
atomic_dec(&lkb->lkb_wait_count); lkb->lkb_wait_count--;
unhold_lkb(lkb); unhold_lkb(lkb);
lkb->lkb_wait_type = 0; lkb->lkb_wait_type = 0;
} }
DLM_ASSERT(atomic_read(&lkb->lkb_wait_count), dlm_print_lkb(lkb);); DLM_ASSERT(lkb->lkb_wait_count, dlm_print_lkb(lkb););
clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags); clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags);
if (atomic_dec_and_test(&lkb->lkb_wait_count)) lkb->lkb_wait_count--;
if (!lkb->lkb_wait_count)
list_del_init(&lkb->lkb_wait_reply); list_del_init(&lkb->lkb_wait_reply);
unhold_lkb(lkb); unhold_lkb(lkb);
return 0; return 0;
@ -2666,7 +2669,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
goto out; goto out;
/* lock not allowed if there's any op in progress */ /* lock not allowed if there's any op in progress */
if (lkb->lkb_wait_type || atomic_read(&lkb->lkb_wait_count)) if (lkb->lkb_wait_type || lkb->lkb_wait_count)
goto out; goto out;
if (is_overlap(lkb)) if (is_overlap(lkb))
@ -2728,7 +2731,7 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
/* normal unlock not allowed if there's any op in progress */ /* normal unlock not allowed if there's any op in progress */
if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) && if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) &&
(lkb->lkb_wait_type || atomic_read(&lkb->lkb_wait_count))) (lkb->lkb_wait_type || lkb->lkb_wait_count))
goto out; goto out;
/* an lkb may be waiting for an rsb lookup to complete where the /* an lkb may be waiting for an rsb lookup to complete where the
@ -5011,21 +5014,32 @@ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
return lkb; return lkb;
} }
/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the /*
master or dir-node for r. Processing the lkb may result in it being placed * Forced state reset for locks that were in the middle of remote operations
back on waiters. */ * when recovery happened (i.e. lkbs that were on the waiters list, waiting
* for a reply from a remote operation.) The lkbs remaining on the waiters
/* We do this after normal locking has been enabled and any saved messages * list need to be reevaluated; some may need resending to a different node
(in requestqueue) have been processed. We should be confident that at * than previously, and some may now need local handling rather than remote.
this point we won't get or process a reply to any of these waiting *
operations. But, new ops may be coming in on the rsbs/locks here from * First, the lkb state for the voided remote operation is forcibly reset,
userspace or remotely. */ * equivalent to what remove_from_waiters() would normally do:
* . lkb removed from ls_waiters list
/* there may have been an overlap unlock/cancel prior to recovery or after * . lkb wait_type cleared
recovery. if before, the lkb may still have a pos wait_count; if after, the * . lkb waiters_count cleared
overlap flag would just have been set and nothing new sent. we can be * . lkb ref count decremented for each waiters_count (almost always 1,
confident here than any replies to either the initial op or overlap ops * but possibly 2 in case of cancel/unlock overlapping, which means
prior to recovery have been received. */ * two remote replies were being expected for the lkb.)
*
* Second, the lkb is reprocessed like an original operation would be,
* by passing it to _request_lock or _convert_lock, which will either
* process the lkb operation locally, or send it to a remote node again
* and put the lkb back onto the waiters list.
*
* When reprocessing the lkb, we may find that it's flagged for an overlapping
* force-unlock or cancel, either from before recovery began, or after recovery
* finished. If this is the case, the unlock/cancel is done directly, and the
* original operation is not initiated again (no _request_lock/_convert_lock.)
*/
int dlm_recover_waiters_post(struct dlm_ls *ls) int dlm_recover_waiters_post(struct dlm_ls *ls)
{ {
@ -5040,6 +5054,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
break; break;
} }
/*
* Find an lkb from the waiters list that's been affected by
* recovery node changes, and needs to be reprocessed. Does
* hold_lkb(), adding a refcount.
*/
lkb = find_resend_waiter(ls); lkb = find_resend_waiter(ls);
if (!lkb) if (!lkb)
break; break;
@ -5048,6 +5067,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
hold_rsb(r); hold_rsb(r);
lock_rsb(r); lock_rsb(r);
/*
* If the lkb has been flagged for a force unlock or cancel,
* then the reprocessing below will be replaced by just doing
* the unlock/cancel directly.
*/
mstype = lkb->lkb_wait_type; mstype = lkb->lkb_wait_type;
oc = test_and_clear_bit(DLM_IFL_OVERLAP_CANCEL_BIT, oc = test_and_clear_bit(DLM_IFL_OVERLAP_CANCEL_BIT,
&lkb->lkb_iflags); &lkb->lkb_iflags);
@ -5061,22 +5085,40 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid, r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid,
dlm_dir_nodeid(r), oc, ou); dlm_dir_nodeid(r), oc, ou);
/* At this point we assume that we won't get a reply to any /*
previous op or overlap op on this lock. First, do a big * No reply to the pre-recovery operation will now be received,
remove_from_waiters() for all previous ops. */ * so a forced equivalent of remove_from_waiters() is needed to
* reset the waiters state that was in place before recovery.
*/
clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags); clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags);
lkb->lkb_wait_type = 0;
/* drop all wait_count references we still
* hold a reference for this iteration.
*/
while (!atomic_dec_and_test(&lkb->lkb_wait_count))
unhold_lkb(lkb);
/* Forcibly clear wait_type */
lkb->lkb_wait_type = 0;
/*
* Forcibly reset wait_count and associated refcount. The
* wait_count will almost always be 1, but in case of an
* overlapping unlock/cancel it could be 2: see where
* add_to_waiters() finds the lkb is already on the waiters
* list and does lkb_wait_count++; hold_lkb().
*/
while (lkb->lkb_wait_count) {
lkb->lkb_wait_count--;
unhold_lkb(lkb);
}
/* Forcibly remove from waiters list */
mutex_lock(&ls->ls_waiters_mutex); mutex_lock(&ls->ls_waiters_mutex);
list_del_init(&lkb->lkb_wait_reply); list_del_init(&lkb->lkb_wait_reply);
mutex_unlock(&ls->ls_waiters_mutex); mutex_unlock(&ls->ls_waiters_mutex);
/*
* The lkb is now clear of all prior waiters state and can be
* processed locally, or sent to remote node again, or directly
* cancelled/unlocked.
*/
if (oc || ou) { if (oc || ou) {
/* do an unlock or cancel instead of resending */ /* do an unlock or cancel instead of resending */
switch (mstype) { switch (mstype) {

View File

@ -806,7 +806,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
struct dlm_lkb *lkb; struct dlm_lkb *lkb;
DECLARE_WAITQUEUE(wait, current); DECLARE_WAITQUEUE(wait, current);
struct dlm_callback *cb; struct dlm_callback *cb;
int rv, copy_lvb = 0; int rv, ret, copy_lvb = 0;
int old_mode, new_mode; int old_mode, new_mode;
if (count == sizeof(struct dlm_device_version)) { if (count == sizeof(struct dlm_device_version)) {
@ -906,9 +906,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
trace_dlm_ast(lkb->lkb_resource->res_ls, lkb); trace_dlm_ast(lkb->lkb_resource->res_ls, lkb);
} }
rv = copy_result_to_user(lkb->lkb_ua, ret = copy_result_to_user(lkb->lkb_ua,
test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags), test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
cb->flags, cb->mode, copy_lvb, buf, count); cb->flags, cb->mode, copy_lvb, buf, count);
kref_put(&cb->ref, dlm_release_callback); kref_put(&cb->ref, dlm_release_callback);
@ -916,7 +916,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
if (rv == DLM_DEQUEUE_CALLBACK_LAST) if (rv == DLM_DEQUEUE_CALLBACK_LAST)
dlm_put_lkb(lkb); dlm_put_lkb(lkb);
return rv; return ret;
} }
static __poll_t device_poll(struct file *file, poll_table *wait) static __poll_t device_poll(struct file *file, poll_table *wait)