From 4a99c3d9d6663085e28bc7ac8dae1e985c5a6174 Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Wed, 9 Aug 2006 11:20:15 -0500
Subject: [PATCH] [DLM] reject replies to old requests

When recoveries are aborted by other recoveries we can get replies to
status or names requests that we've given up on. This can cause problems
if we're making another request and receive an old reply. Add a sequence
number to status/names requests and reject replies that don't match. A
field already exists for the seq number that's used in other message
types.

Signed-off-by: David Teigland
Signed-off-by: Steven Whitehouse
---
 fs/dlm/dlm_internal.h |  1 +
 fs/dlm/rcom.c         | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index ec7e401133fd..da7509986699 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -468,6 +468,7 @@ struct dlm_ls {
 	struct mutex		ls_requestqueue_mutex;
 	char			*ls_recover_buf;
 	int			ls_recover_nodeid; /* for debugging */
+	uint64_t		ls_rcom_seq;
 	struct list_head	ls_recover_list;
 	spinlock_t		ls_recover_list_lock;
 	int			ls_recover_list_count;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 5573d8590e58..64ba4929f90b 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -108,6 +108,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
 	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
 	if (error)
 		goto out;
+	rc->rc_id = ++ls->ls_rcom_seq;
 
 	send_rcom(ls, mh, rc);
 
@@ -140,19 +141,30 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 			    sizeof(struct rcom_config), &rc, &mh);
 	if (error)
 		return;
+	rc->rc_id = rc_in->rc_id;
 	rc->rc_result = dlm_recover_status(ls);
 	make_config(ls, (struct rcom_config *) rc->rc_buf);
 
 	send_rcom(ls, mh, rc);
 }
 
-static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
+	if (rc_in->rc_id != ls->ls_rcom_seq) {
+		log_debug(ls, "reject old reply %d got %llx wanted %llx",
+			  rc_in->rc_type, rc_in->rc_id, ls->ls_rcom_seq);
+		return;
+	}
 	memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
 	set_bit(LSFL_RCOM_READY, &ls->ls_flags);
 	wake_up(&ls->ls_wait_general);
 }
 
+static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
+{
+	receive_sync_reply(ls, rc_in);
+}
+
 int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 {
 	struct dlm_rcom *rc;
@@ -173,6 +185,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 	if (error)
 		goto out;
 	memcpy(rc->rc_buf, last_name, last_len);
+	rc->rc_id = ++ls->ls_rcom_seq;
 
 	send_rcom(ls, mh, rc);
 
@@ -209,6 +222,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
 	if (error)
 		return;
+	rc->rc_id = rc_in->rc_id;
 
 	dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
 			      nodeid);
@@ -217,9 +231,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
 static void receive_rcom_names_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
-	memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
-	set_bit(LSFL_RCOM_READY, &ls->ls_flags);
-	wake_up(&ls->ls_wait_general);
+	receive_sync_reply(ls, rc_in);
 }
 
 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)