mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-01 14:44:12 +00:00
drbd: Avoid NetworkFailure state during disconnect
Disconnecting is a cluster-wide state change. In case the peer node agrees to the state transition, it sends back the fact on the meta-data connection and closes both sockets. In case the node that initiated the state transition sees the closing action on the data socket before the P_STATE_CHG_REPLY packet, it was going into one of the network failure states. At least with the fencing option set to something other than "dont-care", the unclean shutdown of the connection causes a short IO freeze or a fence operation. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
c12a3d8c84
commit
599377acb7
3 changed files with 24 additions and 1 deletions
|
@ -857,6 +857,7 @@ enum {
|
||||||
* so shrink_page_list() would not recurse into,
|
* so shrink_page_list() would not recurse into,
|
||||||
* and potentially deadlock on, this drbd worker.
|
* and potentially deadlock on, this drbd worker.
|
||||||
*/
|
*/
|
||||||
|
DISCONNECT_SENT, /* Currently the last bit in this 32bit word */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct drbd_bitmap; /* opaque for drbd_conf */
|
struct drbd_bitmap; /* opaque for drbd_conf */
|
||||||
|
|
|
@ -659,6 +659,9 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mask.conn == C_MASK && val.conn == C_DISCONNECTING)
|
||||||
|
set_bit(DISCONNECT_SENT, &mdev->flags);
|
||||||
|
|
||||||
wait_event(mdev->state_wait,
|
wait_event(mdev->state_wait,
|
||||||
(rv = _req_st_cond(mdev, mask, val)));
|
(rv = _req_st_cond(mdev, mask, val)));
|
||||||
|
|
||||||
|
|
|
@ -534,7 +534,6 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
|
||||||
dev_err(DEV, "sock_recvmsg returned %d\n", rv);
|
dev_err(DEV, "sock_recvmsg returned %d\n", rv);
|
||||||
break;
|
break;
|
||||||
} else if (rv == 0) {
|
} else if (rv == 0) {
|
||||||
dev_info(DEV, "sock was shut down by peer\n");
|
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
/* signal came in, or peer/link went down,
|
/* signal came in, or peer/link went down,
|
||||||
|
@ -547,9 +546,21 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
|
||||||
|
|
||||||
set_fs(oldfs);
|
set_fs(oldfs);
|
||||||
|
|
||||||
|
if (rv == 0) {
|
||||||
|
if (test_bit(DISCONNECT_SENT, &mdev->flags)) {
|
||||||
|
long t; /* time_left */
|
||||||
|
t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED,
|
||||||
|
mdev->net_conf->ping_timeo * HZ/10);
|
||||||
|
if (t)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
dev_info(DEV, "sock was shut down by peer\n");
|
||||||
|
}
|
||||||
|
|
||||||
if (rv != size)
|
if (rv != size)
|
||||||
drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
|
drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
|
||||||
|
|
||||||
|
out:
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -760,6 +771,7 @@ static int drbd_connect(struct drbd_conf *mdev)
|
||||||
|
|
||||||
D_ASSERT(!mdev->data.socket);
|
D_ASSERT(!mdev->data.socket);
|
||||||
|
|
||||||
|
clear_bit(DISCONNECT_SENT, &mdev->flags);
|
||||||
if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
|
if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
|
||||||
return -2;
|
return -2;
|
||||||
|
|
||||||
|
@ -4680,6 +4692,13 @@ int drbd_asender(struct drbd_thread *thi)
|
||||||
received += rv;
|
received += rv;
|
||||||
buf += rv;
|
buf += rv;
|
||||||
} else if (rv == 0) {
|
} else if (rv == 0) {
|
||||||
|
if (test_bit(DISCONNECT_SENT, &mdev->flags)) {
|
||||||
|
long t; /* time_left */
|
||||||
|
t = wait_event_timeout(mdev->state_wait, mdev->state.conn < C_CONNECTED,
|
||||||
|
mdev->net_conf->ping_timeo * HZ/10);
|
||||||
|
if (t)
|
||||||
|
break;
|
||||||
|
}
|
||||||
dev_err(DEV, "meta connection shut down by peer.\n");
|
dev_err(DEV, "meta connection shut down by peer.\n");
|
||||||
goto reconnect;
|
goto reconnect;
|
||||||
} else if (rv == -EAGAIN) {
|
} else if (rv == -EAGAIN) {
|
||||||
|
|
Loading…
Reference in a new issue