drbd: revert "delay probes", feature is being re-implemented differently

This was a now-abandoned attempt to throttle resync bandwidth
based on the delay it causes on the bulk data socket.
It has no user base yet, and has already been disabled by
9173465ccb51c09cc3102a10af93e9f469a0af6f.
This removes the now-unused code.

The basic feature, namely using up "idle" bandwidth
of the network and disk IO subsystems, with minimal impact
on application IO, is being reimplemented differently.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
This commit is contained in:
Lars Ellenberg 2010-08-03 20:20:20 +02:00 committed by Jens Axboe
parent 85f4cc17a6
commit e7f52dfb4f
8 changed files with 28 additions and 225 deletions

View file

@ -550,12 +550,6 @@ struct p_delay_probe {
u32 offset; /* usecs the probe got sent after the reference time point */
} __packed;
struct delay_probe {
struct list_head list;
unsigned int seq_num;
struct timeval time;
};
/* DCBP: Drbd Compressed Bitmap Packet ... */
static inline enum drbd_bitmap_code
DCBP_get_code(struct p_compressed_bm *p)
@ -942,11 +936,9 @@ struct drbd_conf {
unsigned int ko_count;
struct drbd_work resync_work,
unplug_work,
md_sync_work,
delay_probe_work;
md_sync_work;
struct timer_list resync_timer;
struct timer_list md_sync_timer;
struct timer_list delay_probe_timer;
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp;
@ -1062,12 +1054,6 @@ struct drbd_conf {
u64 ed_uuid; /* UUID of the exposed data */
struct mutex state_mutex;
char congestion_reason; /* Why we where congested... */
struct list_head delay_probes; /* protected by peer_seq_lock */
int data_delay; /* Delay of packets on the data-sock behind meta-sock */
unsigned int delay_seq; /* To generate sequence numbers of delay probes */
struct timeval dps_time; /* delay-probes-start-time */
unsigned int dp_volume_last; /* send_cnt of last delay probe */
int c_sync_rate; /* current resync rate after delay_probe magic */
};
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)

View file

@ -2184,43 +2184,6 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
return ok;
}
static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds)
{
struct p_delay_probe dp;
int offset, ok = 0;
struct timeval now;
mutex_lock(&ds->mutex);
if (likely(ds->socket)) {
do_gettimeofday(&now);
offset = now.tv_usec - mdev->dps_time.tv_usec +
(now.tv_sec - mdev->dps_time.tv_sec) * 1000000;
dp.seq_num = cpu_to_be32(mdev->delay_seq);
dp.offset = cpu_to_be32(offset);
ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE,
(struct p_header *)&dp, sizeof(dp), 0);
}
mutex_unlock(&ds->mutex);
return ok;
}
static int drbd_send_delay_probes(struct drbd_conf *mdev)
{
int ok;
mdev->delay_seq++;
do_gettimeofday(&mdev->dps_time);
ok = drbd_send_delay_probe(mdev, &mdev->meta);
ok = ok && drbd_send_delay_probe(mdev, &mdev->data);
mdev->dp_volume_last = mdev->send_cnt;
mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10);
return ok;
}
/* called on sndtimeo
* returns FALSE if we should retry,
* TRUE if we think connection is dead
@ -2369,27 +2332,6 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
return 1;
}
static void consider_delay_probes(struct drbd_conf *mdev)
{
return;
}
static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
if (!cancel && mdev->state.conn == C_SYNC_SOURCE)
drbd_send_delay_probes(mdev);
return 1;
}
static void delay_probe_timer_fn(unsigned long data)
{
struct drbd_conf *mdev = (struct drbd_conf *) data;
if (list_empty(&mdev->delay_probe_work.list))
drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work);
}
/* Used to send write requests
* R_PRIMARY -> Peer (P_DATA)
*/
@ -2453,9 +2395,6 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
drbd_put_data_sock(mdev);
if (ok)
consider_delay_probes(mdev);
return ok;
}
@ -2502,9 +2441,6 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
drbd_put_data_sock(mdev);
if (ok)
consider_delay_probes(mdev);
return ok;
}
@ -2666,10 +2602,6 @@ static void drbd_set_defaults(struct drbd_conf *mdev)
/* .rate = */ DRBD_RATE_DEF,
/* .after = */ DRBD_AFTER_DEF,
/* .al_extents = */ DRBD_AL_EXTENTS_DEF,
/* .dp_volume = */ DRBD_DP_VOLUME_DEF,
/* .dp_interval = */ DRBD_DP_INTERVAL_DEF,
/* .throttle_th = */ DRBD_RS_THROTTLE_TH_DEF,
/* .hold_off_th = */ DRBD_RS_HOLD_OFF_TH_DEF,
/* .verify_alg = */ {}, 0,
/* .cpu_mask = */ {}, 0,
/* .csums_alg = */ {}, 0,
@ -2736,24 +2668,17 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
INIT_LIST_HEAD(&mdev->unplug_work.list);
INIT_LIST_HEAD(&mdev->md_sync_work.list);
INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
INIT_LIST_HEAD(&mdev->delay_probes);
INIT_LIST_HEAD(&mdev->delay_probe_work.list);
mdev->resync_work.cb = w_resync_inactive;
mdev->unplug_work.cb = w_send_write_hint;
mdev->md_sync_work.cb = w_md_sync;
mdev->bm_io_work.w.cb = w_bitmap_io;
mdev->delay_probe_work.cb = w_delay_probes;
init_timer(&mdev->resync_timer);
init_timer(&mdev->md_sync_timer);
init_timer(&mdev->delay_probe_timer);
mdev->resync_timer.function = resync_timer_fn;
mdev->resync_timer.data = (unsigned long) mdev;
mdev->md_sync_timer.function = md_sync_timer_fn;
mdev->md_sync_timer.data = (unsigned long) mdev;
mdev->delay_probe_timer.function = delay_probe_timer_fn;
mdev->delay_probe_timer.data = (unsigned long) mdev;
init_waitqueue_head(&mdev->misc_wait);
init_waitqueue_head(&mdev->state_wait);

View file

@ -1557,10 +1557,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
sc.rate = DRBD_RATE_DEF;
sc.after = DRBD_AFTER_DEF;
sc.al_extents = DRBD_AL_EXTENTS_DEF;
sc.dp_volume = DRBD_DP_VOLUME_DEF;
sc.dp_interval = DRBD_DP_INTERVAL_DEF;
sc.throttle_th = DRBD_RS_THROTTLE_TH_DEF;
sc.hold_off_th = DRBD_RS_HOLD_OFF_TH_DEF;
} else
memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));

View file

@ -73,21 +73,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
/* if more than 1 GB display in MB */
if (mdev->rs_total > 0x100000L)
seq_printf(seq, "(%lu/%lu)M",
seq_printf(seq, "(%lu/%lu)M\n\t",
(unsigned long) Bit2KB(rs_left >> 10),
(unsigned long) Bit2KB(mdev->rs_total >> 10));
else
seq_printf(seq, "(%lu/%lu)K",
seq_printf(seq, "(%lu/%lu)K\n\t",
(unsigned long) Bit2KB(rs_left),
(unsigned long) Bit2KB(mdev->rs_total));
if (mdev->state.conn == C_SYNC_TARGET)
seq_printf(seq, " queue_delay: %d.%d ms\n\t",
mdev->data_delay / 1000,
(mdev->data_delay % 1000) / 100);
else if (mdev->state.conn == C_SYNC_SOURCE)
seq_printf(seq, " delay_probe: %u\n\t", mdev->delay_seq);
/* see drivers/md/md.c
* We do not want to overflow, so the order of operands and
* the * 100 / 100 trick are important. We do a +1 to be
@ -135,14 +128,6 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
else
seq_printf(seq, " (%ld)", dbdt);
if (mdev->state.conn == C_SYNC_TARGET) {
if (mdev->c_sync_rate > 1000)
seq_printf(seq, " want: %d,%03d",
mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
else
seq_printf(seq, " want: %d", mdev->c_sync_rate);
}
seq_printf(seq, " K/sec\n");
}

View file

@ -3555,14 +3555,15 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
return ok;
}
static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent)
{
/* TODO zero copy sink :) */
static char sink[128];
int size, want, r;
dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
h->command, h->length);
if (!silent)
dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
h->command, h->length);
size = h->length;
while (size > 0) {
@ -3574,6 +3575,16 @@ static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
return size == 0;
}
static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
{
return receive_skip_(mdev, h, 0);
}
static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h)
{
return receive_skip_(mdev, h, 1);
}
static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
{
if (mdev->state.disk >= D_INCONSISTENT)
@ -3586,92 +3597,6 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
return TRUE;
}
static void timeval_sub_us(struct timeval* tv, unsigned int us)
{
tv->tv_sec -= us / 1000000;
us = us % 1000000;
if (tv->tv_usec > us) {
tv->tv_usec += 1000000;
tv->tv_sec--;
}
tv->tv_usec -= us;
}
static void got_delay_probe(struct drbd_conf *mdev, int from, struct p_delay_probe *p)
{
struct delay_probe *dp;
struct list_head *le;
struct timeval now;
int seq_num;
int offset;
int data_delay;
seq_num = be32_to_cpu(p->seq_num);
offset = be32_to_cpu(p->offset);
spin_lock(&mdev->peer_seq_lock);
if (!list_empty(&mdev->delay_probes)) {
if (from == USE_DATA_SOCKET)
le = mdev->delay_probes.next;
else
le = mdev->delay_probes.prev;
dp = list_entry(le, struct delay_probe, list);
if (dp->seq_num == seq_num) {
list_del(le);
spin_unlock(&mdev->peer_seq_lock);
do_gettimeofday(&now);
timeval_sub_us(&now, offset);
data_delay =
now.tv_usec - dp->time.tv_usec +
(now.tv_sec - dp->time.tv_sec) * 1000000;
if (data_delay > 0)
mdev->data_delay = data_delay;
kfree(dp);
return;
}
if (dp->seq_num > seq_num) {
spin_unlock(&mdev->peer_seq_lock);
dev_warn(DEV, "Previous allocation failure of struct delay_probe?\n");
return; /* Do not alloca a struct delay_probe.... */
}
}
spin_unlock(&mdev->peer_seq_lock);
dp = kmalloc(sizeof(struct delay_probe), GFP_NOIO);
if (!dp) {
dev_warn(DEV, "Failed to allocate a struct delay_probe, do not worry.\n");
return;
}
dp->seq_num = seq_num;
do_gettimeofday(&dp->time);
timeval_sub_us(&dp->time, offset);
spin_lock(&mdev->peer_seq_lock);
if (from == USE_DATA_SOCKET)
list_add(&dp->list, &mdev->delay_probes);
else
list_add_tail(&dp->list, &mdev->delay_probes);
spin_unlock(&mdev->peer_seq_lock);
}
static int receive_delay_probe(struct drbd_conf *mdev, struct p_header *h)
{
struct p_delay_probe *p = (struct p_delay_probe *)h;
ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
if (drbd_recv(mdev, h->payload, h->length) != h->length)
return FALSE;
got_delay_probe(mdev, USE_DATA_SOCKET, p);
return TRUE;
}
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *);
static drbd_cmd_handler_f drbd_default_handler[] = {
@ -3695,7 +3620,7 @@ static drbd_cmd_handler_f drbd_default_handler[] = {
[P_OV_REQUEST] = receive_DataRequest,
[P_OV_REPLY] = receive_DataRequest,
[P_CSUM_RS_REQUEST] = receive_DataRequest,
[P_DELAY_PROBE] = receive_delay_probe,
[P_DELAY_PROBE] = receive_skip_silent,
/* anything missing from this table is in
* the asender_tbl, see get_asender_cmd */
[P_MAX_CMD] = NULL,
@ -4472,11 +4397,9 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
return TRUE;
}
static int got_delay_probe_m(struct drbd_conf *mdev, struct p_header *h)
static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h)
{
struct p_delay_probe *p = (struct p_delay_probe *)h;
got_delay_probe(mdev, USE_META_SOCKET, p);
/* IGNORE */
return TRUE;
}
@ -4504,7 +4427,7 @@ static struct asender_cmd *get_asender_cmd(int cmd)
[P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
[P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
[P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_delay_probe_m },
[P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_something_to_ignore_m },
[P_MAX_CMD] = { 0, NULL },
};
if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)

View file

@ -424,18 +424,6 @@ void resync_timer_fn(unsigned long data)
drbd_queue_work(&mdev->data.work, &mdev->resync_work);
}
static int calc_resync_rate(struct drbd_conf *mdev)
{
int d = mdev->data_delay / 1000; /* us -> ms */
int td = mdev->sync_conf.throttle_th * 100; /* 0.1s -> ms */
int hd = mdev->sync_conf.hold_off_th * 100; /* 0.1s -> ms */
int cr = mdev->sync_conf.rate;
return d <= td ? cr :
d >= hd ? 0 :
cr + (cr * (td - d) / (hd - td));
}
int w_make_resync_request(struct drbd_conf *mdev,
struct drbd_work *w, int cancel)
{
@ -473,8 +461,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
max_segment_size = mdev->agreed_pro_version < 94 ?
queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
mdev->c_sync_rate = calc_resync_rate(mdev);
number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ);
pe = atomic_read(&mdev->rs_pending_cnt);
mutex_lock(&mdev->data.mutex);

View file

@ -53,7 +53,7 @@
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.8"
#define REL_VERSION "8.3.8.1"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 94

View file

@ -78,10 +78,11 @@ NL_PACKET(syncer_conf, 8,
NL_INTEGER( 30, T_MAY_IGNORE, rate)
NL_INTEGER( 31, T_MAY_IGNORE, after)
NL_INTEGER( 32, T_MAY_IGNORE, al_extents)
NL_INTEGER( 71, T_MAY_IGNORE, dp_volume)
NL_INTEGER( 72, T_MAY_IGNORE, dp_interval)
NL_INTEGER( 73, T_MAY_IGNORE, throttle_th)
NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th)
/* NL_INTEGER( 71, T_MAY_IGNORE, dp_volume)
* NL_INTEGER( 72, T_MAY_IGNORE, dp_interval)
* NL_INTEGER( 73, T_MAY_IGNORE, throttle_th)
* NL_INTEGER( 74, T_MAY_IGNORE, hold_off_th)
* feature will be reimplemented differently with 8.3.9 */
NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)