Merge branch 'for-jens' of git://git.drbd.org/linux-2.6-drbd into for-2.6.37/drivers

This commit is contained in:
Jens Axboe 2010-10-23 18:43:55 +02:00
commit 53c2eb24ff
9 changed files with 202 additions and 344 deletions

View file

@ -78,11 +78,10 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
init_completion(&md_io.event);
md_io.error = 0;
if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags))
rw |= REQ_HARDBARRIER;
if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
rw |= REQ_FUA;
rw |= REQ_UNPLUG | REQ_SYNC;
retry:
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_bdev = bdev->md_bdev;
bio->bi_sector = sector;
@ -100,17 +99,6 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
wait_for_completion(&md_io.event);
ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;
/* check for unsupported barrier op.
* would rather check on EOPNOTSUPP, but that is not reliable.
* don't try again for ANY return value != 0 */
if (unlikely((bio->bi_rw & REQ_HARDBARRIER) && !ok)) {
/* Try again with no barrier */
dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
set_bit(MD_NO_BARRIER, &mdev->flags);
rw &= ~REQ_HARDBARRIER;
bio_put(bio);
goto retry;
}
out:
bio_put(bio);
return ok;
@ -284,18 +272,32 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
u32 xor_sum = 0;
if (!get_ldev(mdev)) {
dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n");
dev_err(DEV,
"disk is %s, cannot start al transaction (-%d +%d)\n",
drbd_disk_str(mdev->state.disk), evicted, new_enr);
complete(&((struct update_al_work *)w)->event);
return 1;
}
/* do we have to do a bitmap write, first?
* TODO reduce maximum latency:
* submit both bios, then wait for both,
* instead of doing two synchronous sector writes. */
* instead of doing two synchronous sector writes.
* For now, we must not write the transaction,
* if we cannot write out the bitmap of the evicted extent. */
if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */
/* The bitmap write may have failed, causing a state change. */
if (mdev->state.disk < D_INCONSISTENT) {
dev_err(DEV,
"disk is %s, cannot write al transaction (-%d +%d)\n",
drbd_disk_str(mdev->state.disk), evicted, new_enr);
complete(&((struct update_al_work *)w)->event);
put_ldev(mdev);
return 1;
}
mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
buffer = (struct al_transaction *)page_address(mdev->md_io_page);
buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
@ -739,7 +741,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
unsigned int enr;
unsigned long add = 0;
char ppb[10];
int i;
int i, tmp;
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@ -747,7 +749,9 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
enr = lc_element_by_index(mdev->act_log, i)->lc_number;
if (enr == LC_FREE)
continue;
add += drbd_bm_ALe_set_all(mdev, enr);
tmp = drbd_bm_ALe_set_all(mdev, enr);
dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
add += tmp;
}
lc_unlock(mdev->act_log);

View file

@ -114,11 +114,11 @@ struct drbd_conf;
#define D_ASSERT(exp) if (!(exp)) \
dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
#define ERR_IF(exp) if (({ \
int _b = (exp) != 0; \
if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \
__func__, #exp, __FILE__, __LINE__); \
_b; \
#define ERR_IF(exp) if (({ \
int _b = (exp) != 0; \
if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n", \
__func__, #exp, __FILE__, __LINE__); \
_b; \
}))
/* Defines to control fault insertion */
@ -749,17 +749,12 @@ struct drbd_epoch {
/* drbd_epoch flag bits */
enum {
DE_BARRIER_IN_NEXT_EPOCH_ISSUED,
DE_BARRIER_IN_NEXT_EPOCH_DONE,
DE_CONTAINS_A_BARRIER,
DE_HAVE_BARRIER_NUMBER,
DE_IS_FINISHING,
};
enum epoch_event {
EV_PUT,
EV_GOT_BARRIER_NR,
EV_BARRIER_DONE,
EV_BECAME_LAST,
EV_CLEANUP = 32, /* used as flag */
};
@ -801,11 +796,6 @@ enum {
__EE_CALL_AL_COMPLETE_IO,
__EE_MAY_SET_IN_SYNC,
/* This epoch entry closes an epoch using a barrier.
* On sucessful completion, the epoch is released,
* and the P_BARRIER_ACK send. */
__EE_IS_BARRIER,
/* In case a barrier failed,
* we need to resubmit without the barrier flag. */
__EE_RESUBMITTED,
@ -820,7 +810,6 @@ enum {
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
#define EE_IS_BARRIER (1<<__EE_IS_BARRIER)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
@ -843,16 +832,15 @@ enum {
* Gets cleared when the state.conn
* goes into C_CONNECTED state. */
WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */
NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */
CONSIDER_RESYNC,
MD_NO_BARRIER, /* meta data device does not support barriers,
so don't even try */
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
SUSPEND_IO, /* suspend application io */
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */
GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
WAS_IO_ERROR, /* Local disk failed returned IO error */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
@ -947,7 +935,6 @@ enum write_ordering_e {
WO_none,
WO_drain_io,
WO_bdev_flush,
WO_bio_barrier
};
struct fifo_buffer {
@ -1281,6 +1268,7 @@ extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
extern void drbd_go_diskless(struct drbd_conf *mdev);
extern void drbd_ldev_destroy(struct drbd_conf *mdev);
/* Meta data layout
@ -1798,17 +1786,17 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
case EP_PASS_ON:
if (!forcedetach) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s."
"Passing error on...\n", where);
dev_err(DEV, "Local IO failed in %s.\n", where);
break;
}
/* NOTE fall through to detach case if forcedetach set */
case EP_DETACH:
case EP_CALL_HELPER:
set_bit(WAS_IO_ERROR, &mdev->flags);
if (mdev->state.disk > D_FAILED) {
_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
dev_err(DEV, "Local IO failed in %s."
"Detaching...\n", where);
dev_err(DEV,
"Local IO failed in %s. Detaching...\n", where);
}
break;
}
@ -2127,7 +2115,11 @@ static inline void put_ldev(struct drbd_conf *mdev)
__release(local);
D_ASSERT(i >= 0);
if (i == 0) {
if (mdev->state.disk == D_DISKLESS)
/* even internal references gone, safe to destroy */
drbd_ldev_destroy(mdev);
if (mdev->state.disk == D_FAILED)
/* all application IO references gone. */
drbd_go_diskless(mdev);
wake_up(&mdev->misc_wait);
}
@ -2138,6 +2130,10 @@ static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_stat
{
int io_allowed;
/* never get a reference while D_DISKLESS */
if (mdev->state.disk == D_DISKLESS)
return 0;
atomic_inc(&mdev->local_cnt);
io_allowed = (mdev->state.disk >= mins);
if (!io_allowed)
@ -2406,12 +2402,12 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
{
int r;
if (test_bit(MD_NO_BARRIER, &mdev->flags))
if (test_bit(MD_NO_FUA, &mdev->flags))
return;
r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL);
if (r) {
set_bit(MD_NO_BARRIER, &mdev->flags);
set_bit(MD_NO_FUA, &mdev->flags);
dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
}
}

View file

@ -835,6 +835,15 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
ns.conn = os.conn;
/* we cannot fail (again) if we already detached */
if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
ns.disk = D_DISKLESS;
/* if we are only D_ATTACHING yet,
* we can (and should) go directly to D_DISKLESS. */
if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
ns.disk = D_DISKLESS;
/* After C_DISCONNECTING only C_STANDALONE may follow */
if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
ns.conn = os.conn;
@ -1056,7 +1065,15 @@ int __drbd_set_state(struct drbd_conf *mdev,
!test_and_set_bit(CONFIG_PENDING, &mdev->flags))
set_bit(DEVICE_DYING, &mdev->flags);
mdev->state.i = ns.i;
/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
* on the ldev here, to be sure the transition -> D_DISKLESS resp.
* drbd_ldev_destroy() won't happen before our corresponding
* after_state_ch works run, where we put_ldev again. */
if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
atomic_inc(&mdev->local_cnt);
mdev->state = ns;
wake_up(&mdev->misc_wait);
wake_up(&mdev->state_wait);
@ -1268,7 +1285,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
drbd_uuid_new_current(mdev);
clear_bit(NEW_CUR_UUID, &mdev->flags);
drbd_md_sync(mdev);
}
spin_lock_irq(&mdev->req_lock);
_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
@ -1365,63 +1381,64 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
/* first half of local IO error */
if (os.disk > D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh = EP_PASS_ON;
/* first half of local IO error, failure to attach,
* or administrative detach */
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh;
int was_io_error;
/* corresponding get_ldev was in __drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS,
* so it is safe to dreference ldev here. */
eh = mdev->ldev->dc.on_io_error;
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
* and that may only happen after our put_ldev below. */
if (mdev->state.disk != D_FAILED)
dev_err(DEV,
"ASSERT FAILED: disk is %s during detach\n",
drbd_disk_str(mdev->state.disk));
if (drbd_send_state(mdev))
dev_warn(DEV, "Notified peer that my disk is broken.\n");
dev_warn(DEV, "Notified peer that I am detaching my disk\n");
else
dev_err(DEV, "Sending state for drbd_io_error() failed\n");
dev_err(DEV, "Sending state for detaching disk failed\n");
drbd_rs_cancel_all(mdev);
if (get_ldev_if_state(mdev, D_FAILED)) {
eh = mdev->ldev->dc.on_io_error;
put_ldev(mdev);
}
if (eh == EP_CALL_HELPER)
/* In case we want to get something to stable storage still,
* this may be the last chance.
* Following put_ldev may transition to D_DISKLESS. */
drbd_md_sync(mdev);
put_ldev(mdev);
if (was_io_error && eh == EP_CALL_HELPER)
drbd_khelper(mdev, "local-io-error");
}
/* second half of local IO error, failure to attach,
* or administrative detach,
* after local_cnt references have reached zero again */
if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
/* We must still be diskless,
* re-attach has to be serialized with this! */
if (mdev->state.disk != D_DISKLESS)
dev_err(DEV,
"ASSERT FAILED: disk is %s while going diskless\n",
drbd_disk_str(mdev->state.disk));
/* second half of local IO error handling,
* after local_cnt references have reached zero: */
if (os.disk == D_FAILED && ns.disk == D_DISKLESS) {
mdev->rs_total = 0;
mdev->rs_failed = 0;
atomic_set(&mdev->rs_pending_cnt, 0);
}
mdev->rs_total = 0;
mdev->rs_failed = 0;
atomic_set(&mdev->rs_pending_cnt, 0);
if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
/* We must still be diskless,
* re-attach has to be serialized with this! */
if (mdev->state.disk != D_DISKLESS)
dev_err(DEV,
"ASSERT FAILED: disk is %s while going diskless\n",
drbd_disk_str(mdev->state.disk));
/* we cannot assert local_cnt == 0 here, as get_ldev_if_state
* will inc/dec it frequently. Since we became D_DISKLESS, no
* one has touched the protected members anymore, though, so we
* are safe to free them here. */
if (drbd_send_state(mdev))
dev_warn(DEV, "Notified peer that I detached my disk.\n");
dev_warn(DEV, "Notified peer that I'm now diskless.\n");
else
dev_err(DEV, "Sending state for detach failed\n");
lc_destroy(mdev->resync);
mdev->resync = NULL;
lc_destroy(mdev->act_log);
mdev->act_log = NULL;
__no_warn(local,
drbd_free_bc(mdev->ldev);
mdev->ldev = NULL;);
if (mdev->md_io_tmpp) {
__free_page(mdev->md_io_tmpp);
mdev->md_io_tmpp = NULL;
}
dev_err(DEV, "Sending state for being diskless failed\n");
/* corresponding get_ldev in __drbd_set_state
* this may finaly trigger drbd_ldev_destroy. */
put_ldev(mdev);
}
/* Disks got bigger while they were detached */
@ -2772,11 +2789,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
drbd_set_defaults(mdev);
/* for now, we do NOT yet support it,
* even though we start some framework
* to eventually support barriers */
set_bit(NO_BARRIER_SUPP, &mdev->flags);
atomic_set(&mdev->ap_bio_cnt, 0);
atomic_set(&mdev->ap_pending_cnt, 0);
atomic_set(&mdev->rs_pending_cnt, 0);
@ -2842,7 +2854,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
drbd_thread_init(mdev, &mdev->asender, drbd_asender);
mdev->agreed_pro_version = PRO_VERSION_MAX;
mdev->write_ordering = WO_bio_barrier;
mdev->write_ordering = WO_bdev_flush;
mdev->resync_wenr = LC_FREE;
}
@ -2899,7 +2911,6 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
D_ASSERT(list_empty(&mdev->resync_work.list));
D_ASSERT(list_empty(&mdev->unplug_work.list));
D_ASSERT(list_empty(&mdev->go_diskless.list));
}
@ -3660,6 +3671,8 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
get_random_bytes(&val, sizeof(u64));
_drbd_uuid_set(mdev, UI_CURRENT, val);
/* get it to stable storage _now_ */
drbd_md_sync(mdev);
}
void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
@ -3756,19 +3769,31 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
return 1;
}
void drbd_ldev_destroy(struct drbd_conf *mdev)
{
lc_destroy(mdev->resync);
mdev->resync = NULL;
lc_destroy(mdev->act_log);
mdev->act_log = NULL;
__no_warn(local,
drbd_free_bc(mdev->ldev);
mdev->ldev = NULL;);
if (mdev->md_io_tmpp) {
__free_page(mdev->md_io_tmpp);
mdev->md_io_tmpp = NULL;
}
clear_bit(GO_DISKLESS, &mdev->flags);
}
static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
D_ASSERT(mdev->state.disk == D_FAILED);
/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
* inc/dec it frequently. Once we are D_DISKLESS, no one will touch
* the protected members anymore, though, so in the after_state_ch work
* it will be safe to free them. */
* the protected members anymore, though, so once put_ldev reaches zero
* again, it will be safe to free them. */
drbd_force_state(mdev, NS(disk, D_DISKLESS));
/* We need to wait for return of references checked out while we still
* have been D_FAILED, though (drbd_md_sync, bitmap io). */
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
clear_bit(GO_DISKLESS, &mdev->flags);
return 1;
}
@ -3777,9 +3802,6 @@ void drbd_go_diskless(struct drbd_conf *mdev)
D_ASSERT(mdev->state.disk == D_FAILED);
if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
/* don't drbd_queue_work_front,
* we need to serialize with the after_state_ch work
* of the -> D_FAILED transition. */
}
/**

View file

@ -870,6 +870,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
retcode = ERR_DISK_CONFIGURED;
goto fail;
}
/* It may just now have detached because of IO error. Make sure
* drbd_ldev_destroy is done already, we may end up here very fast,
* e.g. if someone calls attach from the on-io-error handler,
* to realize a "hot spare" feature (not that I'd recommend that) */
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
/* allocation not in the IO path, cqueue thread context */
nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
@ -1098,9 +1103,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
/* Reset the "barriers don't work" bits here, then force meta data to
* be written, to ensure we determine if barriers are supported. */
if (nbc->dc.no_md_flush)
set_bit(MD_NO_BARRIER, &mdev->flags);
set_bit(MD_NO_FUA, &mdev->flags);
else
clear_bit(MD_NO_BARRIER, &mdev->flags);
clear_bit(MD_NO_FUA, &mdev->flags);
/* Point of no return reached.
* Devices and memory are no longer released by error cleanup below.
@ -1112,8 +1117,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
nbc = NULL;
resync_lru = NULL;
mdev->write_ordering = WO_bio_barrier;
drbd_bump_write_ordering(mdev, WO_bio_barrier);
mdev->write_ordering = WO_bdev_flush;
drbd_bump_write_ordering(mdev, WO_bdev_flush);
if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
set_bit(CRASHED_PRIMARY, &mdev->flags);
@ -1262,7 +1267,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
force_diskless_dec:
put_ldev(mdev);
force_diskless:
drbd_force_state(mdev, NS(disk, D_DISKLESS));
drbd_force_state(mdev, NS(disk, D_FAILED));
drbd_md_sync(mdev);
release_bdev2_fail:
if (nbc)
@ -1285,10 +1290,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
return 0;
}
/* Detaching the disk is a process in multiple stages. First we need to lock
* out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
* Then we transition to D_DISKLESS, and wait for put_ldev() to return all
* internal references as well.
* Only then we have finally detached. */
static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS));
if (mdev->state.disk == D_DISKLESS)
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
drbd_resume_io(mdev);
return 0;
}
@ -1953,7 +1967,6 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
drbd_uuid_new_current(mdev);
clear_bit(NEW_CUR_UUID, &mdev->flags);
drbd_md_sync(mdev);
}
drbd_suspend_io(mdev);
reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));

View file

@ -158,7 +158,6 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
[WO_none] = 'n',
[WO_drain_io] = 'd',
[WO_bdev_flush] = 'f',
[WO_bio_barrier] = 'b',
};
seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",

View file

@ -49,11 +49,6 @@
#include "drbd_vli.h"
struct flush_work {
struct drbd_work w;
struct drbd_epoch *epoch;
};
enum finish_epoch {
FE_STILL_LIVE,
FE_DESTROYED,
@ -66,16 +61,6 @@ static int drbd_do_auth(struct drbd_conf *mdev);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
{
struct drbd_epoch *prev;
spin_lock(&mdev->epoch_lock);
prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
if (prev == epoch || prev == mdev->current_epoch)
prev = NULL;
spin_unlock(&mdev->epoch_lock);
return prev;
}
#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
@ -981,7 +966,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
return TRUE;
}
static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
static void drbd_flush(struct drbd_conf *mdev)
{
int rv;
@ -997,24 +982,6 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
}
put_ldev(mdev);
}
return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
}
static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct flush_work *fw = (struct flush_work *)w;
struct drbd_epoch *epoch = fw->epoch;
kfree(w);
if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
drbd_flush_after_epoch(mdev, epoch);
drbd_may_finish_epoch(mdev, epoch, EV_PUT |
(mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
return 1;
}
/**
@ -1027,15 +994,13 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
struct drbd_epoch *epoch,
enum epoch_event ev)
{
int finish, epoch_size;
int epoch_size;
struct drbd_epoch *next_epoch;
int schedule_flush = 0;
enum finish_epoch rv = FE_STILL_LIVE;
spin_lock(&mdev->epoch_lock);
do {
next_epoch = NULL;
finish = 0;
epoch_size = atomic_read(&epoch->epoch_size);
@ -1045,16 +1010,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
break;
case EV_GOT_BARRIER_NR:
set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
/* Special case: If we just switched from WO_bio_barrier to
WO_bdev_flush we should not finish the current epoch */
if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
mdev->write_ordering != WO_bio_barrier &&
epoch == mdev->current_epoch)
clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
break;
case EV_BARRIER_DONE:
set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
break;
case EV_BECAME_LAST:
/* nothing to do*/
@ -1063,23 +1018,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
if (epoch_size != 0 &&
atomic_read(&epoch->active) == 0 &&
test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) &&
epoch->list.prev == &mdev->current_epoch->list &&
!test_bit(DE_IS_FINISHING, &epoch->flags)) {
/* Nearly all conditions are met to finish that epoch... */
if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ||
mdev->write_ordering == WO_none ||
(epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) ||
ev & EV_CLEANUP) {
finish = 1;
set_bit(DE_IS_FINISHING, &epoch->flags);
} else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
mdev->write_ordering == WO_bio_barrier) {
atomic_inc(&epoch->active);
schedule_flush = 1;
}
}
if (finish) {
test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
if (!(ev & EV_CLEANUP)) {
spin_unlock(&mdev->epoch_lock);
drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
@ -1102,6 +1041,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
/* atomic_set(&epoch->active, 0); is already zero */
if (rv == FE_STILL_LIVE)
rv = FE_RECYCLED;
wake_up(&mdev->ee_wait);
}
}
@ -1113,22 +1053,6 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
spin_unlock(&mdev->epoch_lock);
if (schedule_flush) {
struct flush_work *fw;
fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
if (fw) {
fw->w.cb = w_flush;
fw->epoch = epoch;
drbd_queue_work(&mdev->data.work, &fw->w);
} else {
dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
/* That is not a recursion, only one level */
drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
drbd_may_finish_epoch(mdev, epoch, EV_PUT);
}
}
return rv;
}
@ -1144,19 +1068,16 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
[WO_none] = "none",
[WO_drain_io] = "drain",
[WO_bdev_flush] = "flush",
[WO_bio_barrier] = "barrier",
};
pwo = mdev->write_ordering;
wo = min(pwo, wo);
if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
wo = WO_bdev_flush;
if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
wo = WO_drain_io;
if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
wo = WO_none;
mdev->write_ordering = wo;
if (pwo != mdev->write_ordering || wo == WO_bio_barrier)
if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}
@ -1192,7 +1113,7 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
bio->bi_sector = sector;
bio->bi_bdev = mdev->ldev->backing_bdev;
/* we special case some flags in the multi-bio case, see below
* (REQ_UNPLUG, REQ_HARDBARRIER) */
* (REQ_UNPLUG) */
bio->bi_rw = rw;
bio->bi_private = e;
bio->bi_end_io = drbd_endio_sec;
@ -1226,11 +1147,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
bio->bi_rw &= ~REQ_UNPLUG;
drbd_generic_make_request(mdev, fault_type, bio);
/* strip off REQ_HARDBARRIER,
* unless it is the first or last bio */
if (bios && bios->bi_next)
bios->bi_rw &= ~REQ_HARDBARRIER;
} while (bios);
maybe_kick_lo(mdev);
return 0;
@ -1244,45 +1160,9 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
return -ENOMEM;
}
/**
* w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
* @mdev: DRBD device.
* @w: work object.
* @cancel: The connection will be closed anyways (unused in this callback)
*/
int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
{
struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
/* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
(and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
so that we can finish that epoch in drbd_may_finish_epoch().
That is necessary if we already have a long chain of Epochs, before
we realize that REQ_HARDBARRIER is actually not supported */
/* As long as the -ENOTSUPP on the barrier is reported immediately
that will never trigger. If it is reported late, we will just
print that warning and continue correctly for all future requests
with WO_bdev_flush */
if (previous_epoch(mdev, e->epoch))
dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
/* we still have a local reference,
* get_ldev was done in receive_Data. */
e->w.cb = e_end_block;
if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_DT_WR) != 0) {
/* drbd_submit_ee fails for one reason only:
* if was not able to allocate sufficient bios.
* requeue, try again later. */
e->w.cb = w_e_reissue;
drbd_queue_work(&mdev->data.work, &e->w);
}
return 1;
}
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
{
int rv, issue_flush;
int rv;
struct p_barrier *p = &mdev->data.rbuf.barrier;
struct drbd_epoch *epoch;
@ -1300,44 +1180,40 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign
* Therefore we must send the barrier_ack after the barrier request was
* completed. */
switch (mdev->write_ordering) {
case WO_bio_barrier:
case WO_none:
if (rv == FE_RECYCLED)
return TRUE;
break;
/* receiver context, in the writeout path of the other node.
* avoid potential distributed deadlock */
epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
if (epoch)
break;
else
dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
/* Fall through */
case WO_bdev_flush:
case WO_drain_io:
if (rv == FE_STILL_LIVE) {
set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
}
if (rv == FE_RECYCLED)
return TRUE;
/* The asender will send all the ACKs and barrier ACKs out, since
all EEs moved from the active_ee to the done_ee. We need to
provide a new epoch object for the EEs that come in soon */
break;
}
/* receiver context, in the writeout path of the other node.
* avoid potential distributed deadlock */
epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
if (!epoch) {
dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
if (issue_flush) {
rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
if (rv == FE_RECYCLED)
return TRUE;
drbd_flush(mdev);
if (atomic_read(&mdev->current_epoch->epoch_size)) {
epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
if (epoch)
break;
}
drbd_wait_ee_list_empty(mdev, &mdev->done_ee);
epoch = mdev->current_epoch;
wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
D_ASSERT(atomic_read(&epoch->active) == 0);
D_ASSERT(epoch->flags == 0);
return TRUE;
default:
dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
return FALSE;
}
epoch->flags = 0;
@ -1652,15 +1528,8 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
sector_t sector = e->sector;
struct drbd_epoch *epoch;
int ok = 1, pcmd;
if (e->flags & EE_IS_BARRIER) {
epoch = previous_epoch(mdev, e->epoch);
if (epoch)
drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
}
if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
@ -1817,27 +1686,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
e->epoch = mdev->current_epoch;
atomic_inc(&e->epoch->epoch_size);
atomic_inc(&e->epoch->active);
if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
struct drbd_epoch *epoch;
/* Issue a barrier if we start a new epoch, and the previous epoch
was not a epoch containing a single request which already was
a Barrier. */
epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
if (epoch == e->epoch) {
set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
rw |= REQ_HARDBARRIER;
e->flags |= EE_IS_BARRIER;
} else {
if (atomic_read(&epoch->epoch_size) > 1 ||
!test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
rw |= REQ_HARDBARRIER;
e->flags |= EE_IS_BARRIER;
}
}
}
spin_unlock(&mdev->epoch_lock);
dp_flags = be32_to_cpu(p->dp_flags);
@ -1995,10 +1843,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
break;
}
if (mdev->state.pdsk == D_DISKLESS) {
if (mdev->state.pdsk < D_INCONSISTENT) {
/* In case we have the only disk of the cluster, */
drbd_set_out_of_sync(mdev, e->sector, e->size);
e->flags |= EE_CALL_AL_COMPLETE_IO;
e->flags &= ~EE_MAY_SET_IN_SYNC;
drbd_al_begin_io(mdev, e->sector);
}
@ -3362,7 +3211,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
if (ns.conn == C_MASK) {
ns.conn = C_CONNECTED;
if (mdev->state.disk == D_NEGOTIATING) {
drbd_force_state(mdev, NS(disk, D_DISKLESS));
drbd_force_state(mdev, NS(disk, D_FAILED));
} else if (peer_state.disk == D_NEGOTIATING) {
dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
peer_state.disk = D_DISKLESS;

View file

@ -258,7 +258,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
if (!hlist_unhashed(&req->colision))
hlist_del(&req->colision);
else
D_ASSERT((s & RQ_NET_MASK) == 0);
D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
/* for writes we need to do some extra housekeeping */
if (rw == WRITE)
@ -813,7 +813,8 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
mdev->state.conn >= C_CONNECTED));
if (!(local || remote) && !is_susp(mdev->state)) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
goto fail_free_complete;
}
@ -942,12 +943,21 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
if (local) {
req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
: DRBD_FAULT_DT_RA))
/* State may have changed since we grabbed our reference on the
* mdev->ldev member. Double check, and short-circuit to endio.
* In case the last activity log transaction failed to get on
* stable storage, and this is a WRITE, we may not even submit
* this bio. */
if (get_ldev(mdev)) {
if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
: DRBD_FAULT_DT_RA))
bio_endio(req->private_bio, -EIO);
else
generic_make_request(req->private_bio);
put_ldev(mdev);
} else
bio_endio(req->private_bio, -EIO);
else
generic_make_request(req->private_bio);
}
/* we need to plug ALWAYS since we possibly need to kick lo_dev.
@ -1022,20 +1032,6 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
return 0;
}
/* Reject barrier requests if we know the underlying device does
* not support them.
* XXX: Need to get this info from peer as well some how so we
* XXX: reject if EITHER side/data/metadata area does not support them.
*
* because of those XXX, this is not yet enabled,
* i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit.
*/
if (unlikely(bio->bi_rw & REQ_HARDBARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags)) {
/* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
/*
* what we "blindly" assume:
*/

View file

@ -102,12 +102,6 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
put_ldev(mdev);
}
static int is_failed_barrier(int ee_flags)
{
return (ee_flags & (EE_IS_BARRIER|EE_WAS_ERROR|EE_RESUBMITTED))
== (EE_IS_BARRIER|EE_WAS_ERROR);
}
/* writes on behalf of the partner, or resync writes,
* "submitted" by the receiver, final stage. */
static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local)
@ -119,21 +113,6 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
int is_syncer_req;
int do_al_complete_io;
/* if this is a failed barrier request, disable use of barriers,
* and schedule for resubmission */
if (is_failed_barrier(e->flags)) {
drbd_bump_write_ordering(mdev, WO_bdev_flush);
spin_lock_irqsave(&mdev->req_lock, flags);
list_del(&e->w.list);
e->flags = (e->flags & ~EE_WAS_ERROR) | EE_RESUBMITTED;
e->w.cb = w_e_reissue;
/* put_ldev actually happens below, once we come here again. */
__release(local);
spin_unlock_irqrestore(&mdev->req_lock, flags);
drbd_queue_work(&mdev->data.work, &e->w);
return;
}
D_ASSERT(e->block_id != ID_VACANT);
/* after we moved e to done_ee,
@ -925,7 +904,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
drbd_md_sync(mdev);
if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
dev_info(DEV, "Writing the whole bitmap\n");
drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
}

View file

@ -53,7 +53,7 @@
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.9rc2"
#define REL_VERSION "8.3.9"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 95