Merge branch 'xfs-4.7-error-cfg' into for-next

This commit is contained in:
Dave Chinner 2016-05-20 10:33:38 +10:00
commit 544ad71fc8
8 changed files with 452 additions and 56 deletions

View File

@ -1100,22 +1100,18 @@ xfs_bwrite(
return error;
}
STATIC void
static void
xfs_buf_bio_end_io(
struct bio *bio)
{
xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
/*
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
*/
if (bio->bi_error) {
spin_lock(&bp->b_lock);
if (!bp->b_io_error)
bp->b_io_error = bio->bi_error;
spin_unlock(&bp->b_lock);
}
if (bio->bi_error)
cmpxchg(&bp->b_io_error, 0, bio->bi_error);
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

View File

@ -183,6 +183,26 @@ typedef struct xfs_buf {
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */
int b_error; /* error code on I/O */
/*
* async write failure retry count. Initialised to zero on the first
* failure, then when it exceeds the maximum configured without a
* success the write is considered to be failed permanently and the
* iodone handler will take appropriate action.
*
* For retry timeouts, we record the jiffie of the first failure. This
* means that we can change the retry timeout for buffers already under
* I/O and thus avoid getting stuck in a retry loop with a long timeout.
*
* last_error is used to ensure that we are getting repeated errors, not
* different errors. e.g. a block device might change ENOSPC to EIO when
* a failure timeout occurs, so we want to re-initialise the error
* retry behaviour appropriately when that happens.
*/
int b_retries;
unsigned long b_first_retry_time; /* in jiffies */
int b_last_error;
const struct xfs_buf_ops *b_ops;
#ifdef XFS_BUF_LOCK_TRACKING

View File

@ -1042,35 +1042,22 @@ xfs_buf_do_callbacks(
}
}
/*
* This is the iodone() function for buffers which have had callbacks
* attached to them by xfs_buf_attach_iodone(). It should remove each
* log item from the buffer's list and call the callback of each in turn.
* When done, the buffer's fsprivate field is set to NULL and the buffer
* is unlocked with a call to iodone().
*/
void
xfs_buf_iodone_callbacks(
static bool
xfs_buf_iodone_callback_error(
struct xfs_buf *bp)
{
struct xfs_log_item *lip = bp->b_fspriv;
struct xfs_mount *mp = lip->li_mountp;
static ulong lasttime;
static xfs_buftarg_t *lasttarg;
if (likely(!bp->b_error))
goto do_callbacks;
struct xfs_error_cfg *cfg;
/*
* If we've already decided to shutdown the filesystem because of
* I/O errors, there's no point in giving this a retry.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
xfs_buf_stale(bp);
bp->b_flags |= XBF_DONE;
trace_xfs_buf_item_iodone(bp, _RET_IP_);
goto do_callbacks;
}
if (XFS_FORCED_SHUTDOWN(mp))
goto out_stale;
if (bp->b_target != lasttarg ||
time_after(jiffies, (lasttime + 5*HZ))) {
@ -1079,45 +1066,93 @@ xfs_buf_iodone_callbacks(
}
lasttarg = bp->b_target;
/* synchronous writes will have callers process the error */
if (!(bp->b_flags & XBF_ASYNC))
goto out_stale;
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
ASSERT(bp->b_iodone != NULL);
/*
* If the write was asynchronous then no one will be looking for the
* error. Clear the error state and write the buffer out again.
*
* XXX: This helps against transient write errors, but we need to find
* a way to shut the filesystem down if the writes keep failing.
*
* In practice we'll shut the filesystem down soon as non-transient
* errors tend to affect the whole device and a failing log write
* will make us give up. But we really ought to do better here.
* error. If this is the first failure of this type, clear the error
* state and write the buffer out again. This means we always retry an
* async write failure at least once, but we also need to set the buffer
* up to behave correctly now for repeated failures.
*/
if (bp->b_flags & XBF_ASYNC) {
ASSERT(bp->b_iodone != NULL);
if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) ||
bp->b_last_error != bp->b_error) {
bp->b_flags |= (XBF_WRITE | XBF_ASYNC |
XBF_DONE | XBF_WRITE_FAIL);
bp->b_last_error = bp->b_error;
bp->b_retries = 0;
bp->b_first_retry_time = jiffies;
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
bp->b_flags |= XBF_WRITE | XBF_ASYNC |
XBF_DONE | XBF_WRITE_FAIL;
xfs_buf_submit(bp);
} else {
xfs_buf_relse(bp);
}
return;
xfs_buf_ioerror(bp, 0);
xfs_buf_submit(bp);
return true;
}
/*
* If the write of the buffer was synchronous, we want to make
* sure to return the error to the caller of xfs_bwrite().
* Repeated failure on an async write. Take action according to the
* error configuration we have been set up to use.
*/
cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
++bp->b_retries > cfg->max_retries)
goto permanent_error;
if (cfg->retry_timeout &&
time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
goto permanent_error;
/* At unmount we may treat errors differently */
if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
goto permanent_error;
/* still a transient error, higher layers will retry */
xfs_buf_ioerror(bp, 0);
xfs_buf_relse(bp);
return true;
/*
* Permanent error - we need to trigger a shutdown if we haven't already
* to indicate that inconsistency will result from this action.
*/
permanent_error:
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
out_stale:
xfs_buf_stale(bp);
bp->b_flags |= XBF_DONE;
trace_xfs_buf_error_relse(bp, _RET_IP_);
return false;
}
/*
* This is the iodone() function for buffers which have had callbacks attached
* to them by xfs_buf_attach_iodone(). We need to iterate the items on the
* callback list, mark the buffer as having no more callbacks and then push the
* buffer through IO completion processing.
*/
void
xfs_buf_iodone_callbacks(
struct xfs_buf *bp)
{
/*
* If there is an error, process it. Some errors require us
* to run callbacks after failure processing is done so we
* detect that and take appropriate action.
*/
if (bp->b_error && xfs_buf_iodone_callback_error(bp))
return;
/*
* Successful IO or permanent error. Either way, we can clear the
* retry state here in preparation for the next error that may occur.
*/
bp->b_last_error = 0;
bp->b_retries = 0;
do_callbacks:
xfs_buf_do_callbacks(bp);
bp->b_fspriv = NULL;
bp->b_iodone = NULL;

View File

@ -680,6 +680,9 @@ xfs_mountfs(
xfs_set_maxicount(mp);
/* enable fail_at_unmount as default */
mp->m_fail_unmount = 1;
error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
if (error)
goto out;
@ -689,10 +692,15 @@ xfs_mountfs(
if (error)
goto out_remove_sysfs;
error = xfs_uuid_mount(mp);
error = xfs_error_sysfs_init(mp);
if (error)
goto out_del_stats;
error = xfs_uuid_mount(mp);
if (error)
goto out_remove_error_sysfs;
/*
* Set the minimum read and write sizes
*/
@ -956,6 +964,7 @@ xfs_mountfs(
cancel_delayed_work_sync(&mp->m_reclaim_work);
xfs_reclaim_inodes(mp, SYNC_WAIT);
out_log_dealloc:
mp->m_flags |= XFS_MOUNT_UNMOUNTING;
xfs_log_mount_cancel(mp);
out_fail_wait:
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@ -967,6 +976,8 @@ xfs_mountfs(
xfs_da_unmount(mp);
out_remove_uuid:
xfs_uuid_unmount(mp);
out_remove_error_sysfs:
xfs_error_sysfs_del(mp);
out_del_stats:
xfs_sysfs_del(&mp->m_stats.xs_kobj);
out_remove_sysfs:
@ -1004,6 +1015,14 @@ xfs_unmountfs(
*/
xfs_log_force(mp, XFS_LOG_SYNC);
/*
* We now need to tell the world we are unmounting. This will allow
* us to detect that the filesystem is going away and we should error
* out anything that we have been retrying in the background. This will
* prevent neverending retries in AIL pushing from hanging the unmount.
*/
mp->m_flags |= XFS_MOUNT_UNMOUNTING;
/*
* Flush all pending changes from the AIL.
*/
@ -1055,6 +1074,7 @@ xfs_unmountfs(
#endif
xfs_free_perag(mp);
xfs_error_sysfs_del(mp);
xfs_sysfs_del(&mp->m_stats.xs_kobj);
xfs_sysfs_del(&mp->m_kobj);
}

View File

@ -37,6 +37,32 @@ enum {
XFS_LOWSP_MAX,
};
/*
* Error Configuration
*
* Error classes define the subsystem the configuration belongs to.
* Error numbers define the errors that are configurable.
*/
enum {
XFS_ERR_METADATA,
XFS_ERR_CLASS_MAX,
};
enum {
XFS_ERR_DEFAULT,
XFS_ERR_EIO,
XFS_ERR_ENOSPC,
XFS_ERR_ENODEV,
XFS_ERR_ERRNO_MAX,
};
#define XFS_ERR_RETRY_FOREVER -1
struct xfs_error_cfg {
struct xfs_kobj kobj;
int max_retries;
unsigned long retry_timeout; /* in jiffies, 0 = no timeout */
};
typedef struct xfs_mount {
struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */
@ -127,6 +153,9 @@ typedef struct xfs_mount {
int64_t m_low_space[XFS_LOWSP_MAX];
/* low free space thresholds */
struct xfs_kobj m_kobj;
struct xfs_kobj m_error_kobj;
struct xfs_kobj m_error_meta_kobj;
struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
struct xstats m_stats; /* per-fs stats */
struct workqueue_struct *m_buf_workqueue;
@ -148,6 +177,7 @@ typedef struct xfs_mount {
*/
__uint32_t m_generation;
bool m_fail_unmount;
#ifdef DEBUG
/*
* DEBUG mode instrumentation to test and/or trigger delayed allocation
@ -166,6 +196,7 @@ typedef struct xfs_mount {
#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
must be synchronous except
for space allocations */
#define XFS_MOUNT_UNMOUNTING (1ULL << 1) /* filesystem is unmounting */
#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
operations, typically for
@ -364,4 +395,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
xfs_off_t count_fsb);
struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
int error_class, int error);
#endif /* __XFS_MOUNT_H__ */

View File

@ -17,10 +17,11 @@
*/
#include "xfs.h"
#include "xfs_sysfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sysfs.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_stats.h"
@ -362,3 +363,291 @@ struct kobj_type xfs_log_ktype = {
.sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_log_attrs,
};
/*
* Metadata IO error configuration
*
* The sysfs structure here is:
* ...xfs/<dev>/error/<class>/<errno>/<error_attrs>
*
* where <class> allows us to discriminate between data IO and metadata IO,
* and any other future type of IO (e.g. special inode or directory error
* handling) we care to support.
*/
static inline struct xfs_error_cfg *
to_error_cfg(struct kobject *kobject)
{
struct xfs_kobj *kobj = to_kobj(kobject);
return container_of(kobj, struct xfs_error_cfg, kobj);
}
static inline struct xfs_mount *
err_to_mp(struct kobject *kobject)
{
struct xfs_kobj *kobj = to_kobj(kobject);
return container_of(kobj, struct xfs_mount, m_error_kobj);
}
static ssize_t
max_retries_show(
struct kobject *kobject,
char *buf)
{
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
return snprintf(buf, PAGE_SIZE, "%d\n", cfg->max_retries);
}
static ssize_t
max_retries_store(
struct kobject *kobject,
const char *buf,
size_t count)
{
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
int ret;
int val;
ret = kstrtoint(buf, 0, &val);
if (ret)
return ret;
if (val < -1)
return -EINVAL;
cfg->max_retries = val;
return count;
}
XFS_SYSFS_ATTR_RW(max_retries);
static ssize_t
retry_timeout_seconds_show(
struct kobject *kobject,
char *buf)
{
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
return snprintf(buf, PAGE_SIZE, "%ld\n",
jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC);
}
static ssize_t
retry_timeout_seconds_store(
struct kobject *kobject,
const char *buf,
size_t count)
{
struct xfs_error_cfg *cfg = to_error_cfg(kobject);
int ret;
int val;
ret = kstrtoint(buf, 0, &val);
if (ret)
return ret;
/* 1 day timeout maximum */
if (val < 0 || val > 86400)
return -EINVAL;
cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC);
return count;
}
XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
static ssize_t
fail_at_unmount_show(
struct kobject *kobject,
char *buf)
{
struct xfs_mount *mp = err_to_mp(kobject);
return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount);
}
static ssize_t
fail_at_unmount_store(
struct kobject *kobject,
const char *buf,
size_t count)
{
struct xfs_mount *mp = err_to_mp(kobject);
int ret;
int val;
ret = kstrtoint(buf, 0, &val);
if (ret)
return ret;
if (val < 0 || val > 1)
return -EINVAL;
mp->m_fail_unmount = val;
return count;
}
XFS_SYSFS_ATTR_RW(fail_at_unmount);
static struct attribute *xfs_error_attrs[] = {
ATTR_LIST(max_retries),
ATTR_LIST(retry_timeout_seconds),
NULL,
};
struct kobj_type xfs_error_cfg_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_error_attrs,
};
struct kobj_type xfs_error_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
};
/*
* Error initialization tables. These need to be ordered in the same
* order as the enums used to index the array. All class init tables need to
* define a "default" behaviour as the first entry, all other entries can be
* empty.
*/
struct xfs_error_init {
char *name;
int max_retries;
int retry_timeout; /* in seconds */
};
static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = {
{ .name = "default",
.max_retries = XFS_ERR_RETRY_FOREVER,
.retry_timeout = 0,
},
{ .name = "EIO",
.max_retries = XFS_ERR_RETRY_FOREVER,
.retry_timeout = 0,
},
{ .name = "ENOSPC",
.max_retries = XFS_ERR_RETRY_FOREVER,
.retry_timeout = 0,
},
{ .name = "ENODEV",
.max_retries = 0,
},
};
static int
xfs_error_sysfs_init_class(
struct xfs_mount *mp,
int class,
const char *parent_name,
struct xfs_kobj *parent_kobj,
const struct xfs_error_init init[])
{
struct xfs_error_cfg *cfg;
int error;
int i;
ASSERT(class < XFS_ERR_CLASS_MAX);
error = xfs_sysfs_init(parent_kobj, &xfs_error_ktype,
&mp->m_error_kobj, parent_name);
if (error)
return error;
for (i = 0; i < XFS_ERR_ERRNO_MAX; i++) {
cfg = &mp->m_error_cfg[class][i];
error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype,
parent_kobj, init[i].name);
if (error)
goto out_error;
cfg->max_retries = init[i].max_retries;
cfg->retry_timeout = msecs_to_jiffies(
init[i].retry_timeout * MSEC_PER_SEC);
}
return 0;
out_error:
/* unwind the entries that succeeded */
for (i--; i >= 0; i--) {
cfg = &mp->m_error_cfg[class][i];
xfs_sysfs_del(&cfg->kobj);
}
xfs_sysfs_del(parent_kobj);
return error;
}
int
xfs_error_sysfs_init(
struct xfs_mount *mp)
{
int error;
/* .../xfs/<dev>/error/ */
error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype,
&mp->m_kobj, "error");
if (error)
return error;
error = sysfs_create_file(&mp->m_error_kobj.kobject,
ATTR_LIST(fail_at_unmount));
if (error)
goto out_error;
/* .../xfs/<dev>/error/metadata/ */
error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA,
"metadata", &mp->m_error_meta_kobj,
xfs_error_meta_init);
if (error)
goto out_error;
return 0;
out_error:
xfs_sysfs_del(&mp->m_error_kobj);
return error;
}
void
xfs_error_sysfs_del(
struct xfs_mount *mp)
{
struct xfs_error_cfg *cfg;
int i, j;
for (i = 0; i < XFS_ERR_CLASS_MAX; i++) {
for (j = 0; j < XFS_ERR_ERRNO_MAX; j++) {
cfg = &mp->m_error_cfg[i][j];
xfs_sysfs_del(&cfg->kobj);
}
}
xfs_sysfs_del(&mp->m_error_meta_kobj);
xfs_sysfs_del(&mp->m_error_kobj);
}
struct xfs_error_cfg *
xfs_error_get_cfg(
struct xfs_mount *mp,
int error_class,
int error)
{
struct xfs_error_cfg *cfg;
switch (error) {
case EIO:
cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO];
break;
case ENOSPC:
cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENOSPC];
break;
case ENODEV:
cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENODEV];
break;
default:
cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT];
break;
}
return cfg;
}

View File

@ -58,4 +58,7 @@ xfs_sysfs_del(
wait_for_completion(&kobj->complete);
}
int xfs_error_sysfs_init(struct xfs_mount *mp);
void xfs_error_sysfs_del(struct xfs_mount *mp);
#endif /* __XFS_SYSFS_H__ */

View File

@ -364,7 +364,6 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_bdstrat_shut);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone);
DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);