mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-20 17:41:09 +00:00
xfs: fix for 3.11-rc3
- fix for regression in commit cca9f93a52
, recovery causing filesystem corruption after a crash -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) iQIcBAABAgAGBQJR8ZWnAAoJENaLyazVq6ZOb7cP/iLwa59qX5sHAoYRGterE4Li 34xPkihJEjcbvNCtK+rznXT9ohSvwTahnrdlLy/bQ6d0K1gBX3j1DD4cpTvGRWJR hEBbQU0PXhXjRL6ixgxfeNPfbEfNMYhiTFfjPhBjKVgzYN3NnJZ1lv8zTHaeQ8JP m7dEKrrg/J8LsW18fq2E0p/SjKi7cT1mEf8jkcYu0UGYd7yDtQSukMbEjfsIJq9L DpB3QXHQkUf1UlVdUvLncGmcDUPAEt+8/ae9uUpY2nxHv+7jmzAoCyRUCTDYsIh2 gznQjsns56B2FWfnkyzXC3nMaoyIZpT8Fy3FRBsQRKGboOeOPS+/Yyzf/FcLQ8Jl yMXA0oR+3Ft7wJ62+aSuP3/dug8TbBk09bI+RqV4D+GwM7n7kLE/Fo3kQLva5Aqf rZIhwzfBDl51vxRzm4I29wOkfvQRXndy4c0hYtfeVy0lBA2yCFLSlzGha5EX+CxM s1kbpOkuOOE5k5Mgjve/iIKbwG3OKEuPCrESJPG+sTREAkkXkycnVQft2ihJYgg8 yIgPG4fxpIIpwTdC016YAa/raOm/unIG6ko+ec3m2rB2lmo8j3vQOjoIFuV0KYV1 enzhK5F+sQJl9evQOgfJc+uOMgjs1DrE38hnlQ8rc3LXa5Dtb7ReMRAT7z2FxicF keAPwJNrMlwgIyYi+3B+ =hrYY -----END PGP SIGNATURE----- Merge tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs Pull xfs fix from Ben Myers: "Fix for regression in commitcca9f93a52
("xfs: don't do IO when creating an new inode"), recovery causing filesystem corruption after a crash" * tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs: xfs: di_flushiter considered harmful
This commit is contained in:
commit
6c4155a9cd
3 changed files with 36 additions and 11 deletions
|
@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
|
||||||
* There is a very similar struct icdinode in xfs_inode which matches the
|
* There is a very similar struct icdinode in xfs_inode which matches the
|
||||||
* layout of the first 96 bytes of this structure, but is kept in native
|
* layout of the first 96 bytes of this structure, but is kept in native
|
||||||
* format instead of big endian.
|
* format instead of big endian.
|
||||||
|
*
|
||||||
|
* Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
|
||||||
|
* padding field for v3 inodes.
|
||||||
*/
|
*/
|
||||||
typedef struct xfs_dinode {
|
typedef struct xfs_dinode {
|
||||||
__be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
|
__be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
|
||||||
|
|
|
@ -896,7 +896,6 @@ xfs_dinode_to_disk(
|
||||||
to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
|
to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
|
||||||
to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
|
to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
|
||||||
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
|
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
|
||||||
to->di_flushiter = cpu_to_be16(from->di_flushiter);
|
|
||||||
to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
|
to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
|
||||||
to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
|
to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
|
||||||
to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
|
to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
|
||||||
|
@ -924,6 +923,9 @@ xfs_dinode_to_disk(
|
||||||
to->di_lsn = cpu_to_be64(from->di_lsn);
|
to->di_lsn = cpu_to_be64(from->di_lsn);
|
||||||
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
|
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
|
||||||
uuid_copy(&to->di_uuid, &from->di_uuid);
|
uuid_copy(&to->di_uuid, &from->di_uuid);
|
||||||
|
to->di_flushiter = 0;
|
||||||
|
} else {
|
||||||
|
to->di_flushiter = cpu_to_be16(from->di_flushiter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
|
||||||
/*
|
/*
|
||||||
* Read the disk inode attributes into the in-core inode structure.
|
* Read the disk inode attributes into the in-core inode structure.
|
||||||
*
|
*
|
||||||
* If we are initialising a new inode and we are not utilising the
|
* For version 5 superblocks, if we are initialising a new inode and we are not
|
||||||
* XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core
|
* utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
|
||||||
* with a random generation number. If we are keeping inodes around, we need to
|
* inode core with a random generation number. If we are keeping inodes around,
|
||||||
* read the inode cluster to get the existing generation number off disk.
|
* we need to read the inode cluster to get the existing generation number off
|
||||||
|
* disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
|
||||||
|
* format) then log recovery is dependent on the di_flushiter field being
|
||||||
|
* initialised from the current on-disk value and hence we must also read the
|
||||||
|
* inode off disk.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
xfs_iread(
|
xfs_iread(
|
||||||
|
@ -1054,6 +1060,7 @@ xfs_iread(
|
||||||
|
|
||||||
/* shortcut IO on inode allocation if possible */
|
/* shortcut IO on inode allocation if possible */
|
||||||
if ((iget_flags & XFS_IGET_CREATE) &&
|
if ((iget_flags & XFS_IGET_CREATE) &&
|
||||||
|
xfs_sb_version_hascrc(&mp->m_sb) &&
|
||||||
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
|
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
|
||||||
/* initialise the on-disk inode core */
|
/* initialise the on-disk inode core */
|
||||||
memset(&ip->i_d, 0, sizeof(ip->i_d));
|
memset(&ip->i_d, 0, sizeof(ip->i_d));
|
||||||
|
@ -2882,12 +2889,18 @@ xfs_iflush_int(
|
||||||
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
|
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
|
||||||
goto corrupt_out;
|
goto corrupt_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* bump the flush iteration count, used to detect flushes which
|
* Inode item log recovery for v1/v2 inodes are dependent on the
|
||||||
* postdate a log record during recovery. This is redundant as we now
|
* di_flushiter count for correct sequencing. We bump the flush
|
||||||
* log every change and hence this can't happen. Still, it doesn't hurt.
|
* iteration count so we can detect flushes which postdate a log record
|
||||||
|
* during recovery. This is redundant as we now log every change and
|
||||||
|
* hence this can't happen but we need to still do it to ensure
|
||||||
|
* backwards compatibility with old kernels that predate logging all
|
||||||
|
* inode changes.
|
||||||
*/
|
*/
|
||||||
ip->i_d.di_flushiter++;
|
if (ip->i_d.di_version < 3)
|
||||||
|
ip->i_d.di_flushiter++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copy the dirty parts of the inode into the on-disk
|
* Copy the dirty parts of the inode into the on-disk
|
||||||
|
|
|
@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip replay when the on disk inode is newer than the log one */
|
/*
|
||||||
if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
|
* di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
|
||||||
|
* are transactional and if ordering is necessary we can determine that
|
||||||
|
* more accurately by the LSN field in the V3 inode core. Don't trust
|
||||||
|
* the inode versions we might be changing them here - use the
|
||||||
|
* superblock flag to determine whether we need to look at di_flushiter
|
||||||
|
* to skip replay when the on disk inode is newer than the log one
|
||||||
|
*/
|
||||||
|
if (!xfs_sb_version_hascrc(&mp->m_sb) &&
|
||||||
|
dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
|
||||||
/*
|
/*
|
||||||
* Deal with the wrap case, DI_MAX_FLUSH is less
|
* Deal with the wrap case, DI_MAX_FLUSH is less
|
||||||
* than smaller numbers
|
* than smaller numbers
|
||||||
|
@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Take the opportunity to reset the flush iteration count */
|
/* Take the opportunity to reset the flush iteration count */
|
||||||
dicp->di_flushiter = 0;
|
dicp->di_flushiter = 0;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue