Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (97 commits)
  jbd2: Unify log messages in jbd2 code
  jbd/jbd2: validate sb->s_first in journal_get_superblock()
  ext4: let ext4_ext_rm_leaf work with EXT_DEBUG defined
  ext4: fix a syntax error in ext4_ext_insert_extent when debugging enabled
  ext4: fix a typo in struct ext4_allocation_context
  ext4: Don't normalize an falloc request if it can fit in 1 extent.
  ext4: remove comments about extent mount option in ext4_new_inode()
  ext4: let ext4_discard_partial_buffers handle unaligned range correctly
  ext4: return ENOMEM if find_or_create_pages fails
  ext4: move vars to local scope in ext4_discard_partial_page_buffers_no_lock()
  ext4: Create helper function for EXT4_IO_END_UNWRITTEN and i_aiodio_unwritten
  ext4: optimize locking for end_io extent conversion
  ext4: remove unnecessary call to waitqueue_active()
  ext4: Use correct locking for ext4_end_io_nolock()
  ext4: fix race in xattr block allocation path
  ext4: trace punch_hole correctly in ext4_ext_map_blocks
  ext4: clean up AGGRESSIVE_TEST code
  ext4: move variables to their scope
  ext4: fix quota accounting during migration
  ext4: migrate cleanup
  ...
Linus Torvalds 2011-11-02 10:06:20 -07:00
commit f1f8935a5c
34 changed files with 2916 additions and 1347 deletions

Documentation/filesystems/ext4.txt

@@ -160,7 +160,9 @@ noload if the filesystem was not unmounted cleanly,
lead to any number of problems.
data=journal All data are committed into the journal prior to being
written into the main file system.
written into the main file system. Enabling
this mode will disable delayed allocation and
O_DIRECT support.
data=ordered (*) All data are forced directly out to the main file
system prior to its metadata being committed to the
@@ -201,30 +203,19 @@ inode_readahead_blks=n This tuning parameter controls the maximum
table readahead algorithm will pre-read into
the buffer cache. The default value is 32 blocks.
orlov (*) This enables the new Orlov block allocator. It is
enabled by default.
oldalloc This disables the Orlov block allocator and enables
the old block allocator. Orlov should have better
performance - we'd like to get some feedback if it's
the contrary for you.
user_xattr Enables Extended User Attributes. Additionally, you
need to have extended attribute support enabled in the
kernel configuration (CONFIG_EXT4_FS_XATTR). See the
attr(5) manual page and http://acl.bestbits.at/ to
learn more about extended attributes.
nouser_xattr Disables Extended User Attributes.
acl Enables POSIX Access Control Lists support.
Additionally, you need to have ACL support enabled in
the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL).
See the acl(5) manual page and http://acl.bestbits.at/
for more information.
nouser_xattr Disables Extended User Attributes. If you have extended
attribute support enabled in the kernel configuration
(CONFIG_EXT4_FS_XATTR), extended attribute support
is enabled by default on mount. See the attr(5) manual
page and http://acl.bestbits.at/ for more information
about extended attributes.
noacl This option disables POSIX Access Control List
support.
support. If ACL support is enabled in the kernel
configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL is
enabled by default on mount. See the acl(5) manual
page and http://acl.bestbits.at/ for more information
about acl.
bsddf (*) Make 'df' act like BSD.
minixdf Make 'df' act like Minix.
@@ -419,8 +410,8 @@ written to the journal first, and then to its final location.
In the event of a crash, the journal can be replayed, bringing both data and
metadata into a consistent state. This mode is the slowest except when data
needs to be read from and written to disk at the same time where it
outperforms all other modes. Currently ext4 does not have delayed
allocation support if this data journalling mode is selected.
outperforms all other modes. Enabling this mode will disable delayed
allocation and O_DIRECT support.
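
(For illustration only, not part of the patch: a minimal sketch of
requesting this journaling mode from a program via mount(2). The device
and mount point names here are hypothetical, and the same option can of
course be passed with "mount -o data=journal" instead.)

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* Ask ext4 for full data journaling; per the text above,
             * this also disables delayed allocation and O_DIRECT.
             * Requires CAP_SYS_ADMIN (run as root). */
            if (mount("/dev/sdb1", "/mnt/test", "ext4", 0,
                      "data=journal") != 0) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }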
/proc entries
=============

fs/ext4/balloc.c

@@ -28,7 +28,8 @@
*/
/*
* Calculate the block group number and offset, given a block number
* Calculate the block group number and offset into the block/cluster
* allocation bitmap, given a block number
*/
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
ext4_grpblk_t offset;
blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb));
offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
EXT4_SB(sb)->s_cluster_bits;
if (offsetp)
*offsetp = offset;
if (blockgrpp)
@@ -55,131 +57,170 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
return 0;
}
static int ext4_group_used_meta_blocks(struct super_block *sb,
/* Return the number of clusters used for file system metadata; this
* represents the overhead needed by the file system.
*/
unsigned ext4_num_overhead_clusters(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
unsigned num_clusters;
int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
ext4_fsblk_t itbl_blk;
struct ext4_sb_info *sbi = EXT4_SB(sb);
/* This is the number of clusters used by the superblock,
* block group descriptors, and reserved block group
* descriptor blocks */
num_clusters = ext4_num_base_meta_clusters(sb, block_group);
/*
* For the allocation bitmaps and inode table, we first need
* to check to see if the block is in the block group. If it
* is, then check to see if the cluster is already accounted
* for in the clusters used for the base metadata cluster, or
* if we can increment the base metadata cluster to include
* that block. Otherwise, we will have to track the cluster
* used for the allocation bitmap or inode table explicitly.
* Normally all of these blocks are contiguous, so the special
* case handling shouldn't be necessary except for *very*
* unusual file system layouts.
*/
if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
block_cluster = EXT4_B2C(sbi, (start -
ext4_block_bitmap(sb, gdp)));
if (block_cluster < num_clusters)
block_cluster = -1;
else if (block_cluster == num_clusters) {
num_clusters++;
block_cluster = -1;
}
}
if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
inode_cluster = EXT4_B2C(sbi,
start - ext4_inode_bitmap(sb, gdp));
if (inode_cluster < num_clusters)
inode_cluster = -1;
else if (inode_cluster == num_clusters) {
num_clusters++;
inode_cluster = -1;
}
}
itbl_blk = ext4_inode_table(sb, gdp);
for (i = 0; i < sbi->s_itb_per_group; i++) {
if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
c = EXT4_B2C(sbi, start - itbl_blk + i);
if ((c < num_clusters) || (c == inode_cluster) ||
(c == block_cluster) || (c == itbl_cluster))
continue;
if (c == num_clusters) {
num_clusters++;
continue;
}
num_clusters++;
itbl_cluster = c;
}
}
if (block_cluster != -1)
num_clusters++;
if (inode_cluster != -1)
num_clusters++;
return num_clusters;
}
static unsigned int num_clusters_in_group(struct super_block *sb,
ext4_group_t block_group)
{
unsigned int blocks;
if (block_group == ext4_get_groups_count(sb) - 1) {
/*
* Even though mke2fs always initializes the first and
* last group, just in case some other tool was used,
* we need to make sure we calculate the right free
* blocks.
*/
blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
ext4_group_first_block_no(sb, block_group);
} else
blocks = EXT4_BLOCKS_PER_GROUP(sb);
return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
}
/* Initializes an uninitialized block bitmap */
void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
unsigned int bit, bit_max;
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start, tmp;
int flex_bg = 0;
J_ASSERT_BH(bh, buffer_locked(bh));
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
ext4_free_group_clusters_set(sb, gdp, 0);
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
return;
}
memset(bh->b_data, 0, sb->s_blocksize);
bit_max = ext4_num_base_meta_clusters(sb, block_group);
for (bit = 0; bit < bit_max; bit++)
ext4_set_bit(bit, bh->b_data);
start = ext4_group_first_block_no(sb, block_group);
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
flex_bg = 1;
/* Set bits for block and inode bitmaps, and inode table */
tmp = ext4_block_bitmap(sb, gdp);
if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
tmp = ext4_inode_bitmap(sb, gdp);
if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
tmp = ext4_inode_table(sb, gdp);
for (; tmp < ext4_inode_table(sb, gdp) +
sbi->s_itb_per_group; tmp++) {
if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
}
/*
* Also if the number of blocks within the group is less than
* the blocksize * 8 ( which is the size of bitmap ), set rest
* of the block bitmap to 1
*/
ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
sb->s_blocksize * 8, bh->b_data);
}
/* Return the number of free blocks in a block group. It is used when
* the block bitmap is uninitialized, so we can't just count the bits
* in the bitmap. */
unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
ext4_fsblk_t tmp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
/* block bitmap, inode bitmap, and inode table blocks */
int used_blocks = sbi->s_itb_per_group + 2;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
block_group))
used_blocks--;
if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
block_group))
used_blocks--;
tmp = ext4_inode_table(sb, gdp);
for (; tmp < ext4_inode_table(sb, gdp) +
sbi->s_itb_per_group; tmp++) {
if (!ext4_block_in_group(sb, tmp, block_group))
used_blocks -= 1;
}
}
return used_blocks;
return num_clusters_in_group(sb, block_group) -
ext4_num_overhead_clusters(sb, block_group, gdp);
}
/* Initializes an uninitialized block bitmap if given, and returns the
* number of blocks free in the group. */
unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group, struct ext4_group_desc *gdp)
{
int bit, bit_max;
ext4_group_t ngroups = ext4_get_groups_count(sb);
unsigned free_blocks, group_blocks;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (bh) {
J_ASSERT_BH(bh, buffer_locked(bh));
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u",
block_group);
ext4_free_blks_set(sb, gdp, 0);
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
return 0;
}
memset(bh->b_data, 0, sb->s_blocksize);
}
/* Check for superblock and gdt backups in this group */
bit_max = ext4_bg_has_super(sb, block_group);
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
sbi->s_desc_per_block) {
if (bit_max) {
bit_max += ext4_bg_num_gdb(sb, block_group);
bit_max +=
le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
}
} else { /* For META_BG_BLOCK_GROUPS */
bit_max += ext4_bg_num_gdb(sb, block_group);
}
if (block_group == ngroups - 1) {
/*
* Even though mke2fs always initialize first and last group
* if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
* to make sure we calculate the right free blocks
*/
group_blocks = ext4_blocks_count(sbi->s_es) -
ext4_group_first_block_no(sb, ngroups - 1);
} else {
group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
}
free_blocks = group_blocks - bit_max;
if (bh) {
ext4_fsblk_t start, tmp;
int flex_bg = 0;
for (bit = 0; bit < bit_max; bit++)
ext4_set_bit(bit, bh->b_data);
start = ext4_group_first_block_no(sb, block_group);
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_FLEX_BG))
flex_bg = 1;
/* Set bits for block and inode bitmaps, and inode table */
tmp = ext4_block_bitmap(sb, gdp);
if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(tmp - start, bh->b_data);
tmp = ext4_inode_bitmap(sb, gdp);
if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(tmp - start, bh->b_data);
tmp = ext4_inode_table(sb, gdp);
for (; tmp < ext4_inode_table(sb, gdp) +
sbi->s_itb_per_group; tmp++) {
if (!flex_bg ||
ext4_block_in_group(sb, tmp, block_group))
ext4_set_bit(tmp - start, bh->b_data);
}
/*
* Also if the number of blocks within the group is
* less than the blocksize * 8 ( which is the size
* of bitmap ), set rest of the block bitmap to 1
*/
ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
bh->b_data);
}
return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
}
/*
* The free blocks are managed by bitmaps. A file system contains several
* blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -362,53 +403,54 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
}
/**
* ext4_has_free_blocks()
* ext4_has_free_clusters()
* @sbi: in-core super block structure.
* @nblocks: number of needed blocks
* @nclusters: number of needed blocks
* @flags: flags from ext4_mb_new_blocks()
*
* Check if filesystem has nblocks free & available for allocation.
* Check if filesystem has nclusters free & available for allocation.
* On success return 1, return 0 on failure.
*/
static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
s64 nblocks, unsigned int flags)
static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags)
{
s64 free_blocks, dirty_blocks, root_blocks;
struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
s64 free_clusters, dirty_clusters, root_clusters;
struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
free_blocks = percpu_counter_read_positive(fbc);
dirty_blocks = percpu_counter_read_positive(dbc);
root_blocks = ext4_r_blocks_count(sbi->s_es);
free_clusters = percpu_counter_read_positive(fcc);
dirty_clusters = percpu_counter_read_positive(dcc);
root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
EXT4_FREEBLOCKS_WATERMARK) {
free_blocks = percpu_counter_sum_positive(fbc);
dirty_blocks = percpu_counter_sum_positive(dbc);
if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
EXT4_FREECLUSTERS_WATERMARK) {
free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
dirty_clusters = percpu_counter_sum_positive(dcc);
}
/* Check whether we have space after
* accounting for current dirty blocks & root reserved blocks.
/* Check whether we have space after accounting for current
* dirty clusters & root reserved clusters.
*/
if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
return 1;
/* Hm, nope. Are (enough) root reserved blocks available? */
/* Hm, nope. Are (enough) root reserved clusters available? */
if (sbi->s_resuid == current_fsuid() ||
((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
capable(CAP_SYS_RESOURCE) ||
(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
if (free_blocks >= (nblocks + dirty_blocks))
if (free_clusters >= (nclusters + dirty_clusters))
return 1;
}
return 0;
}
int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
s64 nblocks, unsigned int flags)
int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags)
{
if (ext4_has_free_blocks(sbi, nblocks, flags)) {
percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
if (ext4_has_free_clusters(sbi, nclusters, flags)) {
percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
return 0;
} else
return -ENOSPC;
@@ -428,7 +470,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
*/
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
(*retries)++ > 3 ||
!EXT4_SB(sb)->s_journal)
return 0;
@@ -444,7 +486,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
* @handle: handle to this transaction
* @inode: file inode
* @goal: given target block(filesystem wide)
* @count: pointer to total number of blocks needed
* @count: pointer to total number of clusters needed
* @errp: error code
*
* Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
dquot_alloc_block_nofail(inode, ar.len);
dquot_alloc_block_nofail(inode,
EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
}
return ret;
}
/**
* ext4_count_free_blocks() -- count filesystem free blocks
* ext4_count_free_clusters() -- count filesystem free clusters
* @sb: superblock
*
* Adds up the number of free blocks from each block group.
* Adds up the number of free clusters from each block group.
*/
ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
{
ext4_fsblk_t desc_count;
struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
desc_count += ext4_free_blks_count(sb, gdp);
desc_count += ext4_free_group_clusters(sb, gdp);
brelse(bitmap_bh);
bitmap_bh = ext4_read_block_bitmap(sb, i);
if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
x = ext4_count_free(bitmap_bh, sb->s_blocksize);
printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
i, ext4_free_blks_count(sb, gdp), x);
i, ext4_free_group_clusters(sb, gdp), x);
bitmap_count += x;
}
brelse(bitmap_bh);
printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
", computed = %llu, %llu\n", ext4_free_blocks_count(es),
printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
", computed = %llu, %llu\n",
EXT4_B2C(sbi, ext4_free_blocks_count(es)),
desc_count, bitmap_count);
return bitmap_count;
#else
@@ -530,7 +574,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
desc_count += ext4_free_blks_count(sb, gdp);
desc_count += ext4_free_group_clusters(sb, gdp);
}
return desc_count;
@@ -620,6 +664,31 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
}
/*
* This function returns the number of file system metadata clusters at
* the beginning of a block group, including the reserved gdt blocks.
*/
unsigned ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned num;
/* Check for superblock and gdt backups in this group */
num = ext4_bg_has_super(sb, block_group);
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
sbi->s_desc_per_block) {
if (num) {
num += ext4_bg_num_gdb(sb, block_group);
num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
}
} else { /* For META_BG_BLOCK_GROUPS */
num += ext4_bg_num_gdb(sb, block_group);
}
return EXT4_NUM_B2C(sbi, num);
}
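
(A worked, user-space sketch of the arithmetic in this file, tying the
new cluster-bitmap offset in ext4_get_group_no_and_offset() to a
concrete geometry. The numbers are assumptions for illustration:
32768 blocks per group, s_first_data_block = 0, and 16-block clusters,
i.e. s_cluster_bits = 4.)

    #include <stdio.h>

    int main(void)
    {
            unsigned long long blocknr = 35000;
            unsigned blocks_per_group = 32768;
            unsigned cluster_bits = 4;

            unsigned long long group = blocknr / blocks_per_group;  /* 1 */
            unsigned offset = blocknr % blocks_per_group;           /* 2232 */
            /* With bigalloc, the allocation bitmap is indexed by
             * cluster, hence the extra shift in the patched code: */
            unsigned bitmap_off = offset >> cluster_bits;           /* 139 */

            printf("group=%llu block_off=%u bitmap_off=%u\n",
                   group, offset, bitmap_off);
            return 0;
    }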
/**
* ext4_inode_to_goal_block - return a hint for block allocation
* @inode: inode for block allocation

fs/ext4/ext4.h

@@ -144,9 +144,17 @@ struct ext4_allocation_request {
#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
#define EXT4_MAP_UNINIT (1 << BH_Uninit)
/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
* ext4_map_blocks wants to know whether or not the underlying cluster has
* already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
* the requested mapping was from previously mapped (or delayed allocated)
* cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
* should never appear on buffer_head's state flags.
*/
#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
EXT4_MAP_UNINIT)
EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
struct ext4_map_blocks {
ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@ struct ext4_io_submit {
# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
#endif
#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
#define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \
EXT4_SB(s)->s_cluster_bits)
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
# define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits)
#else
# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
#endif
@@ -258,6 +269,14 @@ struct ext4_io_submit {
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
/* Translate a block number to a cluster number */
#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
/* Translate a cluster number to a block number */
#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
/*
* Structure of a blocks group descriptor
*/
@@ -289,7 +308,7 @@ struct ext4_group_desc
struct flex_groups {
atomic_t free_inodes;
atomic_t free_blocks;
atomic_t free_clusters;
atomic_t used_dirs;
};
@@ -306,6 +325,7 @@ struct flex_groups {
#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
#ifdef __KERNEL__
# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
# define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group)
# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@ struct flex_groups {
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
@@ -520,6 +539,8 @@ struct ext4_new_group_data {
#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
/* Don't normalize allocation size (used for fallocate) */
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (used for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/*
* Flags used by ext4_free_blocks
@@ -528,6 +549,13 @@ struct ext4_new_group_data {
#define EXT4_FREE_BLOCKS_FORGET 0x0002
#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
/*
* Flags used by ext4_discard_partial_page_buffers
*/
#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
/*
* ioctl commands
@@ -538,9 +566,6 @@ struct ext4_new_group_data {
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@ struct ext4_new_group_data {
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
#endif
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
#endif
@@ -837,6 +859,7 @@ struct ext4_inode_info {
ext4_group_t i_last_alloc_group;
/* allocation reservation info for delalloc */
/* In case of bigalloc, these refer to clusters rather than blocks */
unsigned int i_reserved_data_blocks;
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@ struct ext4_inode_info {
/*
* Mount flags
*/
#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -918,6 +940,9 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
#define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
specified delalloc */
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@ struct ext4_super_block {
/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
__le32 s_first_data_block; /* First Data Block */
__le32 s_log_block_size; /* Block size */
__le32 s_obso_log_frag_size; /* Obsoleted fragment size */
__le32 s_log_cluster_size; /* Allocation cluster size */
/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
__le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
__le32 s_clusters_per_group; /* # Clusters per group */
__le32 s_inodes_per_group; /* # Inodes per group */
__le32 s_mtime; /* Mount time */
/*30*/ __le32 s_wtime; /* Write time */
@@ -1066,7 +1091,10 @@ struct ext4_super_block {
__u8 s_last_error_func[32]; /* function where the error happened */
#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
__u8 s_mount_opts[64];
__le32 s_reserved[112]; /* Padding to the end of the block */
__le32 s_usr_quota_inum; /* inode for tracking user quota */
__le32 s_grp_quota_inum; /* inode for tracking group quota */
__le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
__le32 s_reserved[109]; /* Padding to the end of the block */
};
#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1086,6 +1114,7 @@ struct ext4_sb_info {
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
unsigned long s_inodes_per_block;/* Number of inodes per block */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_clusters_per_group; /* Number of clusters in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
unsigned long s_gdb_count; /* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@ struct ext4_sb_info {
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
unsigned long s_overhead_last; /* Last calculated overhead */
unsigned long s_blocks_last; /* Last seen block count */
unsigned int s_cluster_ratio; /* Number of blocks per cluster */
unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@ struct ext4_sb_info {
u32 s_hash_seed[4];
int s_def_hash_version;
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeclusters_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct percpu_counter s_dirtyblocks_counter;
struct percpu_counter s_dirtyclusters_counter;
struct blockgroup_lock *s_blockgroup_lock;
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
@@ -1136,10 +1167,6 @@ struct ext4_sb_info {
u32 s_max_batch_time;
u32 s_min_batch_time;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
#ifdef CONFIG_QUOTA
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
@@ -1248,6 +1275,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
}
static inline void ext4_set_io_unwritten_flag(struct inode *inode,
struct ext4_io_end *io_end)
{
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
io_end->flag |= EXT4_IO_END_UNWRITTEN;
atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
}
}
/*
* Inode dynamic state flags
*/
@@ -1360,6 +1396,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1402,7 +1439,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC)
/*
* Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
unsigned int flags,
unsigned long *count,
int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
s64 nblocks, unsigned int flags);
extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags);
extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
@@ -1745,12 +1783,18 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
ext4_group_t block_group);
extern unsigned ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
#define ext4_free_blocks_after_init(sb, group, desc) \
ext4_init_block_bitmap(sb, NULL, group, desc)
extern void ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp);
extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group);
extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
/* dir.c */
@@ -1776,7 +1820,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
/* ialloc.c */
extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
const struct qstr *qstr, __u32 goal);
const struct qstr *qstr, __u32 goal,
uid_t *owner);
extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@ extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length);
extern int ext4_discard_partial_page_buffers(handle_t *handle,
struct address_space *mapping, loff_t from,
loff_t length, int flags);
extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
struct inode *inode, struct page *page, loff_t from,
loff_t length, int flags);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1927,8 +1978,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
struct ext4_group_desc *bg);
extern __u32 ext4_free_blks_count(struct super_block *sb,
struct ext4_group_desc *bg);
extern __u32 ext4_free_group_clusters(struct super_block *sb,
struct ext4_group_desc *bg);
extern __u32 ext4_free_inodes_count(struct super_block *sb,
struct ext4_group_desc *bg);
extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_inode_table_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_free_blks_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count);
extern void ext4_free_group_clusters_set(struct super_block *sb,
struct ext4_group_desc *bg,
__u32 count);
extern void ext4_free_inodes_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count);
extern void ext4_used_dirs_set(struct super_block *sb,
@@ -2051,13 +2103,13 @@ do { \
} while (0)
#ifdef CONFIG_SMP
/* Each CPU can accumulate percpu_counter_batch blocks in their local
* counters. So we need to make sure we have free blocks more
/* Each CPU can accumulate percpu_counter_batch clusters in their local
* counters. So we need to make sure we have free clusters more
* than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
*/
#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
#else
#define EXT4_FREEBLOCKS_WATERMARK 0
#define EXT4_FREECLUSTERS_WATERMARK 0
#endif
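/* Worked example (illustrative, not in the source): with, say,
 * percpu_counter_batch = 32 and nr_cpu_ids = 8, the watermark is
 * 4 * 32 * 8 = 1024 clusters; when the fast per-CPU reads fall below
 * this slack, ext4_has_free_clusters() re-reads the counters with the
 * exact percpu_counter_sum_positive(). */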
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -2243,10 +2295,19 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
enum ext4_state_bits {
BH_Uninit /* blocks are allocated but uninitialized on disk */
= BH_JBDPrivateStart,
BH_AllocFromCluster, /* allocated blocks were part of already
* allocated cluster. Note that this flag will
* never, ever appear in a buffer_head's state
* flag. See EXT4_MAP_FROM_CLUSTER to see where
* this is used. */
BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This
* flag is set when ext4_map_blocks is called on a
* delayed allocated block to get its real mapping. */
};
BUFFER_FNS(Uninit, uninit)
TAS_BUFFER_FNS(Uninit, uninit)
BUFFER_FNS(Da_Mapped, da_mapped)
/*
* Add new method to test whether block and inode bitmaps are properly
@@ -2282,4 +2343,6 @@ extern void ext4_resize_end(struct super_block *sb);
#endif /* __KERNEL__ */
#include "ext4_extents.h"
#endif /* _EXT4_H */
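
(To make the new cluster-translation macros above concrete, here is a
small stand-alone sketch. The geometry is assumed for illustration: a
cluster ratio of 16 blocks per cluster, so s_cluster_bits = 4; the
struct is a stand-in for the fields the real macros read from
ext4_sb_info.)

    #include <stdio.h>

    struct sbi { unsigned cluster_bits, cluster_ratio; };

    #define B2C(s, blk)      ((blk) >> (s)->cluster_bits)
    #define C2B(s, cluster)  ((cluster) << (s)->cluster_bits)
    #define NUM_B2C(s, blks) (((blks) + (s)->cluster_ratio - 1) >> \
                              (s)->cluster_bits)

    int main(void)
    {
            struct sbi s = { .cluster_bits = 4, .cluster_ratio = 16 };

            printf("B2C(100)     = %u\n", B2C(&s, 100u));     /* 6  */
            printf("C2B(6)       = %u\n", C2B(&s, 6u));       /* 96 */
            /* NUM_B2C rounds up: 100 blocks span 7 clusters. */
            printf("NUM_B2C(100) = %u\n", NUM_B2C(&s, 100u)); /* 7  */
            return 0;
    }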

fs/ext4/ext4_extents.h

@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
int search_hint_reverse);
#endif /* _EXT4_EXTENTS */

fs/ext4/ext4_jbd2.c

@@ -109,9 +109,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
if (err)
ext4_journal_abort_handle(where, line, __func__,
bh, handle, err);
if (err) {
/* Errors can only happen if there is a bug */
handle->h_err = err;
__ext4_journal_stop(where, line, handle);
}
} else {
if (inode)
mark_buffer_dirty_inode(bh, inode);

File diff suppressed because it is too large.

fs/ext4/file.c

@@ -181,8 +181,8 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
path.dentry = mnt->mnt_root;
cp = d_path(&path, buf, sizeof(buf));
if (!IS_ERR(cp)) {
memcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
strlcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
ext4_mark_super_dirty(sb);
}
}

fs/ext4/fsync.c

@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
* to written.
* The function return the number of pending IOs on success.
*/
extern int ext4_flush_completed_IO(struct inode *inode)
int ext4_flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -83,14 +83,12 @@ extern int ext4_flush_completed_IO(struct inode *inode)
int ret = 0;
int ret2 = 0;
if (list_empty(&ei->i_completed_io_list))
return ret;
dump_completed_IO(inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
while (!list_empty(&ei->i_completed_io_list)){
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list);
list_del_init(&io->list);
/*
* Calling ext4_end_io_nolock() to convert completed
* IO to written.
@@ -107,11 +105,9 @@ extern int ext4_flush_completed_IO(struct inode *inode)
*/
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
ret = ext4_end_io_nolock(io);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (ret < 0)
ret2 = ret;
else
list_del_init(&io->list);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0;

fs/ext4/ialloc.c

@@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
* allocation, essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
ext4_free_blks_set(sb, gdp, 0);
ext4_free_group_clusters_set(sb, gdp, 0);
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
@@ -293,121 +293,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ext4_std_error(sb, fatal);
}
/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
* free space and a low directory-to-inode ratio; if that fails, then of
* the groups with above-average free space, that group with the fewest
* directories already is chosen.
*
* For other inodes, search forward from the parent directory\'s block
* group to find a free inode.
*/
static int find_group_dir(struct super_block *sb, struct inode *parent,
ext4_group_t *best_group)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
unsigned int freei, avefreei;
struct ext4_group_desc *desc, *best_desc = NULL;
ext4_group_t group;
int ret = -1;
freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
avefreei = freei / ngroups;
for (group = 0; group < ngroups; group++) {
desc = ext4_get_group_desc(sb, group, NULL);
if (!desc || !ext4_free_inodes_count(sb, desc))
continue;
if (ext4_free_inodes_count(sb, desc) < avefreei)
continue;
if (!best_desc ||
(ext4_free_blks_count(sb, desc) >
ext4_free_blks_count(sb, best_desc))) {
*best_group = group;
best_desc = desc;
ret = 0;
}
}
return ret;
}
#define free_block_ratio 10
static int find_group_flex(struct super_block *sb, struct inode *parent,
ext4_group_t *best_group)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *desc;
struct flex_groups *flex_group = sbi->s_flex_groups;
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
ext4_group_t ngroups = ext4_get_groups_count(sb);
int flex_size = ext4_flex_bg_size(sbi);
ext4_group_t best_flex = parent_fbg_group;
int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
int flexbg_free_blocks;
int flex_freeb_ratio;
ext4_group_t n_fbg_groups;
ext4_group_t i;
n_fbg_groups = (ngroups + flex_size - 1) >>
sbi->s_log_groups_per_flex;
find_close_to_parent:
flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
if (atomic_read(&flex_group[best_flex].free_inodes) &&
flex_freeb_ratio > free_block_ratio)
goto found_flexbg;
if (best_flex && best_flex == parent_fbg_group) {
best_flex--;
goto find_close_to_parent;
}
for (i = 0; i < n_fbg_groups; i++) {
if (i == parent_fbg_group || i == parent_fbg_group - 1)
continue;
flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
if (flex_freeb_ratio > free_block_ratio &&
(atomic_read(&flex_group[i].free_inodes))) {
best_flex = i;
goto found_flexbg;
}
if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
((atomic_read(&flex_group[i].free_blocks) >
atomic_read(&flex_group[best_flex].free_blocks)) &&
atomic_read(&flex_group[i].free_inodes)))
best_flex = i;
}
if (!atomic_read(&flex_group[best_flex].free_inodes) ||
!atomic_read(&flex_group[best_flex].free_blocks))
return -1;
found_flexbg:
for (i = best_flex * flex_size; i < ngroups &&
i < (best_flex + 1) * flex_size; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
if (ext4_free_inodes_count(sb, desc)) {
*best_group = i;
goto out;
}
}
return -1;
out:
return 0;
}
struct orlov_stats {
__u32 free_inodes;
__u32 free_blocks;
__u32 free_clusters;
__u32 used_dirs;
};
@@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
if (flex_size > 1) {
stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
stats->free_blocks = atomic_read(&flex_group[g].free_blocks);
stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
return;
}
@@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
desc = ext4_get_group_desc(sb, g, NULL);
if (desc) {
stats->free_inodes = ext4_free_inodes_count(sb, desc);
stats->free_blocks = ext4_free_blks_count(sb, desc);
stats->free_clusters = ext4_free_group_clusters(sb, desc);
stats->used_dirs = ext4_used_dirs_count(sb, desc);
} else {
stats->free_inodes = 0;
stats->free_blocks = 0;
stats->free_clusters = 0;
stats->used_dirs = 0;
}
}
@@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t real_ngroups = ext4_get_groups_count(sb);
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei;
ext4_fsblk_t freeb, avefreeb;
ext4_fsblk_t freeb, avefreec;
unsigned int ndirs;
int max_dirs, min_inodes;
ext4_grpblk_t min_blocks;
ext4_grpblk_t min_clusters;
ext4_group_t i, grp, g, ngroups;
struct ext4_group_desc *desc;
struct orlov_stats stats;
@@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
avefreei = freei / ngroups;
freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
avefreeb = freeb;
do_div(avefreeb, ngroups);
freeb = EXT4_C2B(sbi,
percpu_counter_read_positive(&sbi->s_freeclusters_counter));
avefreec = freeb;
do_div(avefreec, ngroups);
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
if (S_ISDIR(mode) &&
@@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
continue;
if (stats.free_inodes < avefreei)
continue;
if (stats.free_blocks < avefreeb)
if (stats.free_clusters < avefreec)
continue;
grp = g;
ret = 0;
@@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
min_inodes = avefreei - inodes_per_group*flex_size / 4;
if (min_inodes < 1)
min_inodes = 1;
min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
/*
* Start looking in the flex group where we last allocated an
@@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
continue;
if (stats.free_inodes < min_inodes)
continue;
if (stats.free_blocks < min_blocks)
if (stats.free_clusters < min_clusters)
continue;
goto found_flex_bg;
}
@@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*group = parent_group;
desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && ext4_free_inodes_count(sb, desc) &&
ext4_free_blks_count(sb, desc))
ext4_free_group_clusters(sb, desc))
return 0;
/*
@@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*group -= ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && ext4_free_inodes_count(sb, desc) &&
ext4_free_blks_count(sb, desc))
ext4_free_group_clusters(sb, desc))
return 0;
}
@@ -802,7 +691,7 @@ static int ext4_claim_inode(struct super_block *sb,
* group to find a free inode.
*/
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
const struct qstr *qstr, __u32 goal)
const struct qstr *qstr, __u32 goal, uid_t *owner)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
int ret2, err = 0;
struct inode *ret;
ext4_group_t i;
int free = 0;
static int once = 1;
ext4_group_t flex_group;
/* Cannot create files in a deleted directory */
@@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
goto got_group;
}
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
ret2 = find_group_other(sb, dir, &group, mode);
if (ret2 == 0 && once) {
once = 0;
printk(KERN_NOTICE "ext4: find_group_flex "
"failed, fallback succeeded dir %lu\n",
dir->i_ino);
}
}
goto got_group;
}
if (S_ISDIR(mode)) {
if (test_opt(sb, OLDALLOC))
ret2 = find_group_dir(sb, dir, &group);
else
ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
} else
if (S_ISDIR(mode))
ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
else
ret2 = find_group_other(sb, dir, &group, mode);
got_group:
@@ -950,26 +820,21 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
goto fail;
}
free = 0;
ext4_lock_group(sb, group);
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
brelse(block_bitmap_bh);
/* recheck and clear flag under lock if we still need to */
ext4_lock_group(sb, group);
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
free = ext4_free_blocks_after_init(sb, group, gdp);
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_blks_set(sb, gdp, free);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
gdp);
}
ext4_unlock_group(sb, group);
/* Don't need to dirty bitmap block if we didn't change it */
if (free) {
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
err = ext4_handle_dirty_metadata(handle,
NULL, block_bitmap_bh);
}
brelse(block_bitmap_bh);
if (err)
goto fail;
}
@@ -987,8 +852,11 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
flex_group = ext4_flex_group(sbi, group);
atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
}
if (test_opt(sb, GRPID)) {
if (owner) {
inode->i_mode = mode;
inode->i_uid = owner[0];
inode->i_gid = owner[1];
} else if (test_opt(sb, GRPID)) {
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = dir->i_gid;
@@ -1005,11 +873,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
ei->i_dir_start_lookup = 0;
ei->i_disksize = 0;
/*
* Don't inherit extent flag from directory, amongst others. We set
* extent flag on newly created directory and file only if -o extent
* mount option is specified
*/
/* Don't inherit extent flag from directory, amongst others. */
ei->i_flags =
ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
ei->i_file_acl = 0;
@@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
* inode allocation from the current group, so we take alloc_sem lock, to
* block ext4_claim_inode until we are finished.
*/
extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int barrier)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);

fs/ext4/indirect.c

@@ -699,6 +699,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
/*
* Okay, we need to do block allocation.
*/
if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
"non-extent mapped inodes with bigalloc");
return -ENOSPC;
}
goal = ext4_find_goal(inode, map->m_lblk, partial);
/* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1343,7 +1350,9 @@ void ext4_ind_truncate(struct inode *inode)
__le32 nr = 0;
int n = 0;
ext4_lblk_t last_block, max_block;
loff_t page_len;
unsigned blocksize = inode->i_sb->s_blocksize;
int err;
handle = start_transaction(inode);
if (IS_ERR(handle))
@@ -1354,9 +1363,16 @@ void ext4_ind_truncate(struct inode *inode)
max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
if (inode->i_size & (blocksize - 1))
if (ext4_block_truncate_page(handle, mapping, inode->i_size))
if (inode->i_size % PAGE_CACHE_SIZE != 0) {
page_len = PAGE_CACHE_SIZE -
(inode->i_size & (PAGE_CACHE_SIZE - 1));
err = ext4_discard_partial_page_buffers(handle,
mapping, inode->i_size, page_len, 0);
if (err)
goto out_stop;
}
if (last_block != max_block) {
n = ext4_block_to_path(inode, last_block, offsets, NULL);

fs/ext4/inode.c

@@ -42,7 +42,6 @@
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "ext4_extents.h"
#include "truncate.h"
#include <trace/events/ext4.h>
@@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
struct ext4_inode_info *ei = EXT4_I(inode);
spin_lock(&ei->i_block_reservation_lock);
trace_ext4_da_update_reserve_space(inode, used);
trace_ext4_da_update_reserve_space(inode, used, quota_claim);
if (unlikely(used > ei->i_reserved_data_blocks)) {
ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
"with only %d reserved data blocks\n",
@@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
/* Update per-inode reservations */
ei->i_reserved_data_blocks -= used;
ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
percpu_counter_sub(&sbi->s_dirtyblocks_counter,
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
used + ei->i_allocated_meta_blocks);
ei->i_allocated_meta_blocks = 0;
@@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
* only when we have written all of the delayed
* allocation blocks.
*/
percpu_counter_sub(&sbi->s_dirtyblocks_counter,
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
ei->i_reserved_meta_blocks);
ei->i_reserved_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
@@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
/* Update quota subsystem for data blocks */
if (quota_claim)
dquot_claim_block(inode, used);
dquot_claim_block(inode, EXT4_C2B(sbi, used));
else {
/*
* We did fallocate with an offset that is already delayed
* allocated. So on delayed allocated writeback we should
* not re-claim the quota for fallocated blocks.
*/
dquot_release_reservation_block(inode, used);
dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
}
/*
@@ -398,6 +397,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
return num;
}
/*
* Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
*/
static void set_buffers_da_mapped(struct inode *inode,
struct ext4_map_blocks *map)
{
struct address_space *mapping = inode->i_mapping;
struct pagevec pvec;
int i, nr_pages;
pgoff_t index, end;
index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
end = (map->m_lblk + map->m_len - 1) >>
(PAGE_CACHE_SHIFT - inode->i_blkbits);
pagevec_init(&pvec, 0);
while (index <= end) {
nr_pages = pagevec_lookup(&pvec, mapping, index,
min(end - index + 1,
(pgoff_t)PAGEVEC_SIZE));
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
struct buffer_head *bh, *head;
if (unlikely(page->mapping != mapping) ||
!PageDirty(page))
break;
if (page_has_buffers(page)) {
bh = head = page_buffers(page);
do {
set_buffer_da_mapped(bh);
bh = bh->b_this_page;
} while (bh != head);
}
index++;
}
pagevec_release(&pvec);
}
}
/*
* The ext4_map_blocks() function tries to look up the requested blocks,
* and returns if the blocks are already mapped.
@@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
* the buffer head is mapped.
*
* It returns 0 if plain look up failed (blocks have not been allocated), in
* that casem, buffer head is unmapped
* that case, buffer head is unmapped
*
* It returns the error in case of allocation failure.
*/
@@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
*/
down_read((&EXT4_I(inode)->i_data_sem));
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, 0);
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
} else {
retval = ext4_ind_map_blocks(handle, inode, map, 0);
retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
}
up_read((&EXT4_I(inode)->i_data_sem));
@@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* Returns if the blocks have already allocated
*
* Note that if blocks have been preallocated
* ext4_ext_get_block() returns th create = 0
* ext4_ext_get_block() returns the create = 0
* with buffer head unmapped.
*/
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
@@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
ext4_da_update_reserve_space(inode, retval, 1);
}
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
/* If we have successfully mapped the delayed allocated blocks,
* set the BH_Da_Mapped bit on them. Its important to do this
* under the protection of i_data_sem.
*/
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
set_buffers_da_mapped(inode, map);
}
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret = check_block_validity(inode, map);
@@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file,
ext4_orphan_add(handle, inode);
if (ret2 < 0)
ret = ret2;
} else {
unlock_page(page);
page_cache_release(page);
}
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
@@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file,
}
/*
* Reserve a single block located at lblock
* Reserve a single cluster located at lblock
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long md_needed;
unsigned int md_needed;
int ret;
/*
@@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
*/
repeat:
spin_lock(&ei->i_block_reservation_lock);
md_needed = ext4_calc_metadata_amount(inode, lblock);
md_needed = EXT4_NUM_B2C(sbi,
ext4_calc_metadata_amount(inode, lblock));
trace_ext4_da_reserve_space(inode, md_needed);
spin_unlock(&ei->i_block_reservation_lock);
@@ -1063,15 +1120,15 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* us from metadata over-estimation, though we may go over by
* a small amount in the end. Here we just reserve for data.
*/
ret = dquot_reserve_block(inode, 1);
ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
if (ret)
return ret;
/*
* We do still charge estimated metadata to the sb though;
* we cannot afford to run out of free blocks.
*/
if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
dquot_release_reservation_block(inode, 1);
if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
* We can release all of the reserved metadata blocks
* only when we have written all of the delayed
* allocation blocks.
* Note that in case of bigalloc, i_reserved_meta_blocks,
* i_reserved_data_blocks, etc. refer to number of clusters.
*/
percpu_counter_sub(&sbi->s_dirtyblocks_counter,
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
ei->i_reserved_meta_blocks);
ei->i_reserved_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
}
/* update fs dirty data blocks counter */
percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
dquot_release_reservation_block(inode, to_free);
dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
}
static void ext4_da_page_release_reservation(struct page *page,
@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page,
int to_release = 0;
struct buffer_head *head, *bh;
unsigned int curr_off = 0;
struct inode *inode = page->mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int num_clusters;
head = page_buffers(page);
bh = head;
@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page,
if ((offset <= curr_off) && (buffer_delay(bh))) {
to_release++;
clear_buffer_delay(bh);
clear_buffer_da_mapped(bh);
}
curr_off = next_off;
} while ((bh = bh->b_this_page) != head);
ext4_da_release_space(page->mapping->host, to_release);
/* If we have released all the blocks belonging to a cluster, then we
* need to release the reserved space for that cluster. */
num_clusters = EXT4_NUM_B2C(sbi, to_release);
while (num_clusters > 0) {
ext4_fsblk_t lblk;
lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
((num_clusters - 1) << sbi->s_cluster_bits);
if (sbi->s_cluster_ratio == 1 ||
!ext4_find_delalloc_cluster(inode, lblk, 1))
ext4_da_release_space(inode, 1);
num_clusters--;
}
}
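
A worked pass through the release loop above, with invented geometry (illustrative only, not part of the patch):

/* 1k blocks, 4k pages, s_cluster_ratio = 16 (s_cluster_bits = 4).
 * Page index 8 covers blocks 32..35; all four buffers were delayed,
 * so to_release = 4 and num_clusters = EXT4_NUM_B2C(sbi, 4) = 1.
 * The single iteration computes lblk = (8 << 2) + (0 << 4) = 32.
 * If no delayed block remains anywhere in cluster 2 (blocks 32..47),
 * ext4_find_delalloc_cluster() returns 0 and the one cluster of
 * reservation is given back via ext4_da_release_space(inode, 1).
 */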
/*
@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
clear_buffer_delay(bh);
bh->b_blocknr = pblock;
}
if (buffer_da_mapped(bh))
clear_buffer_da_mapped(bh);
if (buffer_unwritten(bh) ||
buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock);
@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
printk(KERN_CRIT "Total free blocks count %lld\n",
ext4_count_free_blocks(inode->i_sb));
EXT4_C2B(EXT4_SB(inode->i_sb),
ext4_count_free_clusters(inode->i_sb)));
printk(KERN_CRIT "Free/Dirty block details\n");
printk(KERN_CRIT "free_blocks=%lld\n",
(long long) percpu_counter_sum(&sbi->s_freeblocks_counter));
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
percpu_counter_sum(&sbi->s_freeclusters_counter)));
printk(KERN_CRIT "dirty_blocks=%lld\n",
(long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
printk(KERN_CRIT "Block reservation details\n");
printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
EXT4_I(inode)->i_reserved_data_blocks);
@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
if (err == -EAGAIN)
goto submit_io;
if (err == -ENOSPC &&
ext4_count_free_blocks(sb)) {
if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
mpd->retval = err;
goto submit_io;
}
@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
for (i = 0; i < map.m_len; i++)
unmap_underlying_metadata(bdev, map.m_pblk + i);
}
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err)
/* This only happens if the journal is aborted */
return;
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err) {
/* Only if the journal is aborted */
mpd->retval = err;
goto submit_io;
}
}
}
/*
@ -1583,6 +1665,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
}
/*
* This function grabs code from the very beginning of
* ext4_map_blocks, but assumes that the caller is from delayed write
* time. This function looks up the requested blocks and sets the
* buffer delay bit under the protection of i_data_sem.
*/
static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
struct ext4_map_blocks *map,
struct buffer_head *bh)
{
int retval;
sector_t invalid_block = ~((sector_t) 0xffff);
if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
invalid_block = ~0;
map->m_flags = 0;
ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
"logical block %lu\n", inode->i_ino, map->m_len,
(unsigned long) map->m_lblk);
/*
* Try to see if we can get the block without requesting a new
* file system block.
*/
down_read((&EXT4_I(inode)->i_data_sem));
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
retval = ext4_ext_map_blocks(NULL, inode, map, 0);
else
retval = ext4_ind_map_blocks(NULL, inode, map, 0);
if (retval == 0) {
/*
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
/* If the block was allocated from a previously allocated cluster,
 * then we don't need to reserve it again. */
if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
retval = ext4_da_reserve_space(inode, iblock);
if (retval)
/* not enough space to reserve */
goto out_unlock;
}
/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
* and it should not appear on the bh->b_state.
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh);
set_buffer_delay(bh);
}
out_unlock:
up_read((&EXT4_I(inode)->i_data_sem));
return retval;
}
/*
* This is a special get_blocks_t callback which is used by
* ext4_da_write_begin(). It will either return mapped block or
@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
{
struct ext4_map_blocks map;
int ret = 0;
sector_t invalid_block = ~((sector_t) 0xffff);
if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
invalid_block = ~0;
BUG_ON(create == 0);
BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* preallocated blocks are unmapped but should be treated
* the same as allocated blocks.
*/
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret < 0)
ret = ext4_da_map_blocks(inode, iblock, &map, bh);
if (ret <= 0)
return ret;
if (ret == 0) {
if (buffer_delay(bh))
return 0; /* Not sure this could or should happen */
/*
* XXX: __block_write_begin() unmaps passed block, is it OK?
*/
ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/* not enough space to reserve */
return ret;
map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh);
set_buffer_delay(bh);
return 0;
}
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
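
For context, ext4_da_get_block_prep() follows the generic get_block_t contract and is invoked once per block by helpers such as __block_write_begin(). A simplified sketch of the caller side (not the actual VFS code):

/* get_block_t, as declared in include/linux/fs.h */
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

/* simplified usage: probe one block for a delalloc write */
err = ext4_da_get_block_prep(inode, iblock, bh, 1 /* create */);
if (!err && buffer_delay(bh))
	;	/* space reserved; real allocation deferred to writeback */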
@ -2050,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping,
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
pgoff_t done_index = 0;
pgoff_t end;
struct blk_plug plug;
trace_ext4_da_writepages(inode, wbc);
@ -2128,6 +2251,7 @@ static int ext4_da_writepages(struct address_space *mapping,
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
blk_start_plug(&plug);
while (!ret && wbc->nr_to_write > 0) {
/*
@ -2178,11 +2302,12 @@ static int ext4_da_writepages(struct address_space *mapping,
ret = 0;
} else if (ret == MPAGE_DA_EXTENT_TAIL) {
/*
* got one extent now try with
* rest of the pages
* Got one extent; now try with the rest of the pages.
* If mpd.retval is set to -EIO, the journal is aborted.
* So we don't need to write any more.
*/
pages_written += mpd.pages_written;
ret = 0;
ret = mpd.retval;
io_done = 1;
} else if (wbc->nr_to_write)
/*
@ -2192,6 +2317,7 @@ static int ext4_da_writepages(struct address_space *mapping,
*/
break;
}
blk_finish_plug(&plug);
if (!io_done && !cycled) {
cycled = 1;
index = 0;
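
The plug added in this hunk batches the bios generated while walking dirty pages; the generic pattern, independent of ext4, looks like this:

struct blk_plug plug;

blk_start_plug(&plug);		/* queue bios on the task, not the device */
/* ... submit many small writes, e.g. via write_cache_pages() ... */
blk_finish_plug(&plug);		/* hand the whole batch to the block layer */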
@ -2230,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb)
* Delalloc needs accurate free block accounting. So switch
* to non delalloc when we are near to error range.
*/
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
free_blocks = EXT4_C2B(sbi,
percpu_counter_read_positive(&sbi->s_freeclusters_counter));
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
if (2 * free_blocks < 3 * dirty_blocks ||
free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
/*
* free block count is less than 150% of dirty blocks
* or free blocks is less than watermark
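
To make the heuristic concrete, a worked example with invented numbers (not part of the patch):

/* free_blocks = 100, dirty_blocks = 70:
 * 2 * 100 = 200 < 3 * 70 = 210, i.e. less than 150% headroom,
 * so ext4_nonda_switch() returns 1 and the write path falls
 * back to non-delalloc.
 */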
@ -2259,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
struct inode *inode = mapping->host;
handle_t *handle;
loff_t page_len;
index = pos >> PAGE_CACHE_SHIFT;
@ -2305,6 +2433,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
*/
if (pos + len > inode->i_size)
ext4_truncate_failed_write(inode);
} else {
page_len = pos & (PAGE_CACHE_SIZE - 1);
if (page_len > 0) {
ret = ext4_discard_partial_page_buffers_no_lock(handle,
inode, page, pos - page_len, page_len,
EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
}
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@ -2347,6 +2482,7 @@ static int ext4_da_write_end(struct file *file,
loff_t new_i_size;
unsigned long start, end;
int write_mode = (int)(unsigned long)fsdata;
loff_t page_len;
if (write_mode == FALL_BACK_TO_NONDELALLOC) {
if (ext4_should_order_data(inode)) {
@ -2395,6 +2531,16 @@ static int ext4_da_write_end(struct file *file,
}
ret2 = generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
page_len = PAGE_CACHE_SIZE -
((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
if (page_len > 0) {
ret = ext4_discard_partial_page_buffers_no_lock(handle,
inode, page, pos + copied - 1, page_len,
EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
}
copied = ret2;
if (ret2 < 0)
ret = ret2;
@ -2689,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
* but being more careful is always safe for future changes.
*/
inode = io_end->inode;
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
io_end->flag |= EXT4_IO_END_UNWRITTEN;
atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
}
ext4_set_io_unwritten_flag(inode, io_end);
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@ -2858,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
struct inode *inode = file->f_mapping->host;
ssize_t ret;
/*
* If we are doing data journalling we don't support O_DIRECT
*/
if (ext4_should_journal_data(inode))
return 0;
trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@ -2927,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.bmap = ext4_bmap,
.invalidatepage = ext4_invalidatepage,
.releasepage = ext4_releasepage,
.direct_IO = ext4_direct_IO,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
};
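
A note on the data=journal check added to ext4_direct_IO() above: returning 0 from ->direct_IO means "nothing was written directly", and the generic write path then falls back to buffered I/O, matching the documented behavior that data=journal disables O_DIRECT support. A rough paraphrase of the generic fallback, not actual kernel code:

/* paraphrase of the generic direct-write path */
written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, nr_segs);
if (written == 0)
	;	/* fall through to the buffered write path */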
@ -2963,6 +3113,227 @@ void ext4_set_aops(struct inode *inode)
inode->i_mapping->a_ops = &ext4_journalled_aops;
}
/*
* ext4_discard_partial_page_buffers()
* Wrapper function for ext4_discard_partial_page_buffers_no_lock.
* This function finds and locks the page containing the offset
* "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
* Calling functions that already have the page locked should call
* ext4_discard_partial_page_buffers_no_lock directly.
*/
int ext4_discard_partial_page_buffers(handle_t *handle,
struct address_space *mapping, loff_t from,
loff_t length, int flags)
{
struct inode *inode = mapping->host;
struct page *page;
int err = 0;
page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
mapping_gfp_mask(mapping) & ~__GFP_FS);
if (!page)
return -ENOMEM;
err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
from, length, flags);
unlock_page(page);
page_cache_release(page);
return err;
}
/*
* ext4_discard_partial_page_buffers_no_lock()
* Zeros a page range of length 'length' starting from offset 'from'.
* Buffer heads that correspond to the block-aligned regions of the
* zeroed range will be unmapped.  Regions that are not block aligned
* will have the corresponding buffer head mapped if needed so that
* that region of the page can be updated with the partial zero out.
*
* This function assumes that the page has already been locked.  The
* range to be discarded must be contained within the given page.
* If the specified range exceeds the end of the page it will be shortened
* to the end of the page that corresponds to 'from'.  This function is
* appropriate for updating a page and its buffer heads to be unmapped and
* zeroed for blocks that have been either released, or are going to be
* released.
*
* handle: The journal handle
* inode:  The file's inode
* page:   A locked page that contains the offset "from"
* from:   The starting byte offset (from the beginning of the file)
*         to begin discarding
* len:    The length in bytes to discard
* flags:  Optional flags that may be used:
*
*         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
*         Only zero the regions of the page whose buffer heads
*         have already been unmapped.  This flag is appropriate
*         for updating the contents of a page whose blocks may
*         have already been released, and we only want to zero
*         out the regions that correspond to those released blocks.
*
* Returns zero on success or negative on failure.
*/
int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
struct inode *inode, struct page *page, loff_t from,
loff_t length, int flags)
{
ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
unsigned int offset = from & (PAGE_CACHE_SIZE-1);
unsigned int blocksize, max, pos;
ext4_lblk_t iblock;
struct buffer_head *bh;
int err = 0;
blocksize = inode->i_sb->s_blocksize;
max = PAGE_CACHE_SIZE - offset;
if (index != page->index)
return -EINVAL;
/*
* Correct the length if it does not fall between
* 'from' and the end of the page
*/
if (length > max || length < 0)
length = max;
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
if (!page_has_buffers(page)) {
/*
* If the range to be discarded covers a partial block
* we need to get the page buffers. This is because
* partial blocks cannot be released and the page needs
* to be updated with the contents of the block before
* we write the zeros on top of it.
*/
if ((from & (blocksize - 1)) ||
((from + length) & (blocksize - 1))) {
create_empty_buffers(page, blocksize, 0);
} else {
/*
* If there are no partial blocks,
* there is nothing to update,
* so we can return now
*/
return 0;
}
}
/* Find the buffer that contains "offset" */
bh = page_buffers(page);
pos = blocksize;
while (offset >= pos) {
bh = bh->b_this_page;
iblock++;
pos += blocksize;
}
pos = offset;
while (pos < offset + length) {
unsigned int end_of_block, range_to_discard;
err = 0;
/* The length of space left to zero and unmap */
range_to_discard = offset + length - pos;
/* The length of space until the end of the block */
end_of_block = blocksize - (pos & (blocksize-1));
/*
* Do not unmap or zero past end of block
* for this buffer head
*/
if (range_to_discard > end_of_block)
range_to_discard = end_of_block;
/*
* Skip this buffer head if we are only zeroing unmapped
* regions of the page
*/
if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
buffer_mapped(bh))
goto next;
/* If the range is block aligned, unmap */
if (range_to_discard == blocksize) {
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
clear_buffer_uptodate(bh);
zero_user(page, pos, range_to_discard);
BUFFER_TRACE(bh, "Buffer discarded");
goto next;
}
/*
* If this block is not completely contained in the range
* to be discarded, then it is not going to be released. Because
* we need to keep this block, we need to make sure this part
* of the page is uptodate before we modify it by writing
* partial zeros on it.
*/
if (!buffer_mapped(bh)) {
/*
* Buffer head must be mapped before we can read
* from the block
*/
BUFFER_TRACE(bh, "unmapped");
ext4_get_block(inode, iblock, bh, 0);
/* unmapped? It's a hole - nothing to do */
if (!buffer_mapped(bh)) {
BUFFER_TRACE(bh, "still unmapped");
goto next;
}
}
/* Ok, it's mapped. Make sure it's up-to-date */
if (PageUptodate(page))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
err = -EIO;
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
/* Uhhuh. Read error. Complain and punt.*/
if (!buffer_uptodate(bh))
goto next;
}
if (ext4_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, bh);
if (err)
goto next;
}
zero_user(page, pos, range_to_discard);
err = 0;
if (ext4_should_journal_data(inode)) {
err = ext4_handle_dirty_metadata(handle, inode, bh);
} else
mark_buffer_dirty(bh);
BUFFER_TRACE(bh, "Partial buffer zeroed");
next:
bh = bh->b_this_page;
iblock++;
pos += range_to_discard;
}
return err;
}
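
A worked pass through the zeroing loop, with invented numbers:

/* blocksize = 1024, from = 1500, length = 2000 (so offset = 1500):
 * pos = 1500: range_to_discard = 2000, end_of_block = 1024 - 476 = 548
 *             -> clamped to 548; partial block, buffer stays mapped,
 *                bytes 1500..2047 zeroed
 * pos = 2048: range = 1452, end_of_block = 1024 -> whole block:
 *                buffer unmapped and bytes 2048..3071 zeroed
 * pos = 3072: range = 428 -> partial block, bytes 3072..3499 zeroed
 * pos = 3500: loop exits
 */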
/*
* ext4_block_truncate_page() zeroes out a mapping from file offset `from'
* up to the end of the block which corresponds to `from'.
@ -3005,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle,
page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
mapping_gfp_mask(mapping) & ~__GFP_FS);
if (!page)
return -EINVAL;
return -ENOMEM;
blocksize = inode->i_sb->s_blocksize;
max = blocksize - (offset & (blocksize - 1));
@ -3074,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle,
err = 0;
if (ext4_should_journal_data(inode)) {
err = ext4_handle_dirty_metadata(handle, inode, bh);
} else {
if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
err = ext4_jbd2_file_inode(handle, inode);
} else
mark_buffer_dirty(bh);
}
unlock:
unlock_page(page);
@ -3119,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
return -ENOTSUPP;
}
if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
/* TODO: Add support for bigalloc file systems */
return -ENOTSUPP;
}
return ext4_ext_punch_hole(file, offset, length);
}
@ -4420,6 +4793,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
ext4_journal_stop(handle);
goto out;
}
ext4_set_inode_state(inode, EXT4_STATE_JDATA);

View file

@ -21,6 +21,7 @@
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = filp->f_dentry->d_inode;
struct super_block *sb = inode->i_sb;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int flags;
@ -173,33 +174,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
mnt_drop_write(filp->f_path.mnt);
return err;
}
#ifdef CONFIG_JBD2_DEBUG
case EXT4_IOC_WAIT_FOR_READONLY:
/*
* This is racy - by the time we're woken up and running,
* the superblock could be released. And the module could
* have been unloaded. So sue me.
*
* Returns 1 if it slept, else zero.
*/
{
struct super_block *sb = inode->i_sb;
DECLARE_WAITQUEUE(wait, current);
int ret = 0;
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
schedule();
ret = 1;
}
remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
return ret;
}
#endif
case EXT4_IOC_GROUP_EXTEND: {
ext4_fsblk_t n_blocks_count;
struct super_block *sb = inode->i_sb;
int err, err2=0;
err = ext4_resize_begin(sb);
@ -209,6 +185,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (get_user(n_blocks_count, (__u32 __user *)arg))
return -EFAULT;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not supported with bigalloc");
return -EOPNOTSUPP;
}
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
@ -250,6 +233,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto mext_out;
}
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"Online defrag not supported with bigalloc");
return -EOPNOTSUPP;
}
err = mnt_want_write(filp->f_path.mnt);
if (err)
goto mext_out;
@ -270,7 +260,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb;
int err, err2=0;
err = ext4_resize_begin(sb);
@ -281,6 +270,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
sizeof(input)))
return -EFAULT;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not supported with bigalloc");
return -EOPNOTSUPP;
}
err = mnt_want_write(filp->f_path.mnt);
if (err)
return err;
@ -337,7 +333,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FITRIM:
{
struct super_block *sb = inode->i_sb;
struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
@ -348,7 +343,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
if (copy_from_user(&range, (struct fstrim_range *)arg,
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"FITRIM not supported with bigalloc");
return -EOPNOTSUPP;
}
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
@ -358,7 +360,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (ret < 0)
return ret;
if (copy_to_user((struct fstrim_range *)arg, &range,
if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
@ -396,11 +398,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC32_SETVERSION_OLD:
cmd = EXT4_IOC_SETVERSION_OLD;
break;
#ifdef CONFIG_JBD2_DEBUG
case EXT4_IOC32_WAIT_FOR_READONLY:
cmd = EXT4_IOC_WAIT_FOR_READONLY;
break;
#endif
case EXT4_IOC32_GETRSVSZ:
cmd = EXT4_IOC_GETRSVSZ;
break;

View file

@ -70,8 +70,8 @@
*
* pa_lstart -> the logical start block for this prealloc space
* pa_pstart -> the physical start block for this prealloc space
* pa_len -> length for this prealloc space
* pa_free -> free space available in this prealloc space
* pa_len -> length for this prealloc space (in clusters)
* pa_free -> free space available in this prealloc space (in clusters)
*
* The inode preallocation space is used by looking at the _logical_ start
* block. If only the logical file block falls within the range of prealloc
@ -126,7 +126,8 @@
* list. In case of inode preallocation we follow a list of heuristics
* based on file size. This can be found in ext4_mb_normalize_request. If
* we are doing a group prealloc we try to normalize the request to
* sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
* sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is
* dependent on the cluster size; for non-bigalloc file systems, it is
* 512 blocks. This can be tuned via
* /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
* terms of number of blocks. If we have mounted the file system with -O
@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
ext4_fsblk_t blocknr;
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += first + i;
blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
continue;
}
/* both bits in buddy2 must be 0 */
/* both bits in buddy2 must be 1 */
MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
ext4_grpblk_t chunk;
unsigned short border;
BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
border = 2 << sb->s_blocksize_bits;
@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
void *buddy, void *bitmap, ext4_group_t group)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
ext4_grpblk_t i = 0;
ext4_grpblk_t first;
ext4_grpblk_t len;
@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
if (free != grp->bb_free) {
ext4_grp_locked_error(sb, group, 0, 0,
"%u blocks in bitmap, %u in gd",
"%u clusters in bitmap, %u in gd",
free, grp->bb_free);
/*
* If we intend to continue, we consider the group descriptor
@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
ext4_fsblk_t blocknr;
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += block;
blocknr += EXT4_C2B(EXT4_SB(sb), block);
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
{
int next = block;
int max;
int ord;
void *buddy;
assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
break;
ord = mb_find_order_for_block(e4b, next);
order = mb_find_order_for_block(e4b, next);
order = ord;
block = next >> order;
ex->fe_len += 1 << order;
}
@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
struct ext4_free_extent *gex = &ac->ac_g_ex;
BUG_ON(ex->fe_len <= 0);
BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
ac->ac_found++;
@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
while (free && ac->ac_status == AC_STATUS_CONTINUE) {
i = mb_find_next_zero_bit(bitmap,
EXT4_BLOCKS_PER_GROUP(sb), i);
if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
EXT4_CLUSTERS_PER_GROUP(sb), i);
if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
/*
* If we have a corrupt bitmap, we won't find any
* free blocks even though group info says we
* have free blocks
*/
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free blocks as per "
"%d free clusters as per "
"group info. But bitmap says 0",
free);
break;
@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) {
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free blocks as per "
"%d free clusters as per "
"group info. But got %d blocks",
free, ex.fe_len);
/*
@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
do_div(a, sbi->s_stripe);
i = (a * sbi->s_stripe) - first_group_block;
while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
if (!mb_test_bit(i, bitmap)) {
max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
if (max >= sbi->s_stripe) {
@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
*/
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
meta_group_info[i]->bb_free =
ext4_free_blocks_after_init(sb, group, desc);
ext4_free_clusters_after_init(sb, group, desc);
} else {
meta_group_info[i]->bb_free =
ext4_free_blks_count(sb, desc);
ext4_free_group_clusters(sb, desc);
}
INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_mb_stats = MB_DEFAULT_STATS;
sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
/*
* The default group preallocation is 512, which for 4k block
* sizes translates to 2 megabytes.  However, for bigalloc file
* systems, this is probably too big (i.e., if the cluster size
* is 1 megabyte, then the group preallocation size becomes half a
* gigabyte!).  As a default, we will keep a two megabyte
* group prealloc size for cluster sizes up to 64k, and after
* that, we will force a minimum group preallocation size of
* 32 clusters. This translates to 8 megs when the cluster
* size is 256k, and 32 megs when the cluster size is 1 meg,
* which seems reasonable as a default.
*/
sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
sbi->s_cluster_bits, 32);
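/* Illustrative values for the formula above (not part of the patch),
 * with MB_DEFAULT_GROUP_PREALLOC = 512 blocks as stated in the comment:
 *   4k blocks, 256k clusters (s_cluster_bits = 6):
 *       max(512 >> 6, 32) = max(8, 32) = 32 clusters = 8 MB
 *   4k blocks, 1M clusters (s_cluster_bits = 8):
 *       max(512 >> 8, 32) = max(2, 32) = 32 clusters = 32 MB
 *   non-bigalloc (s_cluster_bits = 0):
 *       max(512, 32) = 512 blocks = 2 MB
 */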
/*
* If there is a s_stripe > 1, then we set the s_mb_group_prealloc
* to the lowest multiple of s_stripe which is bigger than
@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
ret = -ENOMEM;
goto out;
goto out_free_groupinfo_slab;
}
for_each_possible_cpu(i) {
struct ext4_locality_group *lg;
@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
/* init file for buddy data */
ret = ext4_mb_init_backend(sb);
if (ret != 0) {
goto out;
}
if (ret != 0)
goto out_free_locality_groups;
if (sbi->s_proc)
proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
if (sbi->s_journal)
sbi->s_journal->j_commit_callback = release_blocks_on_commit;
return 0;
out_free_locality_groups:
free_percpu(sbi->s_locality_groups);
sbi->s_locality_groups = NULL;
out_free_groupinfo_slab:
ext4_groupinfo_destroy_slabs();
out:
if (ret) {
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
}
kfree(sbi->s_mb_offsets);
sbi->s_mb_offsets = NULL;
kfree(sbi->s_mb_maxs);
sbi->s_mb_maxs = NULL;
return ret;
}
@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb)
}
static inline int ext4_issue_discard(struct super_block *sb,
ext4_group_t block_group, ext4_grpblk_t block, int count)
ext4_group_t block_group, ext4_grpblk_t cluster, int count)
{
ext4_fsblk_t discard_block;
discard_block = block + ext4_group_first_block_no(sb, block_group);
discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
ext4_group_first_block_no(sb, block_group));
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
if (test_opt(sb, DISCARD))
ext4_issue_discard(sb, entry->group,
entry->start_blk, entry->count);
entry->start_cluster, entry->count);
err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
ext4_lock_group(sb, entry->group);
/* Take it out of per group rb tree */
rb_erase(&entry->node, &(db->bb_free_root));
mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
/*
* Clear the trimmed flag for the group so that the next
@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void)
*/
static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
handle_t *handle, unsigned int reserv_blks)
handle_t *handle, unsigned int reserv_clstrs)
{
struct buffer_head *bitmap_bh = NULL;
struct ext4_group_desc *gdp;
@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
goto out_err;
ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
ext4_free_blks_count(sb, gdp));
ext4_free_group_clusters(sb, gdp));
err = ext4_journal_get_write_access(handle, gdp_bh);
if (err)
@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
len = ac->ac_b_ex.fe_len;
len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
if (!ext4_data_block_valid(sbi, block, len)) {
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"fs metadata\n", block, block+len);
@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ac->ac_b_ex.fe_len);
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_blks_set(sb, gdp,
ext4_free_blocks_after_init(sb,
ac->ac_b_ex.fe_group, gdp));
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb,
ac->ac_b_ex.fe_group, gdp));
}
len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
ext4_free_blks_set(sb, gdp, len);
len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
ext4_free_group_clusters_set(sb, gdp, len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
/*
* Now reduce the dirty block count also. Should not go negative
*/
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
reserv_clstrs);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
ac->ac_b_ex.fe_group);
atomic_sub(ac->ac_b_ex.fe_len,
&sbi->s_flex_groups[flex_group].free_blocks);
&sbi->s_flex_groups[flex_group].free_clusters);
}
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@ -2886,6 +2908,7 @@ static noinline_for_stack void
ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_allocation_request *ar)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
int bsbits, max;
ext4_lblk_t end;
loff_t size, orig_size, start_off;
@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* first, let's learn the actual file size
 * given the current request is allocated */
size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
size = size << bsbits;
if (size < i_size_read(ac->ac_inode))
size = i_size_read(ac->ac_inode);
@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
continue;
}
pa_end = pa->pa_lstart + pa->pa_len;
pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
pa->pa_len);
/* PA must not overlap original request */
BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
ext4_lblk_t pa_end;
spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0) {
pa_end = pa->pa_lstart + pa->pa_len;
pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
pa->pa_len);
BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
}
spin_unlock(&pa->pa_lock);
@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
/* XXX: is it better to align blocks WRT to logical
* placement or satisfy big request as is */
ac->ac_g_ex.fe_logical = start;
ac->ac_g_ex.fe_len = size;
ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
/* define goal start in order to merge */
if (ar->pright && (ar->lright == (start + size))) {
@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
ext4_fsblk_t start;
ext4_fsblk_t end;
int len;
/* found preallocated blocks, use them */
start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
len = end - start;
end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
len = EXT4_NUM_B2C(sbi, end - start);
ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
&ac->ac_b_ex.fe_start);
ac->ac_b_ex.fe_len = len;
@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
ac->ac_pa = pa;
BUG_ON(start < pa->pa_pstart);
BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
BUG_ON(pa->pa_free < len);
pa->pa_free -= len;
@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
/* all fields in this condition don't change,
* so we can skip locking for them */
if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
EXT4_C2B(sbi, pa->pa_len)))
continue;
/* non-extent files can't have physical blocks past 2^32 */
if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
EXT4_MAX_BLOCK_FILE_PHYS))
continue;
/* found preallocated blocks, use them */
@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
while (n) {
entry = rb_entry(n, struct ext4_free_data, node);
ext4_set_bits(bitmap, entry->start_blk, entry->count);
ext4_set_bits(bitmap, entry->start_cluster, entry->count);
n = rb_next(n);
}
return;
@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t groupnr;
ext4_grpblk_t start;
int preallocated = 0;
int count = 0;
int len;
/* all form of preallocation discards first load group,
@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
BUG_ON(groupnr != group);
ext4_set_bits(bitmap, start, len);
preallocated += len;
count++;
}
mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
}
@ -3412,6 +3441,7 @@ static noinline_for_stack int
ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_prealloc_space *pa;
struct ext4_group_info *grp;
struct ext4_inode_info *ei;
@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
/* also, we should cover whole original request */
wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
/* the smallest one defines real window */
win = min(winl, wins);
offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
offs = ac->ac_o_ex.fe_logical %
EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
if (offs && offs < win)
win = offs;
ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
EXT4_B2C(sbi, win);
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
}
@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
trace_ext4_mb_new_inode_pa(ac, pa);
ext4_mb_use_inode_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
ei = EXT4_I(ac->ac_inode);
grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
grp_blk_start = pa->pa_pstart - bit;
grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
free += next - bit;
trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
EXT4_C2B(sbi, bit)),
next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
}
if (needed == 0)
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
INIT_LIST_HEAD(&list);
repeat:
@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
return;
size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
return;
}
if (sbi->s_mb_group_prealloc <= 0) {
ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
return;
}
/* don't use group allocation for large files */
size = max(size, isize);
if (size > sbi->s_mb_stream_request) {
@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
len = ar->len;
/* just a dirty hack to filter too big requests */
if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
/* start searching from the goal */
goal = ar->goal;
@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
/* set up allocation goals */
memset(ac, 0, sizeof(struct ext4_allocation_context));
ac->ac_b_ex.fe_logical = ar->logical;
ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
ac->ac_o_ex.fe_logical = ar->logical;
ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
ac->ac_o_ex.fe_group = group;
ac->ac_o_ex.fe_start = block;
ac->ac_o_ex.fe_len = len;
ac->ac_g_ex.fe_logical = ar->logical;
ac->ac_g_ex.fe_group = group;
ac->ac_g_ex.fe_start = block;
ac->ac_g_ex.fe_len = len;
ac->ac_g_ex = ac->ac_o_ex;
ac->ac_flags = ar->flags;
/* we have to define context: will we work with a file or
@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
*/
static int ext4_mb_release_context(struct ext4_allocation_context *ac)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_prealloc_space *pa = ac->ac_pa;
if (pa) {
if (pa->pa_type == MB_GROUP_PA) {
/* see comment in ext4_mb_use_group_pa() */
spin_lock(&pa->pa_lock);
pa->pa_pstart += ac->ac_b_ex.fe_len;
pa->pa_lstart += ac->ac_b_ex.fe_len;
pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
pa->pa_free -= ac->ac_b_ex.fe_len;
pa->pa_len -= ac->ac_b_ex.fe_len;
spin_unlock(&pa->pa_lock);
@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
struct super_block *sb;
ext4_fsblk_t block = 0;
unsigned int inquota = 0;
unsigned int reserv_blks = 0;
unsigned int reserv_clstrs = 0;
sb = ar->inode->i_sb;
sbi = EXT4_SB(sb);
trace_ext4_request_blocks(ar);
/* Allow to use superuser reservation for quota file */
if (IS_NOQUOTA(ar->inode))
ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
/*
* For delayed allocation, we could skip the ENOSPC and
* EDQUOT check, as blocks and quotas have been already
@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
* and verify allocation doesn't exceed the quota limits.
*/
while (ar->len &&
ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
/* let others to free the space */
yield();
@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
*errp = -ENOSPC;
return 0;
}
reserv_blks = ar->len;
reserv_clstrs = ar->len;
if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
dquot_alloc_block_nofail(ar->inode, ar->len);
dquot_alloc_block_nofail(ar->inode,
EXT4_C2B(sbi, ar->len));
} else {
while (ar->len &&
dquot_alloc_block(ar->inode, ar->len)) {
dquot_alloc_block(ar->inode,
EXT4_C2B(sbi, ar->len))) {
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
ar->len--;
@ -4328,7 +4370,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_mb_new_preallocation(ac);
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
if (*errp == -EAGAIN) {
/*
* drop the reference that we took
@ -4364,13 +4406,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
if (ac)
kmem_cache_free(ext4_ac_cachep, ac);
if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, inquota - ar->len);
dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
if (!ar->len) {
if (!ext4_test_inode_state(ar->inode,
EXT4_STATE_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter,
reserv_blks);
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
reserv_clstrs);
}
trace_ext4_allocate_blocks(ar, (unsigned long long)block);
@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1,
{
if ((entry1->t_tid == entry2->t_tid) &&
(entry1->group == entry2->group) &&
((entry1->start_blk + entry1->count) == entry2->start_blk))
((entry1->start_cluster + entry1->count) == entry2->start_cluster))
return 1;
return 0;
}
@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct ext4_free_data *new_entry)
{
ext4_group_t group = e4b->bd_group;
ext4_grpblk_t block;
ext4_grpblk_t cluster;
struct ext4_free_data *entry;
struct ext4_group_info *db = e4b->bd_info;
struct super_block *sb = e4b->bd_sb;
@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
BUG_ON(e4b->bd_buddy_page == NULL);
new_node = &new_entry->node;
block = new_entry->start_blk;
cluster = new_entry->start_cluster;
if (!*n) {
/* first free block exent. We need to
@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
while (*n) {
parent = *n;
entry = rb_entry(parent, struct ext4_free_data, node);
if (block < entry->start_blk)
if (cluster < entry->start_cluster)
n = &(*n)->rb_left;
else if (block >= (entry->start_blk + entry->count))
else if (cluster >= (entry->start_cluster + entry->count))
n = &(*n)->rb_right;
else {
ext4_grp_locked_error(sb, group, 0,
ext4_group_first_block_no(sb, group) + block,
ext4_group_first_block_no(sb, group) +
EXT4_C2B(sbi, cluster),
"Block already on to-be-freed list");
return 0;
}
@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
if (node) {
entry = rb_entry(node, struct ext4_free_data, node);
if (can_merge(entry, new_entry)) {
new_entry->start_blk = entry->start_blk;
new_entry->start_cluster = entry->start_cluster;
new_entry->count += entry->count;
rb_erase(node, &(db->bb_free_root));
spin_lock(&sbi->s_md_lock);
@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_group_t block_group;
struct ext4_sb_info *sbi;
struct ext4_buddy e4b;
unsigned int count_clusters;
int err = 0;
int ret;
@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!ext4_should_writeback_data(inode))
flags |= EXT4_FREE_BLOCKS_METADATA;
/*
* If the extent to be freed does not begin on a cluster
* boundary, we need to deal with partial clusters at the
* beginning and end of the extent. Normally we will free
* blocks at the beginning or the end unless we are explicitly
* requested to avoid doing so.
*/
overflow = block & (sbi->s_cluster_ratio - 1);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
overflow = sbi->s_cluster_ratio - overflow;
block += overflow;
if (count > overflow)
count -= overflow;
else
return;
} else {
block -= overflow;
count += overflow;
}
}
overflow = count & (sbi->s_cluster_ratio - 1);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
if (count > overflow)
count -= overflow;
else
return;
} else
count += sbi->s_cluster_ratio - overflow;
}
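/* Worked example for the rounding above (invented numbers):
 * s_cluster_ratio = 16, block = 35, count = 20, no
 * EXT4_FREE_BLOCKS_NOFREE_* flags set:
 *   head: 35 & 15 = 3 -> block = 32, count = 23  (round start down)
 *   tail: 23 & 15 = 7 -> count = 23 + (16 - 7) = 32  (round length up)
 * so blocks 32..63, i.e. two whole clusters, are freed.
 */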
do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@ -4552,10 +4628,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* Check to see if we are freeing blocks across a group
* boundary.
*/
if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
overflow = EXT4_C2B(sbi, bit) + count -
EXT4_BLOCKS_PER_GROUP(sb);
count -= overflow;
}
count_clusters = EXT4_B2C(sbi, count);
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
if (!bitmap_bh) {
err = -EIO;
@ -4570,9 +4648,9 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
in_range(block, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group) ||
EXT4_SB(sb)->s_itb_per_group) ||
in_range(block + count - 1, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group)) {
EXT4_SB(sb)->s_itb_per_group)) {
ext4_error(sb, "Freeing blocks in system zone - "
"Block = %llu, count = %lu", block, count);
@ -4597,11 +4675,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
#ifdef AGGRESSIVE_CHECK
{
int i;
for (i = 0; i < count; i++)
for (i = 0; i < count_clusters; i++)
BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
}
#endif
trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
@ -4618,13 +4696,13 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
err = -ENOMEM;
goto error_return;
}
new_entry->start_blk = bit;
new_entry->start_cluster = bit;
new_entry->group = block_group;
new_entry->count = count;
new_entry->count = count_clusters;
new_entry->t_tid = handle->h_transaction->t_tid;
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
ext4_mb_free_metadata(handle, &e4b, new_entry);
} else {
/* need to update group_info->bb_free and bitmap
@ -4632,25 +4710,29 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* them with group lock_held
*/
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
mb_free_blocks(inode, &e4b, bit, count_clusters);
}
ret = ext4_free_blks_count(sb, gdp) + count;
ext4_free_blks_set(sb, gdp, ret);
ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
ext4_free_group_clusters_set(sb, gdp, ret);
gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeblocks_counter, count);
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
atomic_add(count_clusters,
&sbi->s_flex_groups[flex_group].free_clusters);
}
ext4_mb_unload_buddy(&e4b);
freed += count;
if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@ -4669,8 +4751,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
}
ext4_mark_super_dirty(sb);
error_return:
if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
return;
@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(NULL, &e4b, bit, count);
blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
ext4_free_blks_set(sb, desc, blk_free_count);
blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
ext4_free_group_clusters_set(sb, desc, blk_free_count);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_B2C(sbi, blocks_freed));
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic_add(blocks_freed,
&sbi->s_flex_groups[flex_group].free_blocks);
atomic_add(EXT4_B2C(sbi, blocks_freed),
&sbi->s_flex_groups[flex_group].free_clusters);
}
ext4_mb_unload_buddy(&e4b);
@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
struct ext4_group_info *grp;
ext4_group_t first_group, last_group;
ext4_group_t group, ngroups = ext4_get_groups_count(sb);
ext4_grpblk_t cnt = 0, first_block, last_block;
ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
uint64_t start, len, minlen, trimmed = 0;
ext4_fsblk_t first_data_blk =
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
len = range->len >> sb->s_blocksize_bits;
minlen = range->minlen >> sb->s_blocksize_bits;
if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
return -EINVAL;
if (start + len <= first_data_blk)
goto out;
@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
/* Determine first and last group to examine based on start and len */
ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
&first_group, &first_block);
&first_group, &first_cluster);
ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
&last_group, &last_block);
&last_group, &last_cluster);
last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
last_block = EXT4_BLOCKS_PER_GROUP(sb);
last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
if (first_group > last_group)
return -EINVAL;
@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
* change it for the last group in which case start +
* len < EXT4_BLOCKS_PER_GROUP(sb).
*/
if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
last_block = first_block + len;
len -= last_block - first_block;
if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
last_cluster = first_cluster + len;
len -= last_cluster - first_cluster;
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_block,
last_block, minlen);
cnt = ext4_trim_all_free(sb, group, first_cluster,
last_cluster, minlen);
if (cnt < 0) {
ret = cnt;
break;
}
}
trimmed += cnt;
first_block = 0;
first_cluster = 0;
}
range->len = trimmed * sb->s_blocksize;

View file

@ -106,7 +106,7 @@ struct ext4_free_data {
ext4_group_t group;
/* free block extent */
ext4_grpblk_t start_blk;
ext4_grpblk_t start_cluster;
ext4_grpblk_t count;
/* transaction which freed this extent */
@ -139,9 +139,9 @@ enum {
struct ext4_free_extent {
ext4_lblk_t fe_logical;
ext4_grpblk_t fe_start;
ext4_grpblk_t fe_start; /* In cluster units */
ext4_group_t fe_group;
ext4_grpblk_t fe_len;
ext4_grpblk_t fe_len; /* In cluster units */
};
/*
@ -175,7 +175,7 @@ struct ext4_allocation_context {
/* the best found extent */
struct ext4_free_extent ac_b_ex;
/* copy of the bext found extent taken before preallocation efforts */
/* copy of the best found extent taken before preallocation efforts */
struct ext4_free_extent ac_f_ex;
/* number of iterations done. we have to track to limit searching */
@ -216,6 +216,7 @@ struct ext4_buddy {
static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
struct ext4_free_extent *fex)
{
return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
return ext4_group_first_block_no(sb, fex->fe_group) +
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
}
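/* Example with invented geometry: s_cluster_bits = 4 and a group whose
 * first block is 32768.  An extent with fe_start = 5 (in cluster units)
 * then starts at block 32768 + (5 << 4) = 32848.
 */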
#endif

View file

@ -15,19 +15,18 @@
#include <linux/module.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
/*
* Details of the contiguous blocks that can be
* represented by a single extent
*/
struct list_blocks_struct {
ext4_lblk_t first_block, last_block;
struct migrate_struct {
ext4_lblk_t first_block, last_block, curr_block;
ext4_fsblk_t first_pblock, last_pblock;
};
static int finish_range(handle_t *handle, struct inode *inode,
struct list_blocks_struct *lb)
struct migrate_struct *lb)
{
int retval = 0, needed;
@ -87,8 +86,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
}
static int update_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t blk_num,
struct list_blocks_struct *lb)
ext4_fsblk_t pblock, struct migrate_struct *lb)
{
int retval;
/*
@ -96,9 +94,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
*/
if (lb->first_pblock &&
(lb->last_pblock+1 == pblock) &&
(lb->last_block+1 == blk_num)) {
(lb->last_block+1 == lb->curr_block)) {
lb->last_pblock = pblock;
lb->last_block = blk_num;
lb->last_block = lb->curr_block;
lb->curr_block++;
return 0;
}
/*
@ -106,64 +105,49 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
*/
retval = finish_range(handle, inode, lb);
lb->first_pblock = lb->last_pblock = pblock;
lb->first_block = lb->last_block = blk_num;
lb->first_block = lb->last_block = lb->curr_block;
lb->curr_block++;
return retval;
}
static int update_ind_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
struct list_blocks_struct *lb)
ext4_fsblk_t pblock,
struct migrate_struct *lb)
{
struct buffer_head *bh;
__le32 *i_data;
int i, retval = 0;
ext4_lblk_t blk_count = *blk_nump;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
if (!pblock) {
/* Only update the file block number */
*blk_nump += max_entries;
return 0;
}
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++, blk_count++) {
for (i = 0; i < max_entries; i++) {
if (i_data[i]) {
retval = update_extent_range(handle, inode,
le32_to_cpu(i_data[i]),
blk_count, lb);
le32_to_cpu(i_data[i]), lb);
if (retval)
break;
} else {
lb->curr_block++;
}
}
/* Update the file block number */
*blk_nump = blk_count;
put_bh(bh);
return retval;
}
static int update_dind_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
struct list_blocks_struct *lb)
ext4_fsblk_t pblock,
struct migrate_struct *lb)
{
struct buffer_head *bh;
__le32 *i_data;
int i, retval = 0;
ext4_lblk_t blk_count = *blk_nump;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
if (!pblock) {
/* Only update the file block number */
*blk_nump += max_entries * max_entries;
return 0;
}
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
@ -172,38 +156,28 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
for (i = 0; i < max_entries; i++) {
if (i_data[i]) {
retval = update_ind_extent_range(handle, inode,
le32_to_cpu(i_data[i]),
&blk_count, lb);
le32_to_cpu(i_data[i]), lb);
if (retval)
break;
} else {
/* Only update the file block number */
blk_count += max_entries;
lb->curr_block += max_entries;
}
}
/* Update the file block number */
*blk_nump = blk_count;
put_bh(bh);
return retval;
}
static int update_tind_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
struct list_blocks_struct *lb)
ext4_fsblk_t pblock,
struct migrate_struct *lb)
{
struct buffer_head *bh;
__le32 *i_data;
int i, retval = 0;
ext4_lblk_t blk_count = *blk_nump;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
if (!pblock) {
/* Only update the file block number */
*blk_nump += max_entries * max_entries * max_entries;
return 0;
}
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
@ -212,16 +186,14 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
for (i = 0; i < max_entries; i++) {
if (i_data[i]) {
retval = update_dind_extent_range(handle, inode,
le32_to_cpu(i_data[i]),
&blk_count, lb);
le32_to_cpu(i_data[i]), lb);
if (retval)
break;
} else
} else {
/* Only update the file block number */
blk_count += max_entries * max_entries;
lb->curr_block += max_entries * max_entries;
}
}
/* Update the file block number */
*blk_nump = blk_count;
put_bh(bh);
return retval;
@ -462,12 +434,12 @@ int ext4_ext_migrate(struct inode *inode)
handle_t *handle;
int retval = 0, i;
__le32 *i_data;
ext4_lblk_t blk_count = 0;
struct ext4_inode_info *ei;
struct inode *tmp_inode = NULL;
struct list_blocks_struct lb;
struct migrate_struct lb;
unsigned long max_entries;
__u32 goal;
uid_t owner[2];
/*
* If the filesystem does not support extents, or the inode
@ -495,10 +467,12 @@ int ext4_ext_migrate(struct inode *inode)
}
goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
owner[0] = inode->i_uid;
owner[1] = inode->i_gid;
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
S_IFREG, NULL, goal);
S_IFREG, NULL, goal, owner);
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
return retval;
}
@ -551,35 +525,32 @@ int ext4_ext_migrate(struct inode *inode)
/* 32 bit block address 4 bytes */
max_entries = inode->i_sb->s_blocksize >> 2;
for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
if (i_data[i]) {
retval = update_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[i]),
blk_count, &lb);
le32_to_cpu(i_data[i]), &lb);
if (retval)
goto err_out;
}
} else
lb.curr_block++;
}
if (i_data[EXT4_IND_BLOCK]) {
retval = update_ind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_IND_BLOCK]),
&blk_count, &lb);
le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
if (retval)
goto err_out;
} else
blk_count += max_entries;
lb.curr_block += max_entries;
if (i_data[EXT4_DIND_BLOCK]) {
retval = update_dind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
&blk_count, &lb);
le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
if (retval)
goto err_out;
} else
blk_count += max_entries * max_entries;
lb.curr_block += max_entries * max_entries;
if (i_data[EXT4_TIND_BLOCK]) {
retval = update_tind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
&blk_count, &lb);
le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
if (retval)
goto err_out;
}
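
The migration rewrite above threads a single migrate_struct cursor (curr_block) through the walk instead of passing blk_count by reference: each mapped block advances the cursor by one, and a missing indirect block skips its whole subtree in one step. A small runnable sketch of the cursor arithmetic (max_entries is blocksize / 4, as in the code above):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t max_entries = 4096 >> 2;  /* 1024 entries per 4 KiB indirect block */
        uint64_t curr_block = 0;

        curr_block += 12;                         /* the EXT4_NDIR_BLOCKS direct slots */
        curr_block += max_entries;                /* an absent indirect subtree */
        curr_block += max_entries * max_entries;  /* an absent double-indirect subtree */

        printf("logical file block after skipping: %llu\n",
               (unsigned long long)curr_block);
        return 0;
}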

View file

@ -109,7 +109,7 @@ static int kmmpd(void *data)
mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
bdevname(bh->b_bdev, mmp->mmp_bdevname);
memcpy(mmp->mmp_nodename, init_utsname()->sysname,
memcpy(mmp->mmp_nodename, init_utsname()->nodename,
sizeof(mmp->mmp_nodename));
while (!kthread_should_stop()) {
@ -125,8 +125,9 @@ static int kmmpd(void *data)
* Don't spew too many error messages. Print one every
* (s_mmp_update_interval * 60) seconds.
*/
if (retval && (failed_writes % 60) == 0) {
ext4_error(sb, "Error writing to MMP block");
if (retval) {
if ((failed_writes % 60) == 0)
ext4_error(sb, "Error writing to MMP block");
failed_writes++;
}
@ -295,7 +296,8 @@ int ext4_multi_mount_protect(struct super_block *sb,
/*
* write a new random sequence number.
*/
mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
seq = mmp_new_seq();
mmp->mmp_seq = cpu_to_le32(seq);
retval = write_mmp_block(bh);
if (retval)
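
Two details in the MMP hunks are worth spelling out: the watchdog now records the node name (nodename) rather than the kernel name (sysname), and the sequence assignment is split so that seq keeps the host-endian value while only the on-disk field is stored little-endian. A runnable sketch of picking a fresh sequence number (the cap below is illustrative; ext4 reserves a range of special sequence values):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MMP_SEQ_MAX 0xE24D4D4FU  /* illustrative cap; values above are reserved */

/* Sketch: a new random MMP sequence, avoiding 0 and the reserved range. */
static uint32_t mmp_new_seq(void)
{
        uint32_t seq;

        do {
                seq = (uint32_t)rand();
        } while (seq == 0 || seq > MMP_SEQ_MAX);
        return seq;
}

int main(void)
{
        srand((unsigned)time(NULL));
        uint32_t seq = mmp_new_seq();   /* host-endian copy, as in the hunk */
        printf("new mmp sequence: %#x\n", seq);
        return 0;
}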

View file

@ -17,7 +17,6 @@
#include <linux/quotaops.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "ext4.h"
/**

View file

@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
dxtrace(dx_show_index("node", frames[1].entries));
dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
err = ext4_handle_dirty_metadata(handle, inode, bh2);
err = ext4_handle_dirty_metadata(handle, dir, bh2);
if (err)
goto journal_error;
brelse (bh2);
@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
if (err) {
ext4_std_error(inode->i_sb, err);
goto cleanup;
@ -1707,9 +1707,8 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
*/
static void ext4_dec_count(handle_t *handle, struct inode *inode)
{
drop_nlink(inode);
if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
inc_nlink(inode);
if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
drop_nlink(inode);
}
@ -1756,7 +1755,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
@ -1792,7 +1791,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@ -1832,7 +1831,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
&dentry->d_name, 0);
&dentry->d_name, 0, NULL);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@ -1863,7 +1862,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, dir, dir_block);
err = ext4_handle_dirty_metadata(handle, inode, dir_block);
if (err)
goto out_clear_inode;
err = ext4_mark_inode_dirty(handle, inode);
@ -2279,7 +2278,7 @@ static int ext4_symlink(struct inode *dir,
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0);
&dentry->d_name, 0, NULL);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@ -2530,7 +2529,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
if (retval) {
ext4_std_error(old_dir->i_sb, retval);
goto end_rename;

View file

@ -70,7 +70,6 @@ static void put_io_page(struct ext4_io_page *io_page)
void ext4_free_io_end(ext4_io_end_t *io)
{
int i;
wait_queue_head_t *wq;
BUG_ON(!io);
if (io->page)
@ -78,56 +77,43 @@ void ext4_free_io_end(ext4_io_end_t *io)
for (i = 0; i < io->num_io_pages; i++)
put_io_page(io->pages[i]);
io->num_io_pages = 0;
wq = ext4_ioend_wq(io->inode);
if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
waitqueue_active(wq))
wake_up_all(wq);
if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
wake_up_all(ext4_ioend_wq(io->inode));
kmem_cache_free(io_end_cachep, io);
}
/*
* check a range of space and convert unwritten extents to written.
*
* Called with inode->i_mutex; we depend on this when we manipulate
* io->flag, since we could otherwise race with ext4_flush_completed_IO()
*/
int ext4_end_io_nolock(ext4_io_end_t *io)
{
struct inode *inode = io->inode;
loff_t offset = io->offset;
ssize_t size = io->size;
wait_queue_head_t *wq;
int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n",
io, inode->i_ino, io->list.next, io->list.prev);
if (list_empty(&io->list))
return ret;
if (!(io->flag & EXT4_IO_END_UNWRITTEN))
return ret;
ret = ext4_convert_unwritten_extents(inode, offset, size);
if (ret < 0) {
printk(KERN_EMERG "%s: failed to convert unwritten "
"extents to written extents, error is %d "
"io is still on inode %lu aio dio list\n",
__func__, ret, inode->i_ino);
return ret;
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "
"(inode %lu, offset %llu, size %zd, error %d)",
inode->i_ino, offset, size, ret);
}
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
if (io->flag & EXT4_IO_END_UNWRITTEN) {
io->flag &= ~EXT4_IO_END_UNWRITTEN;
/* Wake up anyone waiting on unwritten extent conversion */
wq = ext4_ioend_wq(io->inode);
if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
waitqueue_active(wq)) {
wake_up_all(wq);
}
}
/* Wake up anyone waiting on unwritten extent conversion */
if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
wake_up_all(ext4_ioend_wq(io->inode));
return ret;
}
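
The conversion path above drops the waitqueue_active() pre-check before wake_up_all(): waking an empty queue is harmless, while the unlocked emptiness test needed memory-barrier care to be safe. A runnable sketch of the remaining pattern, where the completer that drops the count to zero does the wakeup:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int pending = 3;   /* e.g. i_aiodio_unwritten */

static void complete_one(void)
{
        /* atomic_dec_and_test() equivalent: true when we reach zero. */
        if (atomic_fetch_sub(&pending, 1) == 1)
                printf("last completion: wake_up_all(waiters)\n");
}

int main(void)
{
        complete_one();
        complete_one();
        complete_one();   /* this call performs the wakeup */
        return 0;
}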
@ -140,9 +126,15 @@ static void ext4_end_io_work(struct work_struct *work)
struct inode *inode = io->inode;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (list_empty(&io->list)) {
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
goto free;
}
if (!mutex_trylock(&inode->i_mutex)) {
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/*
* Requeue the work instead of waiting so that the work
* items queued after this can be processed.
@ -159,17 +151,11 @@ static void ext4_end_io_work(struct work_struct *work)
io->flag |= EXT4_IO_END_QUEUED;
return;
}
ret = ext4_end_io_nolock(io);
if (ret < 0) {
mutex_unlock(&inode->i_mutex);
return;
}
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (!list_empty(&io->list))
list_del_init(&io->list);
list_del_init(&io->list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
(void) ext4_end_io_nolock(io);
mutex_unlock(&inode->i_mutex);
free:
ext4_free_io_end(io);
}
@ -350,10 +336,8 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
(io_end->pages[io_end->num_io_pages-1] != io_page))
goto submit_and_retry;
if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
io_end->flag |= EXT4_IO_END_UNWRITTEN;
atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
}
if (buffer_uninit(bh))
ext4_set_io_unwritten_flag(inode, io_end);
io->io_end->size += bh->b_size;
io->io_next_block++;
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
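
The open-coded flag-plus-counter update is replaced by ext4_set_io_unwritten_flag(), matching the "Create helper function for EXT4_IO_END_UNWRITTEN and i_aiodio_unwritten" commit in the merge summary. Reconstructed from the removed lines, the helper presumably reads roughly as follows (a sketch, not copied from ext4.h):

static inline void ext4_set_io_unwritten_flag(struct inode *inode,
                                              ext4_io_end_t *io_end)
{
        /* Mark the io_end as covering unwritten extents exactly once,
         * and account it so waiters can wait for the conversion. */
        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
                io_end->flag |= EXT4_IO_END_UNWRITTEN;
                atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
        }
}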

View file

@ -875,7 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
ext4_free_blks_set(sb, gdp, input->free_blocks_count);
ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
@ -937,8 +937,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
input->reserved_blocks);
/* Update the free space counts */
percpu_counter_add(&sbi->s_freeblocks_counter,
input->free_blocks_count);
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_B2C(sbi, input->free_blocks_count));
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb));
@ -946,8 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
sbi->s_log_groups_per_flex) {
ext4_group_t flex_group;
flex_group = ext4_flex_group(sbi, input->group);
atomic_add(input->free_blocks_count,
&sbi->s_flex_groups[flex_group].free_blocks);
atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
&sbi->s_flex_groups[flex_group].free_clusters);
atomic_add(EXT4_INODES_PER_GROUP(sb),
&sbi->s_flex_groups[flex_group].free_inodes);
}

View file

@ -45,6 +45,7 @@
#include <linux/freezer.h>
#include "ext4.h"
#include "ext4_extents.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
@ -163,8 +164,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}
__u32 ext4_free_blks_count(struct super_block *sb,
struct ext4_group_desc *bg)
__u32 ext4_free_group_clusters(struct super_block *sb,
struct ext4_group_desc *bg)
{
return le16_to_cpu(bg->bg_free_blocks_count_lo) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
@ -219,8 +220,8 @@ void ext4_inode_table_set(struct super_block *sb,
bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}
void ext4_free_blks_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count)
void ext4_free_group_clusters_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count)
{
bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
@ -414,6 +415,22 @@ static void save_error_info(struct super_block *sb, const char *func,
ext4_commit_super(sb, 1);
}
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
* else. Once this happens, any attempt to call mark_buffer_dirty()
* (for example, by ext4_commit_super) will cause a kernel OOPS.
* This is a kludge to prevent these oopses until we can put in a proper
* hook in del_gendisk() to inform the VFS and file system layers.
*/
static int block_device_ejected(struct super_block *sb)
{
struct inode *bd_inode = sb->s_bdev->bd_inode;
struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
return bdi->dev == NULL;
}
/* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures.
@ -821,10 +838,10 @@ static void ext4_put_super(struct super_block *sb)
brelse(sbi->s_group_desc[i]);
ext4_kvfree(sbi->s_group_desc);
ext4_kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
@ -1057,8 +1074,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",nouid32");
if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
seq_puts(seq, ",debug");
if (test_opt(sb, OLDALLOC))
seq_puts(seq, ",oldalloc");
#ifdef CONFIG_EXT4_FS_XATTR
if (test_opt(sb, XATTR_USER))
seq_puts(seq, ",user_xattr");
@ -1567,10 +1582,12 @@ static int parse_options(char *options, struct super_block *sb,
set_opt(sb, DEBUG);
break;
case Opt_oldalloc:
set_opt(sb, OLDALLOC);
ext4_msg(sb, KERN_WARNING,
"Ignoring deprecated oldalloc option");
break;
case Opt_orlov:
clear_opt(sb, OLDALLOC);
ext4_msg(sb, KERN_WARNING,
"Ignoring deprecated orlov option");
break;
#ifdef CONFIG_EXT4_FS_XATTR
case Opt_user_xattr:
@ -1801,6 +1818,7 @@ static int parse_options(char *options, struct super_block *sb,
break;
case Opt_nodelalloc:
clear_opt(sb, DELALLOC);
clear_opt2(sb, EXPLICIT_DELALLOC);
break;
case Opt_mblk_io_submit:
set_opt(sb, MBLK_IO_SUBMIT);
@ -1817,6 +1835,7 @@ static int parse_options(char *options, struct super_block *sb,
break;
case Opt_delalloc:
set_opt(sb, DELALLOC);
set_opt2(sb, EXPLICIT_DELALLOC);
break;
case Opt_block_validity:
set_opt(sb, BLOCK_VALIDITY);
@ -1935,7 +1954,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
res = MS_RDONLY;
}
if (read_only)
return res;
goto done;
if (!(sbi->s_mount_state & EXT4_VALID_FS))
ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
"running e2fsck is recommended");
@ -1966,6 +1985,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
ext4_commit_super(sb, 1);
done:
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
@ -2015,8 +2035,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
flex_group = ext4_flex_group(sbi, i);
atomic_add(ext4_free_inodes_count(sb, gdp),
&sbi->s_flex_groups[flex_group].free_inodes);
atomic_add(ext4_free_blks_count(sb, gdp),
&sbi->s_flex_groups[flex_group].free_blocks);
atomic_add(ext4_free_group_clusters(sb, gdp),
&sbi->s_flex_groups[flex_group].free_clusters);
atomic_add(ext4_used_dirs_count(sb, gdp),
&sbi->s_flex_groups[flex_group].used_dirs);
}
@ -2134,7 +2154,8 @@ static int ext4_check_descriptors(struct super_block *sb,
if (NULL != first_not_zeroed)
*first_not_zeroed = grp;
ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
return 1;
}
@ -2454,7 +2475,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
(s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
(s64) EXT4_C2B(sbi,
percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
}
static ssize_t session_write_kbytes_show(struct ext4_attr *a,
@ -2682,6 +2704,13 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
}
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
ext4_msg(sb, KERN_ERR,
"Can't support bigalloc feature without "
"extents feature\n");
return 0;
}
return 1;
}
@ -3087,10 +3116,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
char *cp;
const char *descr;
int ret = -ENOMEM;
int blocksize;
int blocksize, clustersize;
unsigned int db_count;
unsigned int i;
int needs_recovery, has_huge_files;
int needs_recovery, has_huge_files, has_bigalloc;
__u64 blocks_count;
int err;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@ -3224,6 +3253,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
&journal_ioprio, NULL, 0))
goto failed_mount;
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
"with data=journal disables delayed "
"allocation and O_DIRECT support!\n");
if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and delalloc");
goto failed_mount;
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and delalloc");
goto failed_mount;
}
if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC);
}
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (test_opt(sb, DIOREAD_NOLOCK)) {
if (blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"dioread_nolock if block size != PAGE_SIZE");
goto failed_mount;
}
}
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@ -3265,8 +3321,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
goto failed_mount;
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR,
@ -3369,12 +3423,53 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_dirt = 1;
}
if (sbi->s_blocks_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR,
"#blocks per group too big: %lu",
sbi->s_blocks_per_group);
goto failed_mount;
/* Handle clustersize */
clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC);
if (has_bigalloc) {
if (clustersize < blocksize) {
ext4_msg(sb, KERN_ERR,
"cluster size (%d) smaller than "
"block size (%d)", clustersize, blocksize);
goto failed_mount;
}
sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
le32_to_cpu(es->s_log_block_size);
sbi->s_clusters_per_group =
le32_to_cpu(es->s_clusters_per_group);
if (sbi->s_clusters_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR,
"#clusters per group too big: %lu",
sbi->s_clusters_per_group);
goto failed_mount;
}
if (sbi->s_blocks_per_group !=
(sbi->s_clusters_per_group * (clustersize / blocksize))) {
ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
"clusters per group (%lu) inconsistent",
sbi->s_blocks_per_group,
sbi->s_clusters_per_group);
goto failed_mount;
}
} else {
if (clustersize != blocksize) {
ext4_warning(sb, "fragment/cluster size (%d) != "
"block size (%d)", clustersize,
blocksize);
clustersize = blocksize;
}
if (sbi->s_blocks_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR,
"#blocks per group too big: %lu",
sbi->s_blocks_per_group);
goto failed_mount;
}
sbi->s_clusters_per_group = sbi->s_blocks_per_group;
sbi->s_cluster_bits = 0;
}
sbi->s_cluster_ratio = clustersize / blocksize;
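
The cluster-size handling above boils down to a few geometry invariants. A runnable sketch with illustrative superblock values (s_log_block_size = 2 and s_log_cluster_size = 6, i.e. 4 KiB blocks grouped into 64 KiB clusters):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t log_block_size   = 2;       /* blocksize = 1024 << 2 */
        uint32_t log_cluster_size = 6;       /* clustersize = 1024 << 6 */
        uint32_t blocks_per_group   = 32768;
        uint32_t clusters_per_group = 2048;

        unsigned blocksize     = 1024U << log_block_size;
        unsigned clustersize   = 1024U << log_cluster_size;
        unsigned cluster_bits  = log_cluster_size - log_block_size;
        unsigned cluster_ratio = clustersize / blocksize;

        /* The consistency rule enforced by the hunk above. */
        if (blocks_per_group != clusters_per_group * cluster_ratio) {
                fprintf(stderr, "blocks/clusters per group inconsistent\n");
                return 1;
        }
        printf("cluster_bits=%u ratio=%u\n", cluster_bits, cluster_ratio);
        return 0;
}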
if (sbi->s_inodes_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR,
"#inodes per group too big: %lu",
@ -3446,10 +3541,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
#ifdef CONFIG_PROC_FS
if (ext4_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
#endif
bgl_lock_init(sbi->s_blockgroup_lock);
@ -3483,8 +3576,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_err_report.function = print_daily_error_info;
sbi->s_err_report.data = (unsigned long) sb;
err = percpu_counter_init(&sbi->s_freeblocks_counter,
ext4_count_free_blocks(sb));
err = percpu_counter_init(&sbi->s_freeclusters_counter,
ext4_count_free_clusters(sb));
if (!err) {
err = percpu_counter_init(&sbi->s_freeinodes_counter,
ext4_count_free_inodes(sb));
@ -3494,7 +3587,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_count_dirs(sb));
}
if (!err) {
err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
}
if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
@ -3609,13 +3702,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* The journal may have updated the bg summary counts, so we
* need to update the global counters.
*/
percpu_counter_set(&sbi->s_freeblocks_counter,
ext4_count_free_blocks(sb));
percpu_counter_set(&sbi->s_freeclusters_counter,
ext4_count_free_clusters(sb));
percpu_counter_set(&sbi->s_freeinodes_counter,
ext4_count_free_inodes(sb));
percpu_counter_set(&sbi->s_dirs_counter,
ext4_count_dirs(sb));
percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
no_journal:
/*
@ -3679,25 +3772,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"available");
}
if (test_opt(sb, DELALLOC) &&
(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
"requested data journaling mode");
clear_opt(sb, DELALLOC);
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - requested data journaling mode");
clear_opt(sb, DIOREAD_NOLOCK);
}
if (sb->s_blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - block size is too small");
clear_opt(sb, DIOREAD_NOLOCK);
}
}
err = ext4_setup_system_zone(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize system "
@ -3710,22 +3784,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
err);
goto failed_mount4;
goto failed_mount5;
}
err = ext4_register_li_request(sb, first_not_zeroed);
if (err)
goto failed_mount4;
goto failed_mount6;
sbi->s_kobj.kset = ext4_kset;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
"%s", sb->s_id);
if (err) {
ext4_mb_release(sb);
ext4_ext_release(sb);
goto failed_mount4;
};
if (err)
goto failed_mount7;
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
@ -3759,13 +3830,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
goto failed_mount;
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
ext4_ext_release(sb);
failed_mount5:
ext4_mb_release(sb);
ext4_release_system_zone(sb);
failed_mount4:
iput(root);
sb->s_root = NULL;
ext4_msg(sb, KERN_ERR, "mount failed");
destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
failed_mount_wq:
ext4_release_system_zone(sb);
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@ -3774,10 +3851,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
del_timer(&sbi->s_err_report);
if (sbi->s_flex_groups)
ext4_kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
@ -4064,7 +4141,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
if (!sbh)
if (!sbh || block_device_ejected(sb))
return error;
if (buffer_write_io_error(sbh)) {
/*
@ -4100,8 +4177,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
else
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeblocks_counter));
ext4_free_blocks_count_set(es,
EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeclusters_counter)));
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
@ -4506,16 +4584,34 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
return err;
}
/*
* Note: calculating the overhead so we can be compatible with
* historical BSD practice is quite difficult in the face of
* clusters/bigalloc. This is because multiple metadata blocks from
* different block groups can end up in the same allocation cluster.
* Calculating the exact overhead in the face of clustered allocation
* requires either O(all block bitmaps) in memory or O(number of block
* groups**2) in time. We will still calculate the overhead for
* older file systems --- and if we come across a bigalloc file
* system with zero in s_overhead_clusters the estimate will be close to
* correct especially for very large cluster sizes --- but for newer
* file systems, it's better to calculate this figure once at mkfs
* time, and store it in the superblock. If the superblock value is
* present (even for non-bigalloc file systems), we will use it.
*/
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct ext4_group_desc *gdp;
u64 fsid;
s64 bfree;
if (test_opt(sb, MINIX_DF)) {
sbi->s_overhead_last = 0;
} else if (es->s_overhead_clusters) {
sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
ext4_fsblk_t overhead = 0;
@ -4530,24 +4626,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
* All of the blocks before first_data_block are
* overhead
*/
overhead = le32_to_cpu(es->s_first_data_block);
overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
/*
* Add the overhead attributed to the superblock and
* block group descriptors. If the sparse superblocks
* feature is turned on, then not all groups have this.
* Add the overhead found in each block group
*/
for (i = 0; i < ngroups; i++) {
overhead += ext4_bg_has_super(sb, i) +
ext4_bg_num_gdb(sb, i);
gdp = ext4_get_group_desc(sb, i, NULL);
overhead += ext4_num_overhead_clusters(sb, i, gdp);
cond_resched();
}
/*
* Every block group has an inode bitmap, a block
* bitmap, and an inode table.
*/
overhead += ngroups * (2 + sbi->s_itb_per_group);
sbi->s_overhead_last = overhead;
smp_wmb();
sbi->s_blocks_last = ext4_blocks_count(es);
@ -4555,11 +4643,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_type = EXT4_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
buf->f_blocks = (ext4_blocks_count(es) -
EXT4_C2B(sbi, sbi->s_overhead_last));
bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
/* prevent underflow in case little free space is available */
buf->f_bfree = max_t(s64, bfree, 0);
buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
if (buf->f_bfree < ext4_r_blocks_count(es))
buf->f_bavail = 0;
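
Since the free and dirty counters are now kept in clusters, statfs converts back to blocks with EXT4_C2B before reporting. A runnable sketch of that arithmetic (cluster_bits = 4 is an illustrative bigalloc configuration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        unsigned cluster_bits  = 4;
        int64_t free_clusters  = 1000000;
        int64_t dirty_clusters = 2048;  /* delalloc-reserved, not yet on disk */

        int64_t bfree_clusters = free_clusters - dirty_clusters;
        if (bfree_clusters < 0)         /* same clamp as max_t(s64, bfree, 0) */
                bfree_clusters = 0;

        /* EXT4_C2B(): clusters -> blocks. */
        uint64_t f_bfree = (uint64_t)bfree_clusters << cluster_bits;
        printf("f_bfree = %llu blocks\n", (unsigned long long)f_bfree);
        return 0;
}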
@ -4980,13 +5069,11 @@ static int __init ext4_init_fs(void)
return err;
err = ext4_init_system_zone();
if (err)
goto out7;
goto out6;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
goto out6;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
if (!ext4_proc_root)
goto out5;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
err = ext4_init_feat_adverts();
if (err)
@ -5022,12 +5109,12 @@ static int __init ext4_init_fs(void)
out3:
ext4_exit_feat_adverts();
out4:
remove_proc_entry("fs/ext4", NULL);
out5:
if (ext4_proc_root)
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
out6:
out5:
ext4_exit_system_zone();
out7:
out6:
ext4_exit_pageio();
return err;
}

View file

@ -820,8 +820,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
/*
* take i_data_sem because we will test
* i_delalloc_reserved_flag in ext4_mb_new_blocks
*/
down_read((&EXT4_I(inode)->i_data_sem));
block = ext4_new_meta_blocks(handle, inode, goal, 0,
NULL, &error);
up_read((&EXT4_I(inode)->i_data_sem));
if (error)
goto cleanup;
@ -985,11 +991,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
error = ext4_get_inode_loc(inode, &is.iloc);
if (error)
goto cleanup;
error = ext4_journal_get_write_access(handle, is.iloc.bh);
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
if (error)
goto cleanup;

View file

@ -1135,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal)
goto out;
}
if (be32_to_cpu(sb->s_first) == 0 ||
be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
printk(KERN_WARNING
"JBD: Invalid start block of journal: %u\n",
be32_to_cpu(sb->s_first));
goto out;
}
return 0;
out:
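
This is the jbd copy of the same s_first sanity check added to jbd2 further down: a journal whose first log block is 0 (that block holds the journal superblock itself) or lies beyond j_maxlen cannot be replayed safely. A runnable sketch of the accepted range:

#include <stdint.h>
#include <stdio.h>

/* The bound being enforced: the first log block must lie in (0, j_maxlen). */
static int s_first_valid(uint32_t s_first, uint32_t j_maxlen)
{
        return s_first != 0 && s_first < j_maxlen;
}

int main(void)
{
        printf("%d %d %d\n",
               s_first_valid(1, 8192),     /* 1: typical journal */
               s_first_valid(0, 8192),     /* 0: rejected, block 0 is the sb */
               s_first_valid(8192, 8192)); /* 0: rejected, beyond the log */
        return 0;
}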

View file

@ -352,7 +352,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(commit_transaction->t_state == T_RUNNING);
trace_jbd2_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
jbd_debug(1, "JBD2: starting commit of transaction %d\n",
commit_transaction->t_tid);
write_lock(&journal->j_state_lock);
@ -427,7 +427,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
__jbd2_journal_clean_checkpoint_list(journal);
spin_unlock(&journal->j_list_lock);
jbd_debug (3, "JBD: commit phase 1\n");
jbd_debug(3, "JBD2: commit phase 1\n");
/*
* Switch to a new revoke table.
@ -447,7 +447,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
wake_up(&journal->j_wait_transaction_locked);
write_unlock(&journal->j_state_lock);
jbd_debug (3, "JBD: commit phase 2\n");
jbd_debug(3, "JBD2: commit phase 2\n");
/*
* Now start flushing things to disk, in the order they appear
@ -462,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
WRITE_SYNC);
blk_finish_plug(&plug);
jbd_debug(3, "JBD: commit phase 2\n");
jbd_debug(3, "JBD2: commit phase 2\n");
/*
* Way to go: we have now written out all of the data for a
@ -522,7 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT (bufs == 0);
jbd_debug(4, "JBD: get descriptor\n");
jbd_debug(4, "JBD2: get descriptor\n");
descriptor = jbd2_journal_get_descriptor_buffer(journal);
if (!descriptor) {
@ -531,7 +531,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
}
bh = jh2bh(descriptor);
jbd_debug(4, "JBD: got buffer %llu (%p)\n",
jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
(unsigned long long)bh->b_blocknr, bh->b_data);
header = (journal_header_t *)&bh->b_data[0];
header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
@ -625,7 +625,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
commit_transaction->t_buffers == NULL ||
space_left < tag_bytes + 16) {
jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
/* Write an end-of-descriptor marker before
submitting the IOs. "tag" still points to
@ -707,7 +707,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
so we incur less scheduling load.
*/
jbd_debug(3, "JBD: commit phase 3\n");
jbd_debug(3, "JBD2: commit phase 3\n");
/*
* akpm: these are BJ_IO, and j_list_lock is not needed.
@ -771,7 +771,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT (commit_transaction->t_shadow_list == NULL);
jbd_debug(3, "JBD: commit phase 4\n");
jbd_debug(3, "JBD2: commit phase 4\n");
/* Here we wait for the revoke record and descriptor record buffers */
wait_for_ctlbuf:
@ -801,7 +801,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (err)
jbd2_journal_abort(journal, err);
jbd_debug(3, "JBD: commit phase 5\n");
jbd_debug(3, "JBD2: commit phase 5\n");
write_lock(&journal->j_state_lock);
J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
commit_transaction->t_state = T_COMMIT_JFLUSH;
@ -830,7 +830,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
transaction can be removed from any checkpoint list it was on
before. */
jbd_debug(3, "JBD: commit phase 6\n");
jbd_debug(3, "JBD2: commit phase 6\n");
J_ASSERT(list_empty(&commit_transaction->t_inode_list));
J_ASSERT(commit_transaction->t_buffers == NULL);
@ -964,7 +964,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/* Done with this transaction! */
jbd_debug(3, "JBD: commit phase 7\n");
jbd_debug(3, "JBD2: commit phase 7\n");
J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
@ -1039,7 +1039,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
journal->j_commit_callback(journal, commit_transaction);
trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
if (to_free)
kfree(commit_transaction);

View file

@ -491,7 +491,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
*/
journal->j_commit_request = target;
jbd_debug(1, "JBD: requesting commit %d/%d\n",
jbd_debug(1, "JBD2: requesting commit %d/%d\n",
journal->j_commit_request,
journal->j_commit_sequence);
wake_up(&journal->j_wait_commit);
@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
/* This should never happen, but if it does, preserve
the evidence before kjournald goes into a loop and
increments j_commit_sequence beyond all recognition. */
WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
journal->j_commit_request,
journal->j_commit_sequence,
target, journal->j_running_transaction ?
@ -645,7 +645,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
}
#endif
while (tid_gt(tid, journal->j_commit_sequence)) {
jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
tid, journal->j_commit_sequence);
wake_up(&journal->j_wait_commit);
read_unlock(&journal->j_state_lock);
@ -1093,7 +1093,7 @@ static int journal_reset(journal_t *journal)
first = be32_to_cpu(sb->s_first);
last = be32_to_cpu(sb->s_maxlen);
if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n",
printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
first, last);
journal_fail_superblock(journal);
return -EINVAL;
@ -1139,7 +1139,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
*/
if (sb->s_start == 0 && journal->j_tail_sequence ==
journal->j_transaction_sequence) {
jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
"(start %ld, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
@ -1163,7 +1163,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
}
read_lock(&journal->j_state_lock);
jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence, journal->j_errno);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@ -1216,8 +1216,8 @@ static int journal_get_superblock(journal_t *journal)
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
printk (KERN_ERR
"JBD: IO error reading journal superblock\n");
printk(KERN_ERR
"JBD2: IO error reading journal superblock\n");
goto out;
}
}
@ -1228,7 +1228,7 @@ static int journal_get_superblock(journal_t *journal)
if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
printk(KERN_WARNING "JBD: no valid journal superblock found\n");
printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
goto out;
}
@ -1240,14 +1240,22 @@ static int journal_get_superblock(journal_t *journal)
journal->j_format_version = 2;
break;
default:
printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
goto out;
}
if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
printk (KERN_WARNING "JBD: journal file too short\n");
printk(KERN_WARNING "JBD2: journal file too short\n");
goto out;
}
if (be32_to_cpu(sb->s_first) == 0 ||
be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
printk(KERN_WARNING
"JBD2: Invalid start block of journal: %u\n",
be32_to_cpu(sb->s_first));
goto out;
}
@ -1310,8 +1318,8 @@ int jbd2_journal_load(journal_t *journal)
~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
(sb->s_feature_incompat &
~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
printk (KERN_WARNING
"JBD: Unrecognised features on journal\n");
printk(KERN_WARNING
"JBD2: Unrecognised features on journal\n");
return -EINVAL;
}
}
@ -1346,7 +1354,7 @@ int jbd2_journal_load(journal_t *journal)
return 0;
recovery_error:
printk (KERN_WARNING "JBD: recovery failed\n");
printk(KERN_WARNING "JBD2: recovery failed\n");
return -EIO;
}
@ -1577,7 +1585,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
struct buffer_head *bh;
printk(KERN_WARNING
"JBD: Converting superblock from version 1 to 2.\n");
"JBD2: Converting superblock from version 1 to 2.\n");
/* Pre-initialise new fields to zero */
offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
@ -1694,7 +1702,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (!journal->j_tail)
goto no_recovery;
printk (KERN_WARNING "JBD: %s recovery information on journal\n",
printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
write ? "Clearing" : "Ignoring");
err = jbd2_journal_skip_recovery(journal);
@ -2020,7 +2028,7 @@ static int journal_init_jbd2_journal_head_cache(void)
retval = 0;
if (!jbd2_journal_head_cache) {
retval = -ENOMEM;
printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
}
return retval;
}
@ -2383,7 +2391,7 @@ static void __exit journal_exit(void)
#ifdef CONFIG_JBD2_DEBUG
int n = atomic_read(&nr_journal_heads);
if (n)
printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
#endif
jbd2_remove_debugfs_entry();
jbd2_remove_jbd_stats_proc_entry();

View file

@ -89,7 +89,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
err = jbd2_journal_bmap(journal, next, &blocknr);
if (err) {
printk (KERN_ERR "JBD: bad block at offset %u\n",
printk(KERN_ERR "JBD2: bad block at offset %u\n",
next);
goto failed;
}
@ -138,14 +138,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
*bhp = NULL;
if (offset >= journal->j_maxlen) {
printk(KERN_ERR "JBD: corrupted journal superblock\n");
printk(KERN_ERR "JBD2: corrupted journal superblock\n");
return -EIO;
}
err = jbd2_journal_bmap(journal, offset, &blocknr);
if (err) {
printk (KERN_ERR "JBD: bad block at offset %u\n",
printk(KERN_ERR "JBD2: bad block at offset %u\n",
offset);
return err;
}
@ -163,7 +163,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
}
if (!buffer_uptodate(bh)) {
printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
offset);
brelse(bh);
return -EIO;
@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal)
if (!err)
err = do_one_pass(journal, &info, PASS_REPLAY);
jbd_debug(1, "JBD: recovery, exit status %d, "
jbd_debug(1, "JBD2: recovery, exit status %d, "
"recovered transactions %u to %u\n",
err, info.start_transaction, info.end_transaction);
jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
/* Restart the log at the next transaction ID, thus invalidating
@ -293,14 +293,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
err = do_one_pass(journal, &info, PASS_SCAN);
if (err) {
printk(KERN_ERR "JBD: error %d scanning journal\n", err);
printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
++journal->j_transaction_sequence;
} else {
#ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction -
be32_to_cpu(journal->j_superblock->s_sequence);
jbd_debug(1,
"JBD: ignoring %d transaction%s from the journal.\n",
"JBD2: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
#endif
journal->j_transaction_sequence = ++info.end_transaction;
@ -338,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
wrap(journal, *next_log_block);
err = jread(&obh, journal, io_block);
if (err) {
printk(KERN_ERR "JBD: IO error %d recovering block "
printk(KERN_ERR "JBD2: IO error %d recovering block "
"%lu in log\n", err, io_block);
return 1;
} else {
@ -411,7 +411,7 @@ static int do_one_pass(journal_t *journal,
* either the next descriptor block or the final commit
* record. */
jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
err = jread(&bh, journal, next_log_block);
if (err)
goto failed;
@ -491,8 +491,8 @@ static int do_one_pass(journal_t *journal,
/* Recover what we can, but
* report failure at the end. */
success = err;
printk (KERN_ERR
"JBD: IO error %d recovering "
printk(KERN_ERR
"JBD2: IO error %d recovering "
"block %ld in log\n",
err, io_block);
} else {
@ -520,7 +520,7 @@ static int do_one_pass(journal_t *journal,
journal->j_blocksize);
if (nbh == NULL) {
printk(KERN_ERR
"JBD: Out of memory "
"JBD2: Out of memory "
"during recovery.\n");
err = -ENOMEM;
brelse(bh);
@ -689,7 +689,7 @@ static int do_one_pass(journal_t *journal,
/* It's really bad news if different passes end up at
* different places (but possible due to IO errors). */
if (info->end_transaction != next_commit_ID) {
printk (KERN_ERR "JBD: recovery pass %d ended at "
printk(KERN_ERR "JBD2: recovery pass %d ended at "
"transaction %u, expected %u\n",
pass, next_commit_ID, info->end_transaction);
if (!success)

View file

@ -27,6 +27,7 @@
#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/backing-dev.h>
#include <linux/bug.h>
#include <linux/module.h>
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@ -115,7 +116,7 @@ static inline void update_t_max_wait(transaction_t *transaction,
*/
static int start_this_handle(journal_t *journal, handle_t *handle,
int gfp_mask)
gfp_t gfp_mask)
{
transaction_t *transaction, *new_transaction = NULL;
tid_t tid;
@ -124,7 +125,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
unsigned long ts = jiffies;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
current->comm, nblocks,
journal->j_max_transaction_buffers);
return -ENOSPC;
@ -320,7 +321,7 @@ static handle_t *new_handle(int nblocks)
* Return a pointer to a newly allocated handle, or an ERR_PTR() value
* on failure.
*/
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
{
handle_t *handle = journal_current_handle();
int err;
@ -443,7 +444,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
* transaction capable of guaranteeing the requested number of
* credits.
*/
int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
@ -563,7 +564,7 @@ static void warn_dirty_buffer(struct buffer_head *bh)
char b[BDEVNAME_SIZE];
printk(KERN_WARNING
"JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
"JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
"There's a risk of filesystem corruption in case of system "
"crash.\n",
bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
@ -1049,6 +1050,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
* mark dirty metadata which needs to be journaled as part of the current
* transaction.
*
* The buffer must have previously had jbd2_journal_get_write_access()
* called so that it has a valid journal_head attached to the buffer
* head.
*
* The buffer is placed on the transaction's metadata list and is marked
* as belonging to the transaction.
*
@ -1065,11 +1070,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
struct journal_head *jh = bh2jh(bh);
int ret = 0;
jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
if (is_handle_aborted(handle))
goto out;
if (!buffer_jbd(bh)) {
ret = -EUCLEAN;
goto out;
}
jbd_lock_bh_state(bh);
@ -1093,8 +1103,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
JBUFFER_TRACE(jh, "fastpath");
J_ASSERT_JH(jh, jh->b_transaction ==
journal->j_running_transaction);
if (unlikely(jh->b_transaction !=
journal->j_running_transaction)) {
printk(KERN_EMERG "JBD: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_running_transaction (%p, %u)",
journal->j_devname,
(unsigned long long) bh->b_blocknr,
jh->b_transaction,
jh->b_transaction ? jh->b_transaction->t_tid : 0,
journal->j_running_transaction,
journal->j_running_transaction ?
journal->j_running_transaction->t_tid : 0);
ret = -EINVAL;
}
goto out_unlock_bh;
}
@ -1108,9 +1130,32 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction != transaction) {
JBUFFER_TRACE(jh, "already on other transaction");
J_ASSERT_JH(jh, jh->b_transaction ==
journal->j_committing_transaction);
J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
if (unlikely(jh->b_transaction !=
journal->j_committing_transaction)) {
printk(KERN_EMERG "JBD: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_committing_transaction (%p, %u)",
journal->j_devname,
(unsigned long long) bh->b_blocknr,
jh->b_transaction,
jh->b_transaction ? jh->b_transaction->t_tid : 0,
journal->j_committing_transaction,
journal->j_committing_transaction ?
journal->j_committing_transaction->t_tid : 0);
ret = -EINVAL;
}
if (unlikely(jh->b_next_transaction != transaction)) {
printk(KERN_EMERG "JBD: %s: "
"jh->b_next_transaction (%llu, %p, %u) != "
"transaction (%p, %u)",
journal->j_devname,
(unsigned long long) bh->b_blocknr,
jh->b_next_transaction,
jh->b_next_transaction ?
jh->b_next_transaction->t_tid : 0,
transaction, transaction->t_tid);
ret = -EINVAL;
}
/* And this case is illegal: we can't reuse another
* transaction's data buffer, ever. */
goto out_unlock_bh;
@ -1127,7 +1172,8 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
jbd_unlock_bh_state(bh);
out:
JBUFFER_TRACE(jh, "exit");
return 0;
WARN_ON(ret); /* All errors are bugs, so dump the stack */
return ret;
}
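
After this hunk, jbd2_journal_dirty_metadata() reports inconsistent journal_head state as an error (-EUCLEAN or -EINVAL, with a WARN_ON stack dump) instead of halting in J_ASSERT, so callers are expected to propagate it. A hedged sketch of the calling convention (the wrapper name is illustrative; in-tree callers such as ext4's handle helpers do the equivalent):

/* Sketch only: consuming the new error return.  "handle" and "bh"
 * come from the caller's transaction context. */
static int mark_metadata_dirty(handle_t *handle, struct buffer_head *bh)
{
        int err = jbd2_journal_dirty_metadata(handle, bh);

        if (err)
                /* Journal state is inconsistent; abort rather than risk
                 * committing a corrupted transaction. */
                jbd2_journal_abort(handle->h_transaction->t_journal, err);
        return err;
}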
/*

View file

@ -197,8 +197,8 @@ struct ext2_group_desc
/* Flags that should be inherited by new inodes from their parent. */
#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\
EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\
EXT2_SYNC_FL | EXT2_NODUMP_FL |\
EXT2_NOATIME_FL | EXT2_COMPRBLK_FL |\
EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)

View file

@ -180,8 +180,8 @@ struct ext3_group_desc
/* Flags that should be inherited by new inodes from their parent. */
#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
EXT3_SYNC_FL | EXT3_NODUMP_FL |\
EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
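
Both hunks remove EXT*_IMMUTABLE_FL and EXT*_APPEND_FL from the set of flags a new inode copies from its parent directory, so files created inside an append-only or immutable directory start out ordinary. A runnable sketch of how such an inheritance mask is applied (flag values below are illustrative, not the on-disk ones):

#include <stdint.h>
#include <stdio.h>

/* Illustrative flag values only (the real ones live in ext2_fs.h). */
#define FL_SYNC       0x00000008
#define FL_IMMUTABLE  0x00000010
#define FL_APPEND     0x00000020
#define FL_NOATIME    0x00000080

/* After this change, IMMUTABLE/APPEND are no longer in the mask. */
#define FL_INHERITED  (FL_SYNC | FL_NOATIME)

int main(void)
{
        uint32_t dir_flags = FL_SYNC | FL_IMMUTABLE | FL_APPEND;
        uint32_t new_inode_flags = dir_flags & FL_INHERITED;

        printf("inherited flags: %#x\n", new_inode_flags); /* only FL_SYNC */
        return 0;
}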

View file

@ -770,12 +770,13 @@ struct inode {
unsigned long i_ino;
unsigned int i_nlink;
dev_t i_rdev;
loff_t i_size;
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
unsigned int i_blkbits;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
blkcnt_t i_blocks;
loff_t i_size;
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
@ -783,7 +784,6 @@ struct inode {
/* Misc */
unsigned long i_state;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
struct mutex i_mutex;
unsigned long dirtied_when; /* jiffies of first dirtying */
@ -797,9 +797,10 @@ struct inode {
struct rcu_head i_rcu;
};
atomic_t i_count;
unsigned int i_blkbits;
u64 i_version;
unsigned short i_bytes;
atomic_t i_dio_count;
atomic_t i_writecount;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock *i_flock;
struct address_space i_data;
@ -823,7 +824,6 @@ struct inode {
#ifdef CONFIG_IMA
atomic_t i_readcount; /* struct files open RO */
#endif
atomic_t i_writecount;
void *i_private; /* fs or device private pointer */
};

View file

@ -244,6 +244,7 @@ typedef struct journal_superblock_s
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/jbd_common.h>
#define J_ASSERT(assert) BUG_ON(!(assert))
@ -270,69 +271,6 @@ typedef struct journal_superblock_s
#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why)
#endif
enum jbd_state_bits {
BH_JBD /* Has an attached ext3 journal_head */
= BH_PrivateStart,
BH_JWrite, /* Being written to log (@@@ DEBUGGING) */
BH_Freed, /* Has been freed (truncated) */
BH_Revoked, /* Has been revoked from the log */
BH_RevokeValid, /* Revoked flag is valid */
BH_JBDDirty, /* Is dirty but journaled */
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
};
BUFFER_FNS(JBD, jbd)
BUFFER_FNS(JWrite, jwrite)
BUFFER_FNS(JBDDirty, jbddirty)
TAS_BUFFER_FNS(JBDDirty, jbddirty)
BUFFER_FNS(Revoked, revoked)
TAS_BUFFER_FNS(Revoked, revoked)
BUFFER_FNS(RevokeValid, revokevalid)
TAS_BUFFER_FNS(RevokeValid, revokevalid)
BUFFER_FNS(Freed, freed)
static inline struct buffer_head *jh2bh(struct journal_head *jh)
{
return jh->b_bh;
}
static inline struct journal_head *bh2jh(struct buffer_head *bh)
{
return bh->b_private;
}
static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
bit_spin_lock(BH_State, &bh->b_state);
}
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
{
return bit_spin_trylock(BH_State, &bh->b_state);
}
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
return bit_spin_is_locked(BH_State, &bh->b_state);
}
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
bit_spin_unlock(BH_State, &bh->b_state);
}
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_lock(BH_JournalHead, &bh->b_state);
}
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_unlock(BH_JournalHead, &bh->b_state);
}
struct jbd_revoke_table_s;
/**

View file

@ -275,6 +275,7 @@ typedef struct journal_superblock_s
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/jbd_common.h>
#define J_ASSERT(assert) BUG_ON(!(assert))
@@ -302,70 +303,6 @@ typedef struct journal_superblock_s
#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why)
#endif
enum jbd_state_bits {
BH_JBD /* Has an attached ext3 journal_head */
= BH_PrivateStart,
BH_JWrite, /* Being written to log (@@@ DEBUGGING) */
BH_Freed, /* Has been freed (truncated) */
BH_Revoked, /* Has been revoked from the log */
BH_RevokeValid, /* Revoked flag is valid */
BH_JBDDirty, /* Is dirty but journaled */
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
BH_JBDPrivateStart, /* First bit available for private use by FS */
};
BUFFER_FNS(JBD, jbd)
BUFFER_FNS(JWrite, jwrite)
BUFFER_FNS(JBDDirty, jbddirty)
TAS_BUFFER_FNS(JBDDirty, jbddirty)
BUFFER_FNS(Revoked, revoked)
TAS_BUFFER_FNS(Revoked, revoked)
BUFFER_FNS(RevokeValid, revokevalid)
TAS_BUFFER_FNS(RevokeValid, revokevalid)
BUFFER_FNS(Freed, freed)
static inline struct buffer_head *jh2bh(struct journal_head *jh)
{
return jh->b_bh;
}
static inline struct journal_head *bh2jh(struct buffer_head *bh)
{
return bh->b_private;
}
static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
bit_spin_lock(BH_State, &bh->b_state);
}
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
{
return bit_spin_trylock(BH_State, &bh->b_state);
}
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
return bit_spin_is_locked(BH_State, &bh->b_state);
}
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
bit_spin_unlock(BH_State, &bh->b_state);
}
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_lock(BH_JournalHead, &bh->b_state);
}
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_unlock(BH_JournalHead, &bh->b_state);
}
/* Flags in jbd_inode->i_flags */
#define __JI_COMMIT_RUNNING 0
/* Commit of the inode data in progress. We use this flag to protect us from
@@ -1106,9 +1043,9 @@ static inline handle_t *journal_current_handle(void)
*/
extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask);
extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask);
extern int jbd2_journal_restart(handle_t *, int nblocks);
extern int jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask);
extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
extern int jbd2_journal_extend (handle_t *, int nblocks);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
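
Editor's note: the int -> gfp_t change above is a type-safety fix. In the kernel, gfp_t is a sparse-annotated ("__bitwise") type, so static checking can flag callers that pass a bare integer where allocation flags such as GFP_NOFS belong. A userspace analogue of the idea, using a wrapper struct instead of sparse annotations (names here are invented for illustration):

#include <stdio.h>

typedef struct { unsigned int bits; } gfpish_t;    /* analogue of gfp_t */
#define GFPISH_NOFS ((gfpish_t){ .bits = 0x10u })  /* analogue of GFP_NOFS */

/* Analogue of jbd2__journal_start(): the flags now have a distinct type. */
static int start_handle(int nblocks, gfpish_t gfp)
{
	printf("start: %d blocks, gfp %#x\n", nblocks, gfp.bits);
	return 0;
}

int main(void)
{
	start_handle(8, GFPISH_NOFS);   /* OK */
	/* start_handle(8, 0x10);  -- rejected: an int is not a gfpish_t */
	return 0;
}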

View file

@@ -0,0 +1,68 @@
#ifndef _LINUX_JBD_STATE_H
#define _LINUX_JBD_STATE_H
enum jbd_state_bits {
BH_JBD /* Has an attached ext3 journal_head */
= BH_PrivateStart,
BH_JWrite, /* Being written to log (@@@ DEBUGGING) */
BH_Freed, /* Has been freed (truncated) */
BH_Revoked, /* Has been revoked from the log */
BH_RevokeValid, /* Revoked flag is valid */
BH_JBDDirty, /* Is dirty but journaled */
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
BH_JBDPrivateStart, /* First bit available for private use by FS */
};
BUFFER_FNS(JBD, jbd)
BUFFER_FNS(JWrite, jwrite)
BUFFER_FNS(JBDDirty, jbddirty)
TAS_BUFFER_FNS(JBDDirty, jbddirty)
BUFFER_FNS(Revoked, revoked)
TAS_BUFFER_FNS(Revoked, revoked)
BUFFER_FNS(RevokeValid, revokevalid)
TAS_BUFFER_FNS(RevokeValid, revokevalid)
BUFFER_FNS(Freed, freed)
static inline struct buffer_head *jh2bh(struct journal_head *jh)
{
return jh->b_bh;
}
static inline struct journal_head *bh2jh(struct buffer_head *bh)
{
return bh->b_private;
}
static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
bit_spin_lock(BH_State, &bh->b_state);
}
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
{
return bit_spin_trylock(BH_State, &bh->b_state);
}
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
return bit_spin_is_locked(BH_State, &bh->b_state);
}
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
bit_spin_unlock(BH_State, &bh->b_state);
}
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_lock(BH_JournalHead, &bh->b_state);
}
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_unlock(BH_JournalHead, &bh->b_state);
}
#endif
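
Editor's note: the jbd_lock_bh_state()/jbd_lock_bh_journal_head() helpers collected above implement per-buffer locks that live in single bits of bh->b_state, so no extra lock word is needed per buffer_head. A userspace sketch of the same bit-spinlock idea using C11 atomics (the kernel's bit_spin_lock() additionally disables preemption, which this sketch omits):

#include <stdatomic.h>
#include <stdio.h>

#define STATE_BIT 5u    /* stand-in for BH_State */

static void bit_lock(atomic_uint *state, unsigned int bit)
{
	unsigned int mask = 1u << bit;

	/* Spin until this caller is the one that flips the bit 0 -> 1. */
	while (atomic_fetch_or_explicit(state, mask,
					memory_order_acquire) & mask)
		;
}

static void bit_unlock(atomic_uint *state, unsigned int bit)
{
	atomic_fetch_and_explicit(state, ~(1u << bit),
				  memory_order_release);
}

int main(void)
{
	atomic_uint b_state = 0;

	bit_lock(&b_state, STATE_BIT);
	/* ...examine or modify journal_head state here... */
	bit_unlock(&b_state, STATE_BIT);
	printf("final state: %#x\n", (unsigned int)b_state);
	return 0;
}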

View file

@@ -9,9 +9,12 @@
struct ext4_allocation_context;
struct ext4_allocation_request;
struct ext4_extent;
struct ext4_prealloc_space;
struct ext4_inode_info;
struct mpage_da_data;
struct ext4_map_blocks;
struct ext4_extent;
#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
@@ -1032,9 +1035,9 @@ TRACE_EVENT(ext4_forget,
);
TRACE_EVENT(ext4_da_update_reserve_space,
TP_PROTO(struct inode *inode, int used_blocks),
TP_PROTO(struct inode *inode, int used_blocks, int quota_claim),
TP_ARGS(inode, used_blocks),
TP_ARGS(inode, used_blocks, quota_claim),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -1045,6 +1048,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( int, allocated_meta_blocks )
__field( int, quota_claim )
),
TP_fast_assign(
@@ -1053,19 +1057,24 @@ TRACE_EVENT(ext4_da_update_reserve_space,
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->used_blocks = used_blocks;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
__entry->reserved_data_blocks =
EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks =
EXT4_I(inode)->i_reserved_meta_blocks;
__entry->allocated_meta_blocks =
EXT4_I(inode)->i_allocated_meta_blocks;
__entry->quota_claim = quota_claim;
),
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
"reserved_data_blocks %d reserved_meta_blocks %d "
"allocated_meta_blocks %d",
"allocated_meta_blocks %d quota_claim %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, __entry->i_blocks,
__entry->used_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks,
__entry->quota_claim)
);
TRACE_EVENT(ext4_da_reserve_space,
@@ -1386,6 +1395,87 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
TP_ARGS(inode)
);
/* 'ux' is the uninitialized extent. */
TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
struct ext4_extent *ux),
TP_ARGS(inode, map, ux),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, m_lblk )
__field( unsigned, m_len )
__field( ext4_lblk_t, u_lblk )
__field( unsigned, u_len )
__field( ext4_fsblk_t, u_pblk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->m_lblk = map->m_lblk;
__entry->m_len = map->m_len;
__entry->u_lblk = le32_to_cpu(ux->ee_block);
__entry->u_len = ext4_ext_get_actual_len(ux);
__entry->u_pblk = ext4_ext_pblock(ux);
),
TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u "
"u_pblk %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->m_lblk, __entry->m_len,
__entry->u_lblk, __entry->u_len, __entry->u_pblk)
);
/*
* 'ux' is the uninitialized extent.
* 'ix' is the initialized extent to which blocks are transferred.
*/
TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
struct ext4_extent *ux, struct ext4_extent *ix),
TP_ARGS(inode, map, ux, ix),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, m_lblk )
__field( unsigned, m_len )
__field( ext4_lblk_t, u_lblk )
__field( unsigned, u_len )
__field( ext4_fsblk_t, u_pblk )
__field( ext4_lblk_t, i_lblk )
__field( unsigned, i_len )
__field( ext4_fsblk_t, i_pblk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->m_lblk = map->m_lblk;
__entry->m_len = map->m_len;
__entry->u_lblk = le32_to_cpu(ux->ee_block);
__entry->u_len = ext4_ext_get_actual_len(ux);
__entry->u_pblk = ext4_ext_pblock(ux);
__entry->i_lblk = le32_to_cpu(ix->ee_block);
__entry->i_len = ext4_ext_get_actual_len(ix);
__entry->i_pblk = ext4_ext_pblock(ix);
),
TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u "
"u_lblk %u u_len %u u_pblk %llu "
"i_lblk %u i_len %u i_pblk %llu ",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->m_lblk, __entry->m_len,
__entry->u_lblk, __entry->u_len, __entry->u_pblk,
__entry->i_lblk, __entry->i_len, __entry->i_pblk)
);
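
Editor's note: once compiled in, the new events can be switched on through the tracing filesystem like any other ext4 tracepoint, and their output follows the TP_printk() format strings above. A small sketch of enabling one event from userspace (paths assume debugfs is mounted at /sys/kernel/debug and the caller has root privileges):

#include <stdio.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/debug/tracing/events/ext4/"
		"ext4_ext_convert_to_initialized_enter/enable";
	FILE *f = fopen(enable, "w");

	if (!f) {
		perror("enable tracepoint");
		return 1;
	}
	fputs("1", f);
	fclose(f);
	/* Formatted events (dev, ino, m_lblk, m_len, u_lblk, u_len,
	 * u_pblk) can then be read from .../tracing/trace_pipe. */
	return 0;
}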
DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
unsigned int len, unsigned int flags),
@@ -1589,6 +1679,382 @@ DEFINE_EVENT(ext4__trim, ext4_trim_all_free,
TP_ARGS(sb, group, start, len)
);
TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
unsigned int allocated, ext4_fsblk_t newblock),
TP_ARGS(inode, map, allocated, newblock),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( ext4_fsblk_t, pblk )
__field( unsigned int, len )
__field( int, flags )
__field( unsigned int, allocated )
__field( ext4_fsblk_t, newblk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = map->m_lblk;
__entry->pblk = map->m_pblk;
__entry->len = map->m_len;
__entry->flags = map->m_flags;
__entry->allocated = allocated;
__entry->newblk = newblock;
),
TP_printk("dev %d,%d ino %lu m_lblk %u m_pblk %llu m_len %u flags %d"
"allocated %d newblock %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
__entry->len, __entry->flags,
(unsigned int) __entry->allocated,
(unsigned long long) __entry->newblk)
);
TRACE_EVENT(ext4_get_implied_cluster_alloc_exit,
TP_PROTO(struct super_block *sb, struct ext4_map_blocks *map, int ret),
TP_ARGS(sb, map, ret),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( ext4_fsblk_t, pblk )
__field( unsigned int, len )
__field( unsigned int, flags )
__field( int, ret )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->lblk = map->m_lblk;
__entry->pblk = map->m_pblk;
__entry->len = map->m_len;
__entry->flags = map->m_flags;
__entry->ret = ret;
),
TP_printk("dev %d,%d m_lblk %u m_pblk %llu m_len %u m_flags %u ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->lblk, (unsigned long long) __entry->pblk,
__entry->len, __entry->flags, __entry->ret)
);
TRACE_EVENT(ext4_ext_put_in_cache,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len,
ext4_fsblk_t start),
TP_ARGS(inode, lblk, len, start),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
__field( ext4_fsblk_t, start )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->len = len;
__entry->start = start;
),
TP_printk("dev %d,%d ino %lu lblk %u len %u start %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk,
__entry->len,
(unsigned long long) __entry->start)
);
TRACE_EVENT(ext4_ext_in_cache,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, int ret),
TP_ARGS(inode, lblk, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %lu lblk %u ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk,
__entry->ret)
);
TRACE_EVENT(ext4_find_delalloc_range,
TP_PROTO(struct inode *inode, ext4_lblk_t from, ext4_lblk_t to,
int reverse, int found, ext4_lblk_t found_blk),
TP_ARGS(inode, from, to, reverse, found, found_blk),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, from )
__field( ext4_lblk_t, to )
__field( int, reverse )
__field( int, found )
__field( ext4_lblk_t, found_blk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->from = from;
__entry->to = to;
__entry->reverse = reverse;
__entry->found = found;
__entry->found_blk = found_blk;
),
TP_printk("dev %d,%d ino %lu from %u to %u reverse %d found %d "
"(blk = %u)",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->from, (unsigned) __entry->to,
__entry->reverse, __entry->found,
(unsigned) __entry->found_blk)
);
TRACE_EVENT(ext4_get_reserved_cluster_alloc,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, unsigned int len),
TP_ARGS(inode, lblk, len),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->len = len;
),
TP_printk("dev %d,%d ino %lu lblk %u len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk,
__entry->len)
);
TRACE_EVENT(ext4_ext_show_extent,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
unsigned short len),
TP_ARGS(inode, lblk, pblk, len),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( ext4_fsblk_t, pblk )
__field( unsigned short, len )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->pblk = pblk;
__entry->len = len;
),
TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->lblk,
(unsigned long long) __entry->pblk,
(unsigned short) __entry->len)
);
TRACE_EVENT(ext4_remove_blocks,
TP_PROTO(struct inode *inode, struct ext4_extent *ex,
ext4_lblk_t from, ext4_fsblk_t to,
ext4_fsblk_t partial_cluster),
TP_ARGS(inode, ex, from, to, partial_cluster),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, ee_lblk )
__field( ext4_fsblk_t, ee_pblk )
__field( unsigned short, ee_len )
__field( ext4_lblk_t, from )
__field( ext4_lblk_t, to )
__field( ext4_fsblk_t, partial )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ee_lblk = cpu_to_le32(ex->ee_block);
__entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_len = ext4_ext_get_actual_len(ex);
__entry->from = from;
__entry->to = to;
__entry->partial = partial_cluster;
),
TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
"from %u to %u partial_cluster %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->ee_lblk,
(unsigned long long) __entry->ee_pblk,
(unsigned short) __entry->ee_len,
(unsigned) __entry->from,
(unsigned) __entry->to,
(unsigned) __entry->partial)
);
TRACE_EVENT(ext4_ext_rm_leaf,
TP_PROTO(struct inode *inode, ext4_lblk_t start,
struct ext4_extent *ex, ext4_fsblk_t partial_cluster),
TP_ARGS(inode, start, ex, partial_cluster),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, start )
__field( ext4_lblk_t, ee_lblk )
__field( ext4_fsblk_t, ee_pblk )
__field( short, ee_len )
__field( ext4_fsblk_t, partial )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->start = start;
__entry->ee_lblk = le32_to_cpu(ex->ee_block);
__entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_len = ext4_ext_get_actual_len(ex);
__entry->partial = partial_cluster;
),
TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
"partial_cluster %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->start,
(unsigned) __entry->ee_lblk,
(unsigned long long) __entry->ee_pblk,
(unsigned short) __entry->ee_len,
(unsigned) __entry->partial)
);
TRACE_EVENT(ext4_ext_rm_idx,
TP_PROTO(struct inode *inode, ext4_fsblk_t pblk),
TP_ARGS(inode, pblk),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_fsblk_t, pblk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->pblk = pblk;
),
TP_printk("dev %d,%d ino %lu index_pblk %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long long) __entry->pblk)
);
TRACE_EVENT(ext4_ext_remove_space,
TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth),
TP_ARGS(inode, start, depth),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, start )
__field( int, depth )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->start = start;
__entry->depth = depth;
),
TP_printk("dev %d,%d ino %lu since %u depth %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->start,
__entry->depth)
);
TRACE_EVENT(ext4_ext_remove_space_done,
TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth,
ext4_lblk_t partial, unsigned short eh_entries),
TP_ARGS(inode, start, depth, partial, eh_entries),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, start )
__field( int, depth )
__field( ext4_lblk_t, partial )
__field( unsigned short, eh_entries )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->start = start;
__entry->depth = depth;
__entry->partial = partial;
__entry->eh_entries = eh_entries;
),
TP_printk("dev %d,%d ino %lu since %u depth %d partial %u "
"remaining_entries %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->start,
__entry->depth,
(unsigned) __entry->partial,
(unsigned short) __entry->eh_entries)
);
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */