for-5.8-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl7U50AACgkQxWXV+ddt
 WDtK1g//RXeNsTguYQr1N9R5eUPThjLEI0+4J0l4SYfCPU8Ou3C7nqpOEJJQgm8F
 ezZE+16cWi9U5uGueOc+w0rfyz4AuIXKgzoz+c0/GG2+yV5jp6DsAMbWqojAb96L
 V/N3HxEzR66jqwgVUBE/x5okb2SyY7//B1l/O0amc66XDO7KTMImpIwThere6zWZ
 o2SNpYpHAPQeUYJQx8h+FAW3w1CxrCZmnifazU9Jqe9J7QeQLg7rbUlJDV38jySm
 ZOA8ohKN9U1gPZy+dTU3kdyyuBIq1etkIaSPJANyTo5TczPKiC0IMg75cXtS4ae/
 NSxhccMpSIjVMcIHARzSFGYKNP3sGNRsmaTUg/2Cx/9GoHOhYMiCAVc8qtBBpwJO
 UI0siexrCe64RuTBMRRc128GdFv7IjmSImcdi8xaR62bCcUiNdEa3zvjRe/9tOEH
 ET7Z85oBnKpSzpC3MdhSUU4dtHY5XLawP8z3oUU1VSzSWM2DVjlHf79/VzbOfp18
 miCVpt94lCn/gUX7el6qcnbuvMAjDyeC6HmfD+TwzQgGwyV6TLgKN9lRXeH/Oy6/
 VgjGQSavGHMll3zIGURmrBCXKudjJg0J+IP4wN1TimmSEMfwKH+7tnekQd8y5qlF
 eXEIqlWNykKeDzEnmV9QJy+/cV83hVWM/mUslcTx39tLN/3B/Us=
 =qTt8
 -----END PGP SIGNATURE-----

Merge tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Highlights:

   - speedup dead root detection during orphan cleanup, eg. when there
     are many deleted subvolumes waiting to be cleaned, the trees are
     now looked up in radix tree instead of a O(N^2) search

   - snapshot creation with inherited qgroup will mark the qgroup
     inconsistent, requires a rescan

   - send will emit file capabilities after chown, this produces a
     stream that does not need postprocessing to set the capabilities
     again

   - direct io ported to iomap infrastructure, cleaned up and simplified
     code, notably removing last use of struct buffer_head in btrfs code

  Core changes:

   - factor out backreference iteration, to be used by ordinary
     backreferences and relocation code

   - improved global block reserve utilization
      * better logic to serialize requests
      * increased maximum available for unlink
      * improved handling on large pages (64K)

   - direct io cleanups and fixes
      * simplify layering, where cloned bios were unnecessarily created
        for some cases
      * error handling fixes (submit, endio)
      * remove repair worker thread, used to avoid deadlocks during
        repair

   - refactored block group reading code, preparatory work for new type
     of block group storage that should improve mount time on large
     filesystems

  Cleanups:

   - cleaned up (and slightly sped up) set/get helpers for metadata data
     structure members

   - root bit REF_COWS got renamed to SHAREABLE to reflect the that the
     blocks of the tree get shared either among subvolumes or with the
     relocation trees

  Fixes:

   - when subvolume deletion fails due to ENOSPC, the filesystem is not
     turned read-only

   - device scan deals with devices from other filesystems that changed
     ownership due to overwrite (mkfs)

   - fix a race between scrub and block group removal/allocation

   - fix long standing bug of a runaway balance operation, printing the
     same line to the syslog, caused by a stale status bit on a reloc
     tree that prevented progress

   - fix corrupt log due to concurrent fsync of inodes with shared
     extents

   - fix space underflow for NODATACOW and buffered writes when it for
     some reason needs to fallback to COW mode"

* tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits)
  btrfs: fix space_info bytes_may_use underflow during space cache writeout
  btrfs: fix space_info bytes_may_use underflow after nocow buffered write
  btrfs: fix wrong file range cleanup after an error filling dealloc range
  btrfs: remove redundant local variable in read_block_for_search
  btrfs: open code key_search
  btrfs: split btrfs_direct_IO to read and write part
  btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK
  fs: remove dio_end_io()
  btrfs: switch to iomap_dio_rw() for dio
  iomap: remove lockdep_assert_held()
  iomap: add a filesystem hook for direct I/O bio submission
  fs: export generic_file_buffered_read()
  btrfs: turn space cache writeout failure messages into debug messages
  btrfs: include error on messages about failure to write space/inode caches
  btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks()
  btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums
  btrfs: make checksum item extension more efficient
  btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents
  btrfs: unexport btrfs_compress_set_level()
  btrfs: simplify iget helpers
  ...
This commit is contained in:
Linus Torvalds 2020-06-02 19:59:25 -07:00
commit f3cdc8ae11
52 changed files with 3224 additions and 3045 deletions

View file

@ -80,6 +80,7 @@ ForEachMacros:
- 'ax25_uid_for_each'
- '__bio_for_each_bvec'
- 'bio_for_each_bvec'
- 'bio_for_each_bvec_all'
- 'bio_for_each_integrity_vec'
- '__bio_for_each_segment'
- 'bio_for_each_segment'

View file

@ -129,6 +129,7 @@ Usage of helpers:
::
bio_for_each_segment_all()
bio_for_each_bvec_all()
bio_first_bvec_all()
bio_first_page_all()
bio_last_bvec_all()
@ -143,4 +144,5 @@ Usage of helpers:
bio_vec' will contain a multi-page IO vector during the iteration::
bio_for_each_bvec()
bio_for_each_bvec_all()
rq_for_each_bvec()

View file

@ -14,6 +14,7 @@ config BTRFS_FS
select LZO_DECOMPRESS
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select SRCU

View file

@ -13,6 +13,7 @@
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
#include "misc.h"
/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6
@ -537,18 +538,13 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
const u64 *extent_item_pos, bool ignore_offset)
{
struct btrfs_root *root;
struct btrfs_key root_key;
struct extent_buffer *eb;
int ret = 0;
int root_level;
int level = ref->level;
struct btrfs_key search_key = ref->key_for_search;
root_key.objectid = ref->root_id;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
root = btrfs_get_fs_root(fs_info, &root_key, false);
root = btrfs_get_fs_root(fs_info, ref->root_id, false);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_free;
@ -2295,3 +2291,832 @@ void free_ipath(struct inode_fs_paths *ipath)
kvfree(ipath->fspath);
kfree(ipath);
}
struct btrfs_backref_iter *btrfs_backref_iter_alloc(
struct btrfs_fs_info *fs_info, gfp_t gfp_flag)
{
struct btrfs_backref_iter *ret;
ret = kzalloc(sizeof(*ret), gfp_flag);
if (!ret)
return NULL;
ret->path = btrfs_alloc_path();
if (!ret) {
kfree(ret);
return NULL;
}
/* Current backref iterator only supports iteration in commit root */
ret->path->search_commit_root = 1;
ret->path->skip_locking = 1;
ret->fs_info = fs_info;
return ret;
}
int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
{
struct btrfs_fs_info *fs_info = iter->fs_info;
struct btrfs_path *path = iter->path;
struct btrfs_extent_item *ei;
struct btrfs_key key;
int ret;
key.objectid = bytenr;
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = (u64)-1;
iter->bytenr = bytenr;
ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
if (ret == 0) {
ret = -EUCLEAN;
goto release;
}
if (path->slots[0] == 0) {
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
ret = -EUCLEAN;
goto release;
}
path->slots[0]--;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) {
ret = -ENOENT;
goto release;
}
memcpy(&iter->cur_key, &key, sizeof(key));
iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]);
iter->end_ptr = (u32)(iter->item_ptr +
btrfs_item_size_nr(path->nodes[0], path->slots[0]));
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_extent_item);
/*
* Only support iteration on tree backref yet.
*
* This is an extra precaution for non skinny-metadata, where
* EXTENT_ITEM is also used for tree blocks, that we can only use
* extent flags to determine if it's a tree block.
*/
if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
ret = -ENOTSUPP;
goto release;
}
iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
/* If there is no inline backref, go search for keyed backref */
if (iter->cur_ptr >= iter->end_ptr) {
ret = btrfs_next_item(fs_info->extent_root, path);
/* No inline nor keyed ref */
if (ret > 0) {
ret = -ENOENT;
goto release;
}
if (ret < 0)
goto release;
btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
path->slots[0]);
if (iter->cur_key.objectid != bytenr ||
(iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
ret = -ENOENT;
goto release;
}
iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]);
iter->item_ptr = iter->cur_ptr;
iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size_nr(
path->nodes[0], path->slots[0]));
}
return 0;
release:
btrfs_backref_iter_release(iter);
return ret;
}
/*
* Go to the next backref item of current bytenr, can be either inlined or
* keyed.
*
* Caller needs to check whether it's inline ref or not by iter->cur_key.
*
* Return 0 if we get next backref without problem.
* Return >0 if there is no extra backref for this bytenr.
* Return <0 if there is something wrong happened.
*/
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
{
struct extent_buffer *eb = btrfs_backref_get_eb(iter);
struct btrfs_path *path = iter->path;
struct btrfs_extent_inline_ref *iref;
int ret;
u32 size;
if (btrfs_backref_iter_is_inline_ref(iter)) {
/* We're still inside the inline refs */
ASSERT(iter->cur_ptr < iter->end_ptr);
if (btrfs_backref_has_tree_block_info(iter)) {
/* First tree block info */
size = sizeof(struct btrfs_tree_block_info);
} else {
/* Use inline ref type to determine the size */
int type;
iref = (struct btrfs_extent_inline_ref *)
((unsigned long)iter->cur_ptr);
type = btrfs_extent_inline_ref_type(eb, iref);
size = btrfs_extent_inline_ref_size(type);
}
iter->cur_ptr += size;
if (iter->cur_ptr < iter->end_ptr)
return 0;
/* All inline items iterated, fall through */
}
/* We're at keyed items, there is no inline item, go to the next one */
ret = btrfs_next_item(iter->fs_info->extent_root, iter->path);
if (ret)
return ret;
btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]);
if (iter->cur_key.objectid != iter->bytenr ||
(iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
return 1;
iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]);
iter->cur_ptr = iter->item_ptr;
iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
return 0;
}
void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
struct btrfs_backref_cache *cache, int is_reloc)
{
int i;
cache->rb_root = RB_ROOT;
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
INIT_LIST_HEAD(&cache->pending[i]);
INIT_LIST_HEAD(&cache->changed);
INIT_LIST_HEAD(&cache->detached);
INIT_LIST_HEAD(&cache->leaves);
INIT_LIST_HEAD(&cache->pending_edge);
INIT_LIST_HEAD(&cache->useless_node);
cache->fs_info = fs_info;
cache->is_reloc = is_reloc;
}
struct btrfs_backref_node *btrfs_backref_alloc_node(
struct btrfs_backref_cache *cache, u64 bytenr, int level)
{
struct btrfs_backref_node *node;
ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL);
node = kzalloc(sizeof(*node), GFP_NOFS);
if (!node)
return node;
INIT_LIST_HEAD(&node->list);
INIT_LIST_HEAD(&node->upper);
INIT_LIST_HEAD(&node->lower);
RB_CLEAR_NODE(&node->rb_node);
cache->nr_nodes++;
node->level = level;
node->bytenr = bytenr;
return node;
}
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache)
{
struct btrfs_backref_edge *edge;
edge = kzalloc(sizeof(*edge), GFP_NOFS);
if (edge)
cache->nr_edges++;
return edge;
}
/*
* Drop the backref node from cache, also cleaning up all its
* upper edges and any uncached nodes in the path.
*
* This cleanup happens bottom up, thus the node should either
* be the lowest node in the cache or a detached node.
*/
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node)
{
struct btrfs_backref_node *upper;
struct btrfs_backref_edge *edge;
if (!node)
return;
BUG_ON(!node->lowest && !node->detached);
while (!list_empty(&node->upper)) {
edge = list_entry(node->upper.next, struct btrfs_backref_edge,
list[LOWER]);
upper = edge->node[UPPER];
list_del(&edge->list[LOWER]);
list_del(&edge->list[UPPER]);
btrfs_backref_free_edge(cache, edge);
if (RB_EMPTY_NODE(&upper->rb_node)) {
BUG_ON(!list_empty(&node->upper));
btrfs_backref_drop_node(cache, node);
node = upper;
node->lowest = 1;
continue;
}
/*
* Add the node to leaf node list if no other child block
* cached.
*/
if (list_empty(&upper->lower)) {
list_add_tail(&upper->lower, &cache->leaves);
upper->lowest = 1;
}
}
btrfs_backref_drop_node(cache, node);
}
/*
* Release all nodes/edges from current cache
*/
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
{
struct btrfs_backref_node *node;
int i;
while (!list_empty(&cache->detached)) {
node = list_entry(cache->detached.next,
struct btrfs_backref_node, list);
btrfs_backref_cleanup_node(cache, node);
}
while (!list_empty(&cache->leaves)) {
node = list_entry(cache->leaves.next,
struct btrfs_backref_node, lower);
btrfs_backref_cleanup_node(cache, node);
}
cache->last_trans = 0;
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
ASSERT(list_empty(&cache->pending[i]));
ASSERT(list_empty(&cache->pending_edge));
ASSERT(list_empty(&cache->useless_node));
ASSERT(list_empty(&cache->changed));
ASSERT(list_empty(&cache->detached));
ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
ASSERT(!cache->nr_nodes);
ASSERT(!cache->nr_edges);
}
/*
* Handle direct tree backref
*
* Direct tree backref means, the backref item shows its parent bytenr
* directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
*
* @ref_key: The converted backref key.
* For keyed backref, it's the item key.
* For inlined backref, objectid is the bytenr,
* type is btrfs_inline_ref_type, offset is
* btrfs_inline_ref_offset.
*/
static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
struct btrfs_key *ref_key,
struct btrfs_backref_node *cur)
{
struct btrfs_backref_edge *edge;
struct btrfs_backref_node *upper;
struct rb_node *rb_node;
ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);
/* Only reloc root uses backref pointing to itself */
if (ref_key->objectid == ref_key->offset) {
struct btrfs_root *root;
cur->is_reloc_root = 1;
/* Only reloc backref cache cares about a specific root */
if (cache->is_reloc) {
root = find_reloc_root(cache->fs_info, cur->bytenr);
if (WARN_ON(!root))
return -ENOENT;
cur->root = root;
} else {
/*
* For generic purpose backref cache, reloc root node
* is useless.
*/
list_add(&cur->list, &cache->useless_node);
}
return 0;
}
edge = btrfs_backref_alloc_edge(cache);
if (!edge)
return -ENOMEM;
rb_node = rb_simple_search(&cache->rb_root, ref_key->offset);
if (!rb_node) {
/* Parent node not yet cached */
upper = btrfs_backref_alloc_node(cache, ref_key->offset,
cur->level + 1);
if (!upper) {
btrfs_backref_free_edge(cache, edge);
return -ENOMEM;
}
/*
* Backrefs for the upper level block isn't cached, add the
* block to pending list
*/
list_add_tail(&edge->list[UPPER], &cache->pending_edge);
} else {
/* Parent node already cached */
upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
ASSERT(upper->checked);
INIT_LIST_HEAD(&edge->list[UPPER]);
}
btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER);
return 0;
}
/*
* Handle indirect tree backref
*
* Indirect tree backref means, we only know which tree the node belongs to.
* We still need to do a tree search to find out the parents. This is for
* TREE_BLOCK_REF backref (keyed or inlined).
*
* @ref_key: The same as @ref_key in handle_direct_tree_backref()
* @tree_key: The first key of this tree block.
* @path: A clean (released) path, to avoid allocating path everytime
* the function get called.
*/
static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
struct btrfs_path *path,
struct btrfs_key *ref_key,
struct btrfs_key *tree_key,
struct btrfs_backref_node *cur)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_backref_node *upper;
struct btrfs_backref_node *lower;
struct btrfs_backref_edge *edge;
struct extent_buffer *eb;
struct btrfs_root *root;
struct rb_node *rb_node;
int level;
bool need_check = true;
int ret;
root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
if (IS_ERR(root))
return PTR_ERR(root);
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
cur->cowonly = 1;
if (btrfs_root_level(&root->root_item) == cur->level) {
/* Tree root */
ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
/*
* For reloc backref cache, we may ignore reloc root. But for
* general purpose backref cache, we can't rely on
* btrfs_should_ignore_reloc_root() as it may conflict with
* current running relocation and lead to missing root.
*
* For general purpose backref cache, reloc root detection is
* completely relying on direct backref (key->offset is parent
* bytenr), thus only do such check for reloc cache.
*/
if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
btrfs_put_root(root);
list_add(&cur->list, &cache->useless_node);
} else {
cur->root = root;
}
return 0;
}
level = cur->level + 1;
/* Search the tree to find parent blocks referring to the block */
path->search_commit_root = 1;
path->skip_locking = 1;
path->lowest_level = level;
ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
path->lowest_level = 0;
if (ret < 0) {
btrfs_put_root(root);
return ret;
}
if (ret > 0 && path->slots[level] > 0)
path->slots[level]--;
eb = path->nodes[level];
if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
btrfs_err(fs_info,
"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
cur->bytenr, level - 1, root->root_key.objectid,
tree_key->objectid, tree_key->type, tree_key->offset);
btrfs_put_root(root);
ret = -ENOENT;
goto out;
}
lower = cur;
/* Add all nodes and edges in the path */
for (; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level]) {
ASSERT(btrfs_root_bytenr(&root->root_item) ==
lower->bytenr);
/* Same as previous should_ignore_reloc_root() call */
if (btrfs_should_ignore_reloc_root(root) &&
cache->is_reloc) {
btrfs_put_root(root);
list_add(&lower->list, &cache->useless_node);
} else {
lower->root = root;
}
break;
}
edge = btrfs_backref_alloc_edge(cache);
if (!edge) {
btrfs_put_root(root);
ret = -ENOMEM;
goto out;
}
eb = path->nodes[level];
rb_node = rb_simple_search(&cache->rb_root, eb->start);
if (!rb_node) {
upper = btrfs_backref_alloc_node(cache, eb->start,
lower->level + 1);
if (!upper) {
btrfs_put_root(root);
btrfs_backref_free_edge(cache, edge);
ret = -ENOMEM;
goto out;
}
upper->owner = btrfs_header_owner(eb);
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
upper->cowonly = 1;
/*
* If we know the block isn't shared we can avoid
* checking its backrefs.
*/
if (btrfs_block_can_be_shared(root, eb))
upper->checked = 0;
else
upper->checked = 1;
/*
* Add the block to pending list if we need to check its
* backrefs, we only do this once while walking up a
* tree as we will catch anything else later on.
*/
if (!upper->checked && need_check) {
need_check = false;
list_add_tail(&edge->list[UPPER],
&cache->pending_edge);
} else {
if (upper->checked)
need_check = true;
INIT_LIST_HEAD(&edge->list[UPPER]);
}
} else {
upper = rb_entry(rb_node, struct btrfs_backref_node,
rb_node);
ASSERT(upper->checked);
INIT_LIST_HEAD(&edge->list[UPPER]);
if (!upper->owner)
upper->owner = btrfs_header_owner(eb);
}
btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);
if (rb_node) {
btrfs_put_root(root);
break;
}
lower = upper;
upper = NULL;
}
out:
btrfs_release_path(path);
return ret;
}
/*
* Add backref node @cur into @cache.
*
* NOTE: Even if the function returned 0, @cur is not yet cached as its upper
* links aren't yet bi-directional. Needs to finish such links.
* Use btrfs_backref_finish_upper_links() to finish such linkage.
*
* @path: Released path for indirect tree backref lookup
* @iter: Released backref iter for extent tree search
* @node_key: The first key of the tree block
*/
int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
struct btrfs_path *path,
struct btrfs_backref_iter *iter,
struct btrfs_key *node_key,
struct btrfs_backref_node *cur)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_backref_edge *edge;
struct btrfs_backref_node *exist;
int ret;
ret = btrfs_backref_iter_start(iter, cur->bytenr);
if (ret < 0)
return ret;
/*
* We skip the first btrfs_tree_block_info, as we don't use the key
* stored in it, but fetch it from the tree block
*/
if (btrfs_backref_has_tree_block_info(iter)) {
ret = btrfs_backref_iter_next(iter);
if (ret < 0)
goto out;
/* No extra backref? This means the tree block is corrupted */
if (ret > 0) {
ret = -EUCLEAN;
goto out;
}
}
WARN_ON(cur->checked);
if (!list_empty(&cur->upper)) {
/*
* The backref was added previously when processing backref of
* type BTRFS_TREE_BLOCK_REF_KEY
*/
ASSERT(list_is_singular(&cur->upper));
edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
list[LOWER]);
ASSERT(list_empty(&edge->list[UPPER]));
exist = edge->node[UPPER];
/*
* Add the upper level block to pending list if we need check
* its backrefs
*/
if (!exist->checked)
list_add_tail(&edge->list[UPPER], &cache->pending_edge);
} else {
exist = NULL;
}
for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
struct extent_buffer *eb;
struct btrfs_key key;
int type;
cond_resched();
eb = btrfs_backref_get_eb(iter);
key.objectid = iter->bytenr;
if (btrfs_backref_iter_is_inline_ref(iter)) {
struct btrfs_extent_inline_ref *iref;
/* Update key for inline backref */
iref = (struct btrfs_extent_inline_ref *)
((unsigned long)iter->cur_ptr);
type = btrfs_get_extent_inline_ref_type(eb, iref,
BTRFS_REF_TYPE_BLOCK);
if (type == BTRFS_REF_TYPE_INVALID) {
ret = -EUCLEAN;
goto out;
}
key.type = type;
key.offset = btrfs_extent_inline_ref_offset(eb, iref);
} else {
key.type = iter->cur_key.type;
key.offset = iter->cur_key.offset;
}
/*
* Parent node found and matches current inline ref, no need to
* rebuild this node for this inline ref
*/
if (exist &&
((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
exist->owner == key.offset) ||
(key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
exist->bytenr == key.offset))) {
exist = NULL;
continue;
}
/* SHARED_BLOCK_REF means key.offset is the parent bytenr */
if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
ret = handle_direct_tree_backref(cache, &key, cur);
if (ret < 0)
goto out;
continue;
} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
ret = -EINVAL;
btrfs_print_v0_err(fs_info);
btrfs_handle_fs_error(fs_info, ret, NULL);
goto out;
} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
continue;
}
/*
* key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
* means the root objectid. We need to search the tree to get
* its parent bytenr.
*/
ret = handle_indirect_tree_backref(cache, path, &key, node_key,
cur);
if (ret < 0)
goto out;
}
ret = 0;
cur->checked = 1;
WARN_ON(exist);
out:
btrfs_backref_iter_release(iter);
return ret;
}
/*
* Finish the upwards linkage created by btrfs_backref_add_tree_node()
*/
int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *start)
{
struct list_head *useless_node = &cache->useless_node;
struct btrfs_backref_edge *edge;
struct rb_node *rb_node;
LIST_HEAD(pending_edge);
ASSERT(start->checked);
/* Insert this node to cache if it's not COW-only */
if (!start->cowonly) {
rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
&start->rb_node);
if (rb_node)
btrfs_backref_panic(cache->fs_info, start->bytenr,
-EEXIST);
list_add_tail(&start->lower, &cache->leaves);
}
/*
* Use breadth first search to iterate all related edges.
*
* The starting points are all the edges of this node
*/
list_for_each_entry(edge, &start->upper, list[LOWER])
list_add_tail(&edge->list[UPPER], &pending_edge);
while (!list_empty(&pending_edge)) {
struct btrfs_backref_node *upper;
struct btrfs_backref_node *lower;
struct rb_node *rb_node;
edge = list_first_entry(&pending_edge,
struct btrfs_backref_edge, list[UPPER]);
list_del_init(&edge->list[UPPER]);
upper = edge->node[UPPER];
lower = edge->node[LOWER];
/* Parent is detached, no need to keep any edges */
if (upper->detached) {
list_del(&edge->list[LOWER]);
btrfs_backref_free_edge(cache, edge);
/* Lower node is orphan, queue for cleanup */
if (list_empty(&lower->upper))
list_add(&lower->list, useless_node);
continue;
}
/*
* All new nodes added in current build_backref_tree() haven't
* been linked to the cache rb tree.
* So if we have upper->rb_node populated, this means a cache
* hit. We only need to link the edge, as @upper and all its
* parents have already been linked.
*/
if (!RB_EMPTY_NODE(&upper->rb_node)) {
if (upper->lowest) {
list_del_init(&upper->lower);
upper->lowest = 0;
}
list_add_tail(&edge->list[UPPER], &upper->lower);
continue;
}
/* Sanity check, we shouldn't have any unchecked nodes */
if (!upper->checked) {
ASSERT(0);
return -EUCLEAN;
}
/* Sanity check, COW-only node has non-COW-only parent */
if (start->cowonly != upper->cowonly) {
ASSERT(0);
return -EUCLEAN;
}
/* Only cache non-COW-only (subvolume trees) tree blocks */
if (!upper->cowonly) {
rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
&upper->rb_node);
if (rb_node) {
btrfs_backref_panic(cache->fs_info,
upper->bytenr, -EEXIST);
return -EUCLEAN;
}
}
list_add_tail(&edge->list[UPPER], &upper->lower);
/*
* Also queue all the parent edges of this uncached node
* to finish the upper linkage
*/
list_for_each_entry(edge, &upper->upper, list[LOWER])
list_add_tail(&edge->list[UPPER], &pending_edge);
}
return 0;
}
void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node)
{
struct btrfs_backref_node *lower;
struct btrfs_backref_node *upper;
struct btrfs_backref_edge *edge;
while (!list_empty(&cache->useless_node)) {
lower = list_first_entry(&cache->useless_node,
struct btrfs_backref_node, list);
list_del_init(&lower->list);
}
while (!list_empty(&cache->pending_edge)) {
edge = list_first_entry(&cache->pending_edge,
struct btrfs_backref_edge, list[UPPER]);
list_del(&edge->list[UPPER]);
list_del(&edge->list[LOWER]);
lower = edge->node[LOWER];
upper = edge->node[UPPER];
btrfs_backref_free_edge(cache, edge);
/*
* Lower is no longer linked to any upper backref nodes and
* isn't in the cache, we can free it ourselves.
*/
if (list_empty(&lower->upper) &&
RB_EMPTY_NODE(&lower->rb_node))
list_add(&lower->list, &cache->useless_node);
if (!RB_EMPTY_NODE(&upper->rb_node))
continue;
/* Add this guy's upper edges to the list to process */
list_for_each_entry(edge, &upper->upper, list[LOWER])
list_add_tail(&edge->list[UPPER],
&cache->pending_edge);
if (list_empty(&upper->upper))
list_add(&upper->list, &cache->useless_node);
}
while (!list_empty(&cache->useless_node)) {
lower = list_first_entry(&cache->useless_node,
struct btrfs_backref_node, list);
list_del_init(&lower->list);
if (lower == node)
node = NULL;
btrfs_backref_free_node(cache, lower);
}
btrfs_backref_cleanup_node(cache, node);
ASSERT(list_empty(&cache->useless_node) &&
list_empty(&cache->pending_edge));
}

View file

@ -8,6 +8,7 @@
#include <linux/btrfs.h>
#include "ulist.h"
#include "disk-io.h"
#include "extent_io.h"
struct inode_fs_paths {
@ -78,4 +79,300 @@ struct prelim_ref {
u64 wanted_disk_byte;
};
/*
* Iterate backrefs of one extent.
*
* Now it only supports iteration of tree block in commit root.
*/
struct btrfs_backref_iter {
u64 bytenr;
struct btrfs_path *path;
struct btrfs_fs_info *fs_info;
struct btrfs_key cur_key;
u32 item_ptr;
u32 cur_ptr;
u32 end_ptr;
};
struct btrfs_backref_iter *btrfs_backref_iter_alloc(
struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
{
if (!iter)
return;
btrfs_free_path(iter->path);
kfree(iter);
}
static inline struct extent_buffer *btrfs_backref_get_eb(
struct btrfs_backref_iter *iter)
{
if (!iter)
return NULL;
return iter->path->nodes[0];
}
/*
* For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data
* is btrfs_tree_block_info, without a btrfs_extent_inline_ref header.
*
* This helper determines if that's the case.
*/
static inline bool btrfs_backref_has_tree_block_info(
struct btrfs_backref_iter *iter)
{
if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY &&
iter->cur_ptr - iter->item_ptr == sizeof(struct btrfs_extent_item))
return true;
return false;
}
int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
static inline bool btrfs_backref_iter_is_inline_ref(
struct btrfs_backref_iter *iter)
{
if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY ||
iter->cur_key.type == BTRFS_METADATA_ITEM_KEY)
return true;
return false;
}
static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
{
iter->bytenr = 0;
iter->item_ptr = 0;
iter->cur_ptr = 0;
iter->end_ptr = 0;
btrfs_release_path(iter->path);
memset(&iter->cur_key, 0, sizeof(iter->cur_key));
}
/*
* Backref cache related structures
*
* The whole objective of backref_cache is to build a bi-directional map
* of tree blocks (represented by backref_node) and all their parents.
*/
/*
* Represent a tree block in the backref cache
*/
struct btrfs_backref_node {
struct {
struct rb_node rb_node;
u64 bytenr;
}; /* Use rb_simple_node for search/insert */
u64 new_bytenr;
/* Objectid of tree block owner, can be not uptodate */
u64 owner;
/* Link to pending, changed or detached list */
struct list_head list;
/* List of upper level edges, which link this node to its parents */
struct list_head upper;
/* List of lower level edges, which link this node to its children */
struct list_head lower;
/* NULL if this node is not tree root */
struct btrfs_root *root;
/* Extent buffer got by COWing the block */
struct extent_buffer *eb;
/* Level of the tree block */
unsigned int level:8;
/* Is the block in a non-shareable tree */
unsigned int cowonly:1;
/* 1 if no child node is in the cache */
unsigned int lowest:1;
/* Is the extent buffer locked */
unsigned int locked:1;
/* Has the block been processed */
unsigned int processed:1;
/* Have backrefs of this block been checked */
unsigned int checked:1;
/*
* 1 if corresponding block has been COWed but some upper level block
* pointers may not point to the new location
*/
unsigned int pending:1;
/* 1 if the backref node isn't connected to any other backref node */
unsigned int detached:1;
/*
* For generic purpose backref cache, where we only care if it's a reloc
* root, doesn't care the source subvolid.
*/
unsigned int is_reloc_root:1;
};
#define LOWER 0
#define UPPER 1
/*
* Represent an edge connecting upper and lower backref nodes.
*/
struct btrfs_backref_edge {
/*
* list[LOWER] is linked to btrfs_backref_node::upper of lower level
* node, and list[UPPER] is linked to btrfs_backref_node::lower of
* upper level node.
*
* Also, build_backref_tree() uses list[UPPER] for pending edges, before
* linking list[UPPER] to its upper level nodes.
*/
struct list_head list[2];
/* Two related nodes */
struct btrfs_backref_node *node[2];
};
struct btrfs_backref_cache {
/* Red black tree of all backref nodes in the cache */
struct rb_root rb_root;
/* For passing backref nodes to btrfs_reloc_cow_block */
struct btrfs_backref_node *path[BTRFS_MAX_LEVEL];
/*
* List of blocks that have been COWed but some block pointers in upper
* level blocks may not reflect the new location
*/
struct list_head pending[BTRFS_MAX_LEVEL];
/* List of backref nodes with no child node */
struct list_head leaves;
/* List of blocks that have been COWed in current transaction */
struct list_head changed;
/* List of detached backref node. */
struct list_head detached;
u64 last_trans;
int nr_nodes;
int nr_edges;
/* List of unchecked backref edges during backref cache build */
struct list_head pending_edge;
/* List of useless backref nodes during backref cache build */
struct list_head useless_node;
struct btrfs_fs_info *fs_info;
/*
* Whether this cache is for relocation
*
* Reloction backref cache require more info for reloc root compared
* to generic backref cache.
*/
unsigned int is_reloc;
};
void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
struct btrfs_backref_cache *cache, int is_reloc);
struct btrfs_backref_node *btrfs_backref_alloc_node(
struct btrfs_backref_cache *cache, u64 bytenr, int level);
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache);
#define LINK_LOWER (1 << 0)
#define LINK_UPPER (1 << 1)
static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
struct btrfs_backref_node *lower,
struct btrfs_backref_node *upper,
int link_which)
{
ASSERT(upper && lower && upper->level == lower->level + 1);
edge->node[LOWER] = lower;
edge->node[UPPER] = upper;
if (link_which & LINK_LOWER)
list_add_tail(&edge->list[LOWER], &lower->upper);
if (link_which & LINK_UPPER)
list_add_tail(&edge->list[UPPER], &upper->lower);
}
static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node)
{
if (node) {
cache->nr_nodes--;
btrfs_put_root(node->root);
kfree(node);
}
}
static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
struct btrfs_backref_edge *edge)
{
if (edge) {
cache->nr_edges--;
kfree(edge);
}
}
static inline void btrfs_backref_unlock_node_buffer(
struct btrfs_backref_node *node)
{
if (node->locked) {
btrfs_tree_unlock(node->eb);
node->locked = 0;
}
}
static inline void btrfs_backref_drop_node_buffer(
struct btrfs_backref_node *node)
{
if (node->eb) {
btrfs_backref_unlock_node_buffer(node);
free_extent_buffer(node->eb);
node->eb = NULL;
}
}
/*
* Drop the backref node from cache without cleaning up its children
* edges.
*
* This can only be called on node without parent edges.
* The children edges are still kept as is.
*/
static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
struct btrfs_backref_node *node)
{
BUG_ON(!list_empty(&node->upper));
btrfs_backref_drop_node_buffer(node);
list_del(&node->list);
list_del(&node->lower);
if (!RB_EMPTY_NODE(&node->rb_node))
rb_erase(&node->rb_node, &tree->rb_root);
btrfs_backref_free_node(tree, node);
}
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node);
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info,
u64 bytenr, int errno)
{
btrfs_panic(fs_info, errno,
"Inconsistency in backref cache found at offset %llu",
bytenr);
}
int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
struct btrfs_path *path,
struct btrfs_backref_iter *iter,
struct btrfs_key *node_key,
struct btrfs_backref_node *cur);
int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *start);
void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node);
#endif

View file

@ -7,7 +7,6 @@
#include "disk-io.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "disk-io.h"
#include "volumes.h"
#include "transaction.h"
#include "ref-verify.h"
@ -161,6 +160,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct rb_node *parent = NULL;
struct btrfs_block_group *cache;
ASSERT(block_group->length != 0);
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
@ -863,11 +864,34 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
}
static int remove_block_group_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root;
struct btrfs_key key;
int ret;
root = fs_info->extent_root;
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = block_group->length;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
return ret;
ret = btrfs_del_item(trans, root, path);
return ret;
}
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_path *path;
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
@ -1065,26 +1089,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock);
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = block_group->length;
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
block_group->removed = 1;
/*
* At this point trimming can't start on this block group, because we
* removed the block group from the tree fs_info->block_group_cache_tree
* so no one can't find it anymore and even if someone already got this
* block group before we removed it from the rbtree, they have already
* incremented block_group->trimming - if they didn't, they won't find
* any free space entries because we already removed them all when we
* called btrfs_remove_free_space_cache().
* At this point trimming or scrub can't start on this block group,
* because we removed the block group from the rbtree
* fs_info->block_group_cache_tree so no one can't find it anymore and
* even if someone already got this block group before we removed it
* from the rbtree, they have already incremented block_group->frozen -
* if they didn't, for the trimming case they won't find any free space
* entries because we already removed them all when we called
* btrfs_remove_free_space_cache().
*
* And we must not remove the extent map from the fs_info->mapping_tree
* to prevent the same logical address range and physical device space
* ranges from being reused for a new block group. This is because our
* fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
* ranges from being reused for a new block group. This is needed to
* avoid races with trimming and scrub.
*
* An fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
* completely transactionless, so while it is trimming a range the
* currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same
@ -1095,7 +1118,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* in place until the extents have been discarded completely when
* the transaction commit has completed.
*/
remove_em = (atomic_read(&block_group->trimming) == 0);
remove_em = (atomic_read(&block_group->frozen) == 0);
spin_unlock(&block_group->lock);
mutex_unlock(&fs_info->chunk_mutex);
@ -1107,16 +1130,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
/* Once for the block groups rbtree */
btrfs_put_block_group(block_group);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
ret = -EIO;
ret = remove_block_group_item(trans, path, block_group);
if (ret < 0)
goto out;
ret = btrfs_del_item(trans, root, path);
if (ret)
goto out;
if (remove_em) {
struct extent_map_tree *em_tree;
@ -1175,7 +1192,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
free_extent_map(em);
return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
num_items, 1);
num_items);
}
/*
@ -1284,25 +1301,17 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
EXTENT_DIRTY);
if (ret)
goto err;
goto out;
}
ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
EXTENT_DIRTY);
if (ret)
goto err;
out:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans)
btrfs_put_transaction(prev_trans);
return true;
err:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans)
btrfs_put_transaction(prev_trans);
btrfs_dec_block_group_ro(bg);
return false;
return ret == 0;
}
/*
@ -1400,8 +1409,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* We could have pending pinned extents for this block group,
* just delete them, we don't care about them anymore.
*/
if (!clean_pinned_extents(trans, block_group))
if (!clean_pinned_extents(trans, block_group)) {
btrfs_dec_block_group_ro(block_group);
goto end_trans;
}
/*
* At this point, the block_group is read only and should fail
@ -1450,7 +1461,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Implicit trim during transaction commit. */
if (trimming)
btrfs_get_block_group_trimming(block_group);
btrfs_freeze_block_group(block_group);
/*
* Btrfs_remove_chunk will abort the transaction if things go
@ -1460,7 +1471,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (ret) {
if (trimming)
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
goto end_trans;
}
@ -1774,7 +1785,7 @@ static void link_block_group(struct btrfs_block_group *cache)
}
static struct btrfs_block_group *btrfs_create_block_group_cache(
struct btrfs_fs_info *fs_info, u64 start, u64 size)
struct btrfs_fs_info *fs_info, u64 start)
{
struct btrfs_block_group *cache;
@ -1790,7 +1801,6 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
}
cache->start = start;
cache->length = size;
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
@ -1809,7 +1819,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
atomic_set(&cache->trimming, 0);
atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@ -1870,25 +1880,44 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
return ret;
}
static int read_block_group_item(struct btrfs_block_group *cache,
struct btrfs_path *path,
const struct btrfs_key *key)
{
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_block_group_item bgi;
int slot = path->slots[0];
cache->length = key->offset;
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
sizeof(bgi));
cache->used = btrfs_stack_block_group_used(&bgi);
cache->flags = btrfs_stack_block_group_flags(&bgi);
return 0;
}
static int read_one_block_group(struct btrfs_fs_info *info,
struct btrfs_path *path,
const struct btrfs_key *key,
int need_clear)
{
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_block_group *cache;
struct btrfs_space_info *space_info;
struct btrfs_block_group_item bgi;
const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
int slot = path->slots[0];
int ret;
ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
cache = btrfs_create_block_group_cache(info, key->objectid, key->offset);
cache = btrfs_create_block_group_cache(info, key->objectid);
if (!cache)
return -ENOMEM;
ret = read_block_group_item(cache, path, key);
if (ret < 0)
goto error;
if (need_clear) {
/*
* When we mount with old space cache, we need to
@ -1903,10 +1932,6 @@ static int read_one_block_group(struct btrfs_fs_info *info,
if (btrfs_test_opt(info, SPACE_CACHE))
cache->disk_cache_state = BTRFS_DC_CLEAR;
}
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
sizeof(bgi));
cache->used = btrfs_stack_block_group_used(&bgi);
cache->flags = btrfs_stack_block_group_flags(&bgi);
if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
(cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
btrfs_err(info,
@ -1934,15 +1959,15 @@ static int read_one_block_group(struct btrfs_fs_info *info,
* are empty, and we can just add all the space in and be done with it.
* This saves us _a_lot_ of time, particularly in the full case.
*/
if (key->offset == cache->used) {
if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, key->objectid,
key->objectid + key->offset);
add_new_free_space(cache, cache->start,
cache->start + cache->length);
btrfs_free_excluded_extents(cache);
}
@ -1952,7 +1977,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
trace_btrfs_add_block_group(info, cache, 0);
btrfs_update_space_info(info, cache->flags, key->offset,
btrfs_update_space_info(info, cache->flags, cache->length,
cache->used, cache->bytes_super, &space_info);
cache->space_info = space_info;
@ -1991,7 +2016,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->reada = READA_FORWARD;
cache_gen = btrfs_super_cache_generation(info->super_copy);
if (btrfs_test_opt(info, SPACE_CACHE) &&
@ -2046,13 +2070,32 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
return ret;
}
static int insert_block_group_item(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_item bgi;
struct btrfs_root *root;
struct btrfs_key key;
spin_lock(&block_group->lock);
btrfs_set_stack_block_group_used(&bgi, block_group->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = block_group->length;
spin_unlock(&block_group->lock);
root = fs_info->extent_root;
return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
}
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group *block_group;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_block_group_item item;
struct btrfs_key key;
int ret = 0;
if (!trans->can_flush_pending_bgs)
@ -2065,21 +2108,11 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
if (ret)
goto next;
spin_lock(&block_group->lock);
btrfs_set_stack_block_group_used(&item, block_group->used);
btrfs_set_stack_block_group_chunk_objectid(&item,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
btrfs_set_stack_block_group_flags(&item, block_group->flags);
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = block_group->length;
spin_unlock(&block_group->lock);
ret = btrfs_insert_item(trans, extent_root, &key, &item,
sizeof(item));
ret = insert_block_group_item(trans, block_group);
if (ret)
btrfs_abort_transaction(trans, ret);
ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
ret = btrfs_finish_chunk_alloc(trans, block_group->start,
block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
@ -2100,10 +2133,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
btrfs_set_log_full_commit(trans);
cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
cache = btrfs_create_block_group_cache(fs_info, chunk_offset);
if (!cache)
return -ENOMEM;
cache->length = size;
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
@ -2314,13 +2348,13 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
spin_unlock(&sinfo->lock);
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_block_group *cache)
static int update_block_group_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_root *root = fs_info->extent_root;
unsigned long bi;
struct extent_buffer *leaf;
struct btrfs_block_group_item bgi;
@ -2330,7 +2364,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = cache->length;
ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@ -2642,7 +2676,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
}
}
if (!ret) {
ret = write_one_cache_group(trans, path, cache);
ret = update_block_group_item(trans, path, cache);
/*
* Our block group might still be attached to the list
* of new block groups in the transaction handle of some
@ -2791,7 +2825,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
}
}
if (!ret) {
ret = write_one_cache_group(trans, path, cache);
ret = update_block_group_item(trans, path, cache);
/*
* One of the free space endio workers might have
* created a new block group while updating a free space
@ -2808,7 +2842,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
if (ret == -ENOENT) {
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
ret = write_one_cache_group(trans, path, cache);
ret = update_block_group_item(trans, path, cache);
}
if (ret)
btrfs_abort_transaction(trans, ret);
@ -3384,3 +3418,44 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
}
return 0;
}
void btrfs_freeze_block_group(struct btrfs_block_group *cache)
{
atomic_inc(&cache->frozen);
}
void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct extent_map_tree *em_tree;
struct extent_map *em;
bool cleanup;
spin_lock(&block_group->lock);
cleanup = (atomic_dec_and_test(&block_group->frozen) &&
block_group->removed);
spin_unlock(&block_group->lock);
if (cleanup) {
mutex_lock(&fs_info->chunk_mutex);
em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->start,
1);
BUG_ON(!em); /* logic error, can't happen */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
mutex_unlock(&fs_info->chunk_mutex);
/* once for us and once for the tree */
free_extent_map(em);
free_extent_map(em);
/*
* We may have left one free space entry and other possible
* tasks trimming this block group have left 1 entry each one.
* Free them if any.
*/
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
}
}

View file

@ -129,8 +129,17 @@ struct btrfs_block_group {
/* For read-only block groups */
struct list_head ro_list;
/*
* When non-zero it means the block group's logical address and its
* device extents can not be reused for future block group allocations
* until the counter goes down to 0. This is to prevent them from being
* reused while some task is still using the block group after it was
* deleted - we want to make sure they can only be reused for new block
* groups after that task is done with the deleted block group.
*/
atomic_t frozen;
/* For discard operations */
atomic_t trimming;
struct list_head discard_list;
int discard_index;
u64 discard_eligible_time;
@ -283,6 +292,9 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
cache->cached == BTRFS_CACHE_ERROR;
}
void btrfs_freeze_block_group(struct btrfs_block_group *cache);
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);

View file

@ -5,6 +5,7 @@
#include "block-rsv.h"
#include "space-info.h"
#include "transaction.h"
#include "block-group.h"
/*
* HOW DO BLOCK RESERVES WORK
@ -405,6 +406,8 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
else
block_rsv->full = 0;
if (block_rsv->size >= sinfo->total_bytes)
sinfo->force_alloc = CHUNK_ALLOC_FORCE;
spin_unlock(&block_rsv->lock);
spin_unlock(&sinfo->lock);
}
@ -455,7 +458,7 @@ static struct btrfs_block_rsv *get_block_rsv(
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *block_rsv = NULL;
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
(root == fs_info->csum_root && trans->adding_csums) ||
(root == fs_info->uuid_root))
block_rsv = trans->block_rsv;

View file

@ -7,6 +7,7 @@
#define BTRFS_INODE_H
#include <linux/hash.h>
#include <linux/refcount.h>
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
@ -27,7 +28,6 @@ enum {
BTRFS_INODE_NEEDS_FULL_SYNC,
BTRFS_INODE_COPY_EVERYTHING,
BTRFS_INODE_IN_DELALLOC_LIST,
BTRFS_INODE_READDIO_NEED_LOCK,
BTRFS_INODE_HAS_PROPS,
BTRFS_INODE_SNAPSHOT_FLUSH,
};
@ -293,53 +293,25 @@ static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
return ret;
}
#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
struct btrfs_dio_private {
struct inode *inode;
unsigned long flags;
u64 logical_offset;
u64 disk_bytenr;
u64 bytes;
void *private;
/* number of bios pending for this dio */
atomic_t pending_bios;
/* IO errors */
int errors;
/* orig_bio is our btrfs_io_bio */
struct bio *orig_bio;
/*
* References to this structure. There is one reference per in-flight
* bio plus one while we're still setting up.
*/
refcount_t refs;
/* dio_bio came from fs/direct-io.c */
struct bio *dio_bio;
/*
* The original bio may be split to several sub-bios, this is
* done during endio of sub-bios
*/
blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
blk_status_t);
/* Array of checksums */
u8 csums[];
};
/*
* Disable DIO read nolock optimization, so new dio readers will be forced
* to grab i_mutex. It is used to avoid the endless truncate due to
* nonlocked dio read.
*/
static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
smp_mb();
}
static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
{
smp_mb__before_atomic();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
}
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes

View file

@ -194,11 +194,9 @@ static int check_compressed_csum(struct btrfs_inode *inode,
for (i = 0; i < cb->nr_pages; i++) {
page = cb->compressed_pages[i];
crypto_shash_init(shash);
kaddr = kmap_atomic(page);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
kunmap_atomic(kaddr);
crypto_shash_final(shash, (u8 *)&csum);
if (memcmp(&csum, cb_sum, csum_size)) {
btrfs_print_data_csum_error(inode, disk_start,
@ -1141,6 +1139,22 @@ static void put_workspace(int type, struct list_head *ws)
}
}
/*
* Adjust @level according to the limits of the compression algorithm or
* fallback to default
*/
static unsigned int btrfs_compress_set_level(int type, unsigned level)
{
const struct btrfs_compress_op *ops = btrfs_compress_op[type];
if (level == 0)
level = ops->default_level;
else
level = min(level, ops->max_level);
return level;
}
/*
* Given an address space and start and length, compress the bytes into @pages
* that are allocated on demand.
@ -1748,19 +1762,3 @@ unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
return level;
}
/*
* Adjust @level according to the limits of the compression algorithm or
* fallback to default
*/
unsigned int btrfs_compress_set_level(int type, unsigned level)
{
const struct btrfs_compress_op *ops = btrfs_compress_op[type];
if (level == 0)
level = ops->default_level;
else
level = min(level, ops->max_level);
return level;
}

View file

@ -140,8 +140,6 @@ extern const struct btrfs_compress_op btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
bool btrfs_compress_is_valid_type(const char *str, size_t len);
unsigned int btrfs_compress_set_level(int type, unsigned level);
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
#endif

View file

@ -144,9 +144,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
return eb;
}
/* cowonly root (everything not a reference counted cow subvolume), just get
* put onto a simple dirty list. transaction.c walks this to make sure they
* get properly updated on disk.
/*
* Cowonly root (not-shareable trees, everything not subvolume or reloc roots),
* just get put onto a simple dirty list. Transaction walks this list to make
* sure they get properly updated on disk.
*/
static void add_root_to_dirty_list(struct btrfs_root *root)
{
@ -185,9 +186,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int level;
struct btrfs_disk_key disk_key;
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@ -826,12 +827,11 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf)
{
/*
* Tree blocks not in reference counted trees and tree roots
* are never shared. If a block was allocated after the last
* snapshot and the block was not allocated by tree relocation,
* we know the block is not shared.
* Tree blocks not in shareable trees and tree roots are never shared.
* If a block was allocated after the last snapshot and the block was
* not allocated by tree relocation, we know the block is not shared.
*/
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
buf != root->node && buf != root->commit_root &&
(btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item) ||
@ -1024,9 +1024,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(buf);
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@ -1065,7 +1065,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
btrfs_abort_transaction(trans, ret);
@ -1668,15 +1668,8 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
{
int low = 0;
int high = max;
int mid;
int ret;
struct btrfs_disk_key *tmp = NULL;
struct btrfs_disk_key unaligned;
unsigned long offset;
char *kaddr = NULL;
unsigned long map_start = 0;
unsigned long map_len = 0;
int err;
const int key_size = sizeof(struct btrfs_disk_key);
if (low > high) {
btrfs_err(eb->fs_info,
@ -1687,32 +1680,26 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
}
while (low < high) {
unsigned long oip;
unsigned long offset;
struct btrfs_disk_key *tmp;
struct btrfs_disk_key unaligned;
int mid;
mid = (low + high) / 2;
offset = p + mid * item_size;
oip = offset_in_page(offset);
if (!kaddr || offset < map_start ||
(offset + sizeof(struct btrfs_disk_key)) >
map_start + map_len) {
err = map_private_extent_buffer(eb, offset,
sizeof(struct btrfs_disk_key),
&kaddr, &map_start, &map_len);
if (!err) {
tmp = (struct btrfs_disk_key *)(kaddr + offset -
map_start);
} else if (err == 1) {
read_extent_buffer(eb, &unaligned,
offset, sizeof(unaligned));
tmp = &unaligned;
} else {
return err;
}
if (oip + key_size <= PAGE_SIZE) {
const unsigned long idx = offset >> PAGE_SHIFT;
char *kaddr = page_address(eb->pages[idx]);
tmp = (struct btrfs_disk_key *)(kaddr + oip);
} else {
tmp = (struct btrfs_disk_key *)(kaddr + offset -
map_start);
read_extent_buffer(eb, &unaligned, offset, key_size);
tmp = &unaligned;
}
ret = comp_keys(tmp, key);
if (ret < 0)
@ -1733,9 +1720,9 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
* leaves vs nodes
*/
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
int *slot)
{
if (level == 0)
if (btrfs_header_level(eb) == 0)
return generic_bin_search(eb,
offsetof(struct btrfs_leaf, items),
sizeof(struct btrfs_item),
@ -2348,16 +2335,15 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
struct btrfs_fs_info *fs_info = root->fs_info;
u64 blocknr;
u64 gen;
struct extent_buffer *b = *eb_ret;
struct extent_buffer *tmp;
struct btrfs_key first_key;
int ret;
int parent_level;
blocknr = btrfs_node_blockptr(b, slot);
gen = btrfs_node_ptr_generation(b, slot);
parent_level = btrfs_header_level(b);
btrfs_node_key_to_cpu(b, &first_key, slot);
blocknr = btrfs_node_blockptr(*eb_ret, slot);
gen = btrfs_node_ptr_generation(*eb_ret, slot);
parent_level = btrfs_header_level(*eb_ret);
btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
tmp = find_extent_buffer(fs_info, blocknr);
if (tmp) {
@ -2501,19 +2487,6 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
return ret;
}
static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
int level, int *prev_cmp, int *slot)
{
if (*prev_cmp != 0) {
*prev_cmp = btrfs_bin_search(b, key, level, slot);
return *prev_cmp;
}
*slot = 0;
return 0;
}
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
u64 iobjectid, u64 ioff, u8 key_type,
struct btrfs_key *found_key)
@ -2783,9 +2756,23 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
}
ret = key_search(b, key, level, &prev_cmp, &slot);
if (ret < 0)
goto done;
/*
* If btrfs_bin_search returns an exact match (prev_cmp == 0)
* we can safely assume the target key will always be in slot 0
* on lower levels due to the invariants BTRFS' btree provides,
* namely that a btrfs_key_ptr entry always points to the
* lowest key in the child node, thus we can skip searching
* lower levels
*/
if (prev_cmp == 0) {
slot = 0;
ret = 0;
} else {
ret = btrfs_bin_search(b, key, &slot);
prev_cmp = ret;
if (ret < 0)
goto done;
}
if (level == 0) {
p->slots[level] = slot;
@ -2909,7 +2896,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@ -2942,12 +2928,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
*/
btrfs_unlock_up_safe(p, level + 1);
/*
* Since we can unwind ebs we want to do a real search every
* time.
*/
prev_cmp = -1;
ret = key_search(b, key, level, &prev_cmp, &slot);
ret = btrfs_bin_search(b, key, &slot);
if (ret < 0)
goto done;
@ -3507,19 +3488,17 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
struct btrfs_item *start_item;
struct btrfs_item *end_item;
struct btrfs_map_token token;
int data_len;
int nritems = btrfs_header_nritems(l);
int end = min(nritems, start + nr) - 1;
if (!nr)
return 0;
btrfs_init_map_token(&token, l);
start_item = btrfs_item_nr(start);
end_item = btrfs_item_nr(end);
data_len = btrfs_token_item_offset(l, start_item, &token) +
btrfs_token_item_size(l, start_item, &token);
data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
data_len = btrfs_item_offset(l, start_item) +
btrfs_item_size(l, start_item);
data_len = data_len - btrfs_item_offset(l, end_item);
data_len += sizeof(struct btrfs_item) * nr;
WARN_ON(data_len < 0);
return data_len;
@ -3650,8 +3629,8 @@ static noinline int __push_leaf_right(struct btrfs_path *path,
push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(i);
push_space -= btrfs_token_item_size(right, item, &token);
btrfs_set_token_item_offset(right, item, push_space, &token);
push_space -= btrfs_token_item_size(&token, item);
btrfs_set_token_item_offset(&token, item, push_space);
}
left_nritems -= push_items;
@ -3859,10 +3838,9 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(left, item, &token);
btrfs_set_token_item_offset(left, item,
ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size),
&token);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item,
ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
@ -3892,9 +3870,8 @@ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(i);
push_space = push_space - btrfs_token_item_size(right,
item, &token);
btrfs_set_token_item_offset(right, item, push_space, &token);
push_space = push_space - btrfs_token_item_size(&token, item);
btrfs_set_token_item_offset(&token, item, push_space);
}
btrfs_mark_buffer_dirty(left);
@ -4036,9 +4013,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
struct btrfs_item *item = btrfs_item_nr(i);
u32 ioff;
ioff = btrfs_token_item_offset(right, item, &token);
btrfs_set_token_item_offset(right, item,
ioff + rt_data_off, &token);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
}
btrfs_set_header_nritems(l, mid);
@ -4541,9 +4517,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
u32 ioff;
item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff + size_diff, &token);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item, ioff + size_diff);
}
/* shift the data */
@ -4640,9 +4615,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
u32 ioff;
item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff - data_size, &token);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item, ioff - data_size);
}
/* shift the data */
@ -4718,9 +4692,9 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
u32 ioff;
item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff - total_data, &token);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item,
ioff - total_data);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@ -4739,10 +4713,9 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(slot + i);
btrfs_set_token_item_offset(leaf, item,
data_end - data_size[i], &token);
btrfs_set_token_item_offset(&token, item, data_end - data_size[i]);
data_end -= data_size[i];
btrfs_set_token_item_size(leaf, item, data_size[i], &token);
btrfs_set_token_item_size(&token, item, data_size[i]);
}
btrfs_set_header_nritems(leaf, nritems + nr);
@ -4930,9 +4903,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u32 ioff;
item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff + dsize, &token);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item, ioff + dsize);
}
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@ -5103,7 +5075,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
while (1) {
nritems = btrfs_header_nritems(cur);
level = btrfs_header_level(cur);
sret = btrfs_bin_search(cur, min_key, level, &slot);
sret = btrfs_bin_search(cur, min_key, &slot);
if (sret < 0) {
ret = sret;
goto out;

View file

@ -28,6 +28,7 @@
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
#include <linux/iomap.h>
#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
@ -582,6 +583,7 @@ struct btrfs_fs_info {
struct btrfs_root *quota_root;
struct btrfs_root *uuid_root;
struct btrfs_root *free_space_root;
struct btrfs_root *data_reloc_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@ -758,7 +760,6 @@ struct btrfs_fs_info {
struct btrfs_workqueue *endio_workers;
struct btrfs_workqueue *endio_meta_workers;
struct btrfs_workqueue *endio_raid56_workers;
struct btrfs_workqueue *endio_repair_workers;
struct btrfs_workqueue *rmw_workers;
struct btrfs_workqueue *endio_meta_write_workers;
struct btrfs_workqueue *endio_write_workers;
@ -970,7 +971,28 @@ enum {
* is used to tell us when more checks are required
*/
BTRFS_ROOT_IN_TRANS_SETUP,
BTRFS_ROOT_REF_COWS,
/*
* Set if tree blocks of this root can be shared by other roots.
* Only subvolume trees and their reloc trees have this bit set.
* Conflicts with TRACK_DIRTY bit.
*
* This affects two things:
*
* - How balance works
* For shareable roots, we need to use reloc tree and do path
* replacement for balance, and need various pre/post hooks for
* snapshot creation to handle them.
*
* While for non-shareable trees, we just simply do a tree search
* with COW.
*
* - How dirty roots are tracked
* For shareable roots, btrfs_record_root_in_trans() is needed to
* track them, while non-subvolume roots have TRACK_DIRTY bit, they
* don't need to set this manually.
*/
BTRFS_ROOT_SHAREABLE,
BTRFS_ROOT_TRACK_DIRTY,
BTRFS_ROOT_IN_RADIX,
BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
@ -1056,7 +1078,7 @@ struct btrfs_root {
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
/* the dirty list is only used by non-reference counted roots */
/* The dirty list is only used by non-shareable roots */
struct list_head dirty_list;
struct list_head root_list;
@ -1146,6 +1168,9 @@ struct btrfs_root {
/* Record pairs of swapped blocks for qgroup */
struct btrfs_qgroup_swapped_blocks swapped_blocks;
/* Used only by log trees, when logging csum items */
struct extent_io_tree log_csum_range;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
u64 alloc_bytenr;
#endif
@ -1341,7 +1366,7 @@ do { \
BTRFS_INODE_ROOT_ITEM_INIT)
struct btrfs_map_token {
const struct extent_buffer *eb;
struct extent_buffer *eb;
char *kaddr;
unsigned long offset;
};
@ -1353,7 +1378,8 @@ static inline void btrfs_init_map_token(struct btrfs_map_token *token,
struct extent_buffer *eb)
{
token->eb = eb;
token->kaddr = NULL;
token->kaddr = page_address(eb->pages[0]);
token->offset = 0;
}
/* some macros to generate set/get functions for the struct fields. This
@ -1377,15 +1403,14 @@ static inline void btrfs_init_map_token(struct btrfs_map_token *token,
sizeof(((type *)0)->member)))
#define DECLARE_BTRFS_SETGET_BITS(bits) \
u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off, \
struct btrfs_map_token *token); \
void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \
unsigned long off, u##bits val, \
struct btrfs_map_token *token); \
u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
const void *ptr, unsigned long off); \
void btrfs_set_token_##bits(struct btrfs_map_token *token, \
const void *ptr, unsigned long off, \
u##bits val); \
u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off); \
void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val);
DECLARE_BTRFS_SETGET_BITS(8)
@ -1400,25 +1425,23 @@ static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
} \
static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \
static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
u##bits val) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\
const type *s, \
struct btrfs_map_token *token) \
static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
const type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \
return btrfs_get_token_##bits(token, s, offsetof(type, member));\
} \
static inline void btrfs_set_token_##name(struct extent_buffer *eb, \
type *s, u##bits val, \
struct btrfs_map_token *token) \
static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
type *s, u##bits val) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \
btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
@ -1428,7 +1451,7 @@ static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
u##bits res = le##bits##_to_cpu(p->member); \
return res; \
} \
static inline void btrfs_set_##name(struct extent_buffer *eb, \
static inline void btrfs_set_##name(const struct extent_buffer *eb, \
u##bits val) \
{ \
type *p = page_address(eb->pages[0]); \
@ -1446,7 +1469,7 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
}
static inline u64 btrfs_device_total_bytes(struct extent_buffer *eb,
static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s)
{
BUILD_BUG_ON(sizeof(u64) !=
@ -1454,7 +1477,7 @@ static inline u64 btrfs_device_total_bytes(struct extent_buffer *eb,
return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
total_bytes));
}
static inline void btrfs_set_device_total_bytes(struct extent_buffer *eb,
static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s,
u64 val)
{
@ -1558,13 +1581,13 @@ static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
}
static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
static inline u64 btrfs_stripe_offset_nr(const struct extent_buffer *eb,
struct btrfs_chunk *c, int nr)
{
return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
}
static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
static inline u64 btrfs_stripe_devid_nr(const struct extent_buffer *eb,
struct btrfs_chunk *c, int nr)
{
return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
@ -1644,31 +1667,21 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
chunk_offset, 64);
BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
{
unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
return (unsigned long)dev + ptr;
}
BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
generation, 64);
BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
static inline void btrfs_tree_block_key(struct extent_buffer *eb,
static inline void btrfs_tree_block_key(const struct extent_buffer *eb,
struct btrfs_tree_block_info *item,
struct btrfs_disk_key *key)
{
read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
}
static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb,
struct btrfs_tree_block_info *item,
struct btrfs_disk_key *key)
{
@ -1706,12 +1719,6 @@ static inline u32 btrfs_extent_inline_ref_size(int type)
return 0;
}
BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
generation, 64);
BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
/* struct btrfs_node */
BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
@ -1720,7 +1727,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr,
BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr,
generation, 64);
static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
static inline u64 btrfs_node_blockptr(const struct extent_buffer *eb, int nr)
{
unsigned long ptr;
ptr = offsetof(struct btrfs_node, ptrs) +
@ -1728,7 +1735,7 @@ static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
}
static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
static inline void btrfs_set_node_blockptr(const struct extent_buffer *eb,
int nr, u64 val)
{
unsigned long ptr;
@ -1737,7 +1744,7 @@ static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
}
static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
static inline u64 btrfs_node_ptr_generation(const struct extent_buffer *eb, int nr)
{
unsigned long ptr;
ptr = offsetof(struct btrfs_node, ptrs) +
@ -1745,7 +1752,7 @@ static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
}
static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
static inline void btrfs_set_node_ptr_generation(const struct extent_buffer *eb,
int nr, u64 val)
{
unsigned long ptr;
@ -1763,7 +1770,7 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr)
void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr);
static inline void btrfs_set_node_key(struct extent_buffer *eb,
static inline void btrfs_set_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr;
@ -2498,8 +2505,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref);
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_get_block_group_trimming(struct btrfs_block_group *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group *cache);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
enum btrfs_reserve_flush_enum {
@ -2512,6 +2517,7 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_LIMIT,
BTRFS_RESERVE_FLUSH_EVICT,
BTRFS_RESERVE_FLUSH_ALL,
BTRFS_RESERVE_FLUSH_ALL_STEAL,
};
enum btrfs_flush_state {
@ -2551,7 +2557,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot);
int *slot);
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
@ -2896,10 +2902,9 @@ void btrfs_free_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
struct btrfs_root *root, struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root);
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset,
u64 start, u64 end);
@ -2929,6 +2934,9 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
u64 end, int uptodate);
extern const struct dentry_operations btrfs_dentry_operations;
ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dops;
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@ -3381,6 +3389,9 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info,
u64 bytenr);
int btrfs_should_ignore_reloc_root(struct btrfs_root *root);
/* scrub.c */
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,

View file

@ -358,16 +358,14 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
/*
* The super_block structure does not span the whole
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
* filled with zeros and is included in the checksum.
*/
crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_final(shash, result);
crypto_shash_digest(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb)))
return 1;
@ -709,9 +707,7 @@ static void end_workqueue_bio(struct bio *bio)
else
wq = fs_info->endio_write_workers;
} else {
if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
wq = fs_info->endio_repair_workers;
else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
wq = fs_info->endio_raid56_workers;
else if (end_io_wq->metadata)
wq = fs_info->endio_meta_workers;
@ -1135,9 +1131,12 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
if (!dummy)
if (!dummy) {
extent_io_tree_init(fs_info, &root->dirty_log_pages,
IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
extent_io_tree_init(fs_info, &root->log_csum_range,
IO_TREE_LOG_CSUM_RANGE, NULL);
}
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
@ -1275,12 +1274,13 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
/*
* DON'T set REF_COWS for log trees
* DON'T set SHAREABLE bit for log trees.
*
* log trees do not get reference counted because they go away
* before a real commit is actually done. They do store pointers
* to file data extents, and those reference counts still get
* updated (along with back refs to the log tree).
* Log trees are not exposed to user space thus can't be snapshotted,
* and they go away before a real commit is actually done.
*
* They do store pointers to file data extents, and those reference
* counts still get updated (along with back refs to the log tree).
*/
leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
@ -1418,8 +1418,9 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
if (ret)
goto fail;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
set_bit(BTRFS_ROOT_REF_COWS, &root->state);
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@ -1524,6 +1525,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_put_root(fs_info->uuid_root);
btrfs_put_root(fs_info->free_space_root);
btrfs_put_root(fs_info->fs_root);
btrfs_put_root(fs_info->data_reloc_root);
btrfs_check_leaked_roots(fs_info);
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
@ -1533,35 +1535,34 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *location,
bool check_ref)
u64 objectid, bool check_ref)
{
struct btrfs_root *root;
struct btrfs_path *path;
struct btrfs_key key;
int ret;
if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->tree_root);
if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
return btrfs_grab_root(fs_info->extent_root);
if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
return btrfs_grab_root(fs_info->chunk_root);
if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
if (objectid == BTRFS_DEV_TREE_OBJECTID)
return btrfs_grab_root(fs_info->dev_root);
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
return btrfs_grab_root(fs_info->csum_root);
if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
return btrfs_grab_root(fs_info->quota_root) ?
fs_info->quota_root : ERR_PTR(-ENOENT);
if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
if (objectid == BTRFS_UUID_TREE_OBJECTID)
return btrfs_grab_root(fs_info->uuid_root) ?
fs_info->uuid_root : ERR_PTR(-ENOENT);
if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return btrfs_grab_root(fs_info->free_space_root) ?
fs_info->free_space_root : ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, location->objectid);
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
@ -1570,7 +1571,10 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
return root;
}
root = btrfs_read_tree_root(fs_info->tree_root, location);
key.objectid = objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_read_tree_root(fs_info->tree_root, &key);
if (IS_ERR(root))
return root;
@ -1590,7 +1594,7 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
}
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = location->objectid;
key.offset = objectid;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
btrfs_free_path(path);
@ -1940,7 +1944,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->workers);
btrfs_destroy_workqueue(fs_info->endio_workers);
btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
btrfs_destroy_workqueue(fs_info->endio_repair_workers);
btrfs_destroy_workqueue(fs_info->rmw_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
@ -1981,6 +1984,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
free_root_extent_buffers(info->quota_root);
free_root_extent_buffers(info->uuid_root);
free_root_extent_buffers(info->fs_root);
free_root_extent_buffers(info->data_reloc_root);
if (free_chunk_root)
free_root_extent_buffers(info->chunk_root);
free_root_extent_buffers(info->free_space_root);
@ -1993,6 +1997,7 @@ void btrfs_put_root(struct btrfs_root *root)
if (refcount_dec_and_test(&root->refs)) {
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
btrfs_drew_lock_destroy(&root->snapshot_lock);
@ -2143,8 +2148,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->endio_raid56_workers =
btrfs_alloc_workqueue(fs_info, "endio-raid56", flags,
max_active, 4);
fs_info->endio_repair_workers =
btrfs_alloc_workqueue(fs_info, "endio-repair", flags, 1, 0);
fs_info->rmw_workers =
btrfs_alloc_workqueue(fs_info, "rmw", flags, max_active, 2);
fs_info->endio_write_workers =
@ -2168,7 +2171,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->endio_meta_write_workers &&
fs_info->endio_repair_workers &&
fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
@ -2290,6 +2292,19 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->csum_root = root;
/*
* This tree can share blocks with some other fs tree during relocation
* and we need a proper setup by btrfs_get_fs_root
*/
root = btrfs_get_fs_root(tree_root->fs_info,
BTRFS_DATA_RELOC_TREE_OBJECTID, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->data_reloc_root = root;
location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (!IS_ERR(root)) {
@ -2827,7 +2842,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
u64 generation;
u64 features;
u16 csum_type;
struct btrfs_key location;
struct btrfs_super_block *disk_super;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *tree_root;
@ -3241,11 +3255,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
}
}
location.objectid = BTRFS_FS_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
location.offset = 0;
fs_info->fs_root = btrfs_get_fs_root(fs_info, &location, true);
fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
btrfs_warn(fs_info, "failed to read fs tree: %d", err);
@ -3508,10 +3518,9 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr);
crypto_shash_init(shash);
crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_final(shash, sb->csum);
crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
GFP_NOFS);

View file

@ -25,7 +25,6 @@ enum btrfs_wq_endio_type {
BTRFS_WQ_ENDIO_METADATA,
BTRFS_WQ_ENDIO_FREE_SPACE,
BTRFS_WQ_ENDIO_RAID56,
BTRFS_WQ_ENDIO_DIO_REPAIR,
};
static inline u64 btrfs_sb_offset(int mirror)
@ -67,8 +66,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *key,
bool check_ref);
u64 objectid, bool check_ref);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);

View file

@ -64,24 +64,15 @@ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
struct inode *inode;
struct btrfs_key key;
if (objectid < BTRFS_FIRST_FREE_OBJECTID)
return ERR_PTR(-ESTALE);
key.objectid = root_objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(fs_info, &key, true);
root = btrfs_get_fs_root(fs_info, root_objectid, true);
if (IS_ERR(root))
return ERR_CAST(root);
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(sb, &key, root);
inode = btrfs_iget(sb, objectid, root);
btrfs_put_root(root);
if (IS_ERR(inode))
return ERR_CAST(inode);
@ -200,9 +191,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
found_key.offset, 0, 0);
}
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root));
return d_obtain_alias(btrfs_iget(fs_info->sb, key.objectid, root));
fail:
btrfs_free_path(path);
return ERR_PTR(ret);

View file

@ -44,6 +44,7 @@ enum {
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_INODE_FILE_EXTENT,
IO_TREE_LOG_CSUM_RANGE,
IO_TREE_SELFTEST,
};

View file

@ -2114,22 +2114,6 @@ static u64 find_middle(struct rb_root *root)
}
#endif
static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
{
u64 num_bytes;
num_bytes = heads * (sizeof(struct btrfs_extent_item) +
sizeof(struct btrfs_extent_inline_ref));
if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
num_bytes += heads * sizeof(struct btrfs_tree_block_info);
/*
* We don't ever fill up leaves all the way so multiply by 2 just to be
* closer to what we're really going to want to use.
*/
return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
}
/*
* Takes the number of bytes to be csumm'ed and figures out how many leaves it
* would require to store the csums for that many bytes.
@ -2442,7 +2426,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
nritems = btrfs_header_nritems(buf);
level = btrfs_header_level(buf);
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
return 0;
if (full_backref)
@ -2932,7 +2916,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
&trimmed);
list_del_init(&block_group->bg_list);
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
btrfs_put_block_group(block_group);
if (ret) {
@ -3369,6 +3353,7 @@ static struct btrfs_block_group *btrfs_lock_cluster(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
int delalloc)
__acquires(&cluster->refill_lock)
{
struct btrfs_block_group *used_bg = NULL;
@ -5501,8 +5486,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
*/
if (!for_reloc && !root_dropped)
btrfs_add_dead_root(root);
if (err && err != -EAGAIN)
btrfs_handle_fs_error(fs_info, err, NULL);
return err;
}

View file

@ -2333,7 +2333,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
return 0;
}
int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 start = eb->start;
@ -2537,8 +2537,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
return 0;
}
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
struct io_failure_record *failrec, int failed_mirror)
static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
struct io_failure_record *failrec,
int failed_mirror)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int num_copies;
@ -2561,7 +2562,7 @@ bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
* a) deliver good data to the caller
* b) correct the bad sectors on disk
*/
if (failed_bio_pages > 1) {
if (needs_validation) {
/*
* to fulfill b), we need to know the exact failing sectors, as
* we don't want to rewrite any more than the failed ones. thus,
@ -2600,94 +2601,115 @@ bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
return true;
}
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data)
static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio *bio;
struct btrfs_io_bio *btrfs_failed_bio;
struct btrfs_io_bio *btrfs_bio;
u64 len = 0;
const u32 blocksize = inode->i_sb->s_blocksize;
bio = btrfs_io_bio_alloc(1);
bio->bi_end_io = endio_func;
bio->bi_iter.bi_sector = failrec->logical >> 9;
bio->bi_iter.bi_size = 0;
bio->bi_private = data;
/*
* If bi_status is BLK_STS_OK, then this was a checksum error, not an
* I/O error. In this case, we already know exactly which sector was
* bad, so we don't need to validate.
*/
if (bio->bi_status == BLK_STS_OK)
return false;
btrfs_failed_bio = btrfs_io_bio(failed_bio);
if (btrfs_failed_bio->csum) {
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
/*
* We need to validate each sector individually if the failed I/O was
* for multiple sectors.
*
* There are a few possible bios that can end up here:
* 1. A buffered read bio, which is not cloned.
* 2. A direct I/O read bio, which is cloned.
* 3. A (buffered or direct) repair bio, which is not cloned.
*
* For cloned bios (case 2), we can get the size from
* btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get
* it from the bvecs.
*/
if (bio_flagged(bio, BIO_CLONED)) {
if (btrfs_io_bio(bio)->iter.bi_size > blocksize)
return true;
} else {
struct bio_vec *bvec;
int i;
btrfs_bio = btrfs_io_bio(bio);
btrfs_bio->csum = btrfs_bio->csum_inline;
icsum *= csum_size;
memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
csum_size);
bio_for_each_bvec_all(bvec, bio, i) {
len += bvec->bv_len;
if (len > blocksize)
return true;
}
}
bio_add_page(bio, page, failrec->len, pg_offset);
return bio;
return false;
}
/*
* This is a generic handler for readpage errors. If other copies exist, read
* those and write back good data to the failed position. Does not investigate
* in remapping the failed extent elsewhere, hoping the device will be smart
* enough to do this as needed
*/
static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int failed_mirror)
blk_status_t btrfs_submit_read_repair(struct inode *inode,
struct bio *failed_bio, u64 phy_offset,
struct page *page, unsigned int pgoff,
u64 start, u64 end, int failed_mirror,
submit_bio_hook_t *submit_bio_hook)
{
struct io_failure_record *failrec;
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct bio *bio;
int read_mode = 0;
struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
const int icsum = phy_offset >> inode->i_sb->s_blocksize_bits;
bool need_validation;
struct bio *repair_bio;
struct btrfs_io_bio *repair_io_bio;
blk_status_t status;
int ret;
unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
btrfs_debug(fs_info,
"repair read error: read error at %llu", start);
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
return ret;
return errno_to_blk_status(ret);
if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
need_validation = btrfs_io_needs_validation(inode, failed_bio);
if (!btrfs_check_repairable(inode, need_validation, failrec,
failed_mirror)) {
free_io_failure(failure_tree, tree, failrec);
return -EIO;
return BLK_STS_IOERR;
}
if (failed_bio_pages > 1)
read_mode |= REQ_FAILFAST_DEV;
repair_bio = btrfs_io_bio_alloc(1);
repair_io_bio = btrfs_io_bio(repair_bio);
repair_bio->bi_opf = REQ_OP_READ;
if (need_validation)
repair_bio->bi_opf |= REQ_FAILFAST_DEV;
repair_bio->bi_end_io = failed_bio->bi_end_io;
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
repair_bio->bi_private = failed_bio->bi_private;
phy_offset >>= inode->i_sb->s_blocksize_bits;
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
start - page_offset(page),
(int)phy_offset, failed_bio->bi_end_io,
NULL);
bio->bi_opf = REQ_OP_READ | read_mode;
if (failed_io_bio->csum) {
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
repair_io_bio->csum = repair_io_bio->csum_inline;
memcpy(repair_io_bio->csum,
failed_io_bio->csum + csum_size * icsum, csum_size);
}
bio_add_page(repair_bio, page, failrec->len, pgoff);
repair_io_bio->logical = failrec->start;
repair_io_bio->iter = repair_bio->bi_iter;
btrfs_debug(btrfs_sb(inode->i_sb),
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
read_mode, failrec->this_mirror, failrec->in_validation);
"repair read error: submitting new read to mirror %d, in_validation=%d",
failrec->this_mirror, failrec->in_validation);
status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
failrec->bio_flags);
status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
failrec->bio_flags);
if (status) {
free_io_failure(failure_tree, tree, failrec);
bio_put(bio);
ret = blk_status_to_errno(status);
bio_put(repair_bio);
}
return ret;
return status;
}
/* lots and lots of room for performance fixes in the end_bio funcs */
@ -2859,9 +2881,10 @@ static void end_bio_extent_readpage(struct bio *bio)
* If it can't handle the error it will return -EIO and
* we remain responsible for that page.
*/
ret = bio_readpage_error(bio, offset, page, start, end,
mirror);
if (ret == 0) {
if (!btrfs_submit_read_repair(inode, bio, offset, page,
start - page_offset(page),
start, end, mirror,
tree->ops->submit_bio_hook)) {
uptodate = !bio->bi_status;
offset += len;
continue;
@ -4862,7 +4885,7 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
int extent_buffer_under_io(struct extent_buffer *eb)
int extent_buffer_under_io(const struct extent_buffer *eb)
{
return (atomic_read(&eb->io_pages) ||
test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
@ -4967,7 +4990,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
return eb;
}
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
{
int i;
struct page *p;
@ -5373,7 +5396,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
release_extent_buffer(eb);
}
void clear_extent_buffer_dirty(struct extent_buffer *eb)
void clear_extent_buffer_dirty(const struct extent_buffer *eb)
{
int i;
int num_pages;
@ -5571,8 +5594,7 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
struct page *page;
char *kaddr;
char *dst = (char *)dstv;
size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long i = start >> PAGE_SHIFT;
if (start + len > eb->len) {
WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
@ -5581,7 +5603,7 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
return;
}
offset = offset_in_page(start_offset + start);
offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@ -5606,14 +5628,13 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
struct page *page;
char *kaddr;
char __user *dst = (char __user *)dstv;
size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long i = start >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
offset = offset_in_page(start_offset + start);
offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@ -5634,48 +5655,6 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
return ret;
}
/*
* return 0 if the item is found within a page.
* return 1 if the item spans two pages.
* return -EINVAL otherwise.
*/
int map_private_extent_buffer(const struct extent_buffer *eb,
unsigned long start, unsigned long min_len,
char **map, unsigned long *map_start,
unsigned long *map_len)
{
size_t offset;
char *kaddr;
struct page *p;
size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long end_i = (start_offset + start + min_len - 1) >>
PAGE_SHIFT;
if (start + min_len > eb->len) {
WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
eb->start, eb->len, start, min_len);
return -EINVAL;
}
if (i != end_i)
return 1;
if (i == 0) {
offset = start_offset;
*map_start = 0;
} else {
offset = 0;
*map_start = ((u64)i << PAGE_SHIFT) - start_offset;
}
p = eb->pages[i];
kaddr = page_address(p);
*map = kaddr + offset;
*map_len = PAGE_SIZE - offset;
return 0;
}
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
unsigned long start, unsigned long len)
{
@ -5684,14 +5663,13 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
struct page *page;
char *kaddr;
char *ptr = (char *)ptrv;
size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long i = start >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
offset = offset_in_page(start_offset + start);
offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@ -5711,7 +5689,7 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
return ret;
}
void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *srcv)
{
char *kaddr;
@ -5722,7 +5700,7 @@ void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
BTRFS_FSID_SIZE);
}
void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
{
char *kaddr;
@ -5732,7 +5710,7 @@ void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
BTRFS_FSID_SIZE);
}
void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
unsigned long start, unsigned long len)
{
size_t cur;
@ -5740,13 +5718,12 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
struct page *page;
char *kaddr;
char *src = (char *)srcv;
size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long i = start >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
offset = offset_in_page(start_offset + start);
offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@ -5763,20 +5740,19 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
}
}
void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
unsigned long len)
{
size_t cur;
size_t offset;
struct page *page;
char *kaddr;
size_t start_offset = offset_in_page(eb->start);
unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long i = start >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
offset = offset_in_page(start_offset + start);
offset = offset_in_page(start);
while (len > 0) {
page = eb->pages[i];
@ -5792,8 +5768,8 @@ void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
}
}
void copy_extent_buffer_full(struct extent_buffer *dst,
struct extent_buffer *src)
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src)
{
int i;
int num_pages;
@ -5806,7 +5782,8 @@ void copy_extent_buffer_full(struct extent_buffer *dst,
page_address(src->pages[i]));
}
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
void copy_extent_buffer(const struct extent_buffer *dst,
const struct extent_buffer *src,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
@ -5815,12 +5792,11 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
size_t offset;
struct page *page;
char *kaddr;
size_t start_offset = offset_in_page(dst->start);
unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
unsigned long i = dst_offset >> PAGE_SHIFT;
WARN_ON(src->len != dst_len);
offset = offset_in_page(start_offset + dst_offset);
offset = offset_in_page(dst_offset);
while (len > 0) {
page = dst->pages[i];
@ -5851,12 +5827,11 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
* This helper hides the ugliness of finding the byte in an extent buffer which
* contains a given bit.
*/
static inline void eb_bitmap_offset(struct extent_buffer *eb,
static inline void eb_bitmap_offset(const struct extent_buffer *eb,
unsigned long start, unsigned long nr,
unsigned long *page_index,
size_t *page_offset)
{
size_t start_offset = offset_in_page(eb->start);
size_t byte_offset = BIT_BYTE(nr);
size_t offset;
@ -5865,7 +5840,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
* the bitmap item in the extent buffer + the offset of the byte in the
* bitmap item.
*/
offset = start_offset + start + byte_offset;
offset = start + byte_offset;
*page_index = offset >> PAGE_SHIFT;
*page_offset = offset_in_page(offset);
@ -5877,7 +5852,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
* @start: offset of the bitmap item in the extent buffer
* @nr: bit number to test
*/
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
unsigned long nr)
{
u8 *kaddr;
@ -5899,7 +5874,7 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
* @pos: bit number of the first bit
* @len: number of bits to set
*/
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len)
{
u8 *kaddr;
@ -5941,8 +5916,9 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
* @pos: bit number of the first bit
* @len: number of bits to clear
*/
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len)
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len)
{
u8 *kaddr;
struct page *page;
@ -6003,14 +5979,14 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
}
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len)
void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
struct btrfs_fs_info *fs_info = dst->fs_info;
size_t cur;
size_t dst_off_in_page;
size_t src_off_in_page;
size_t start_offset = offset_in_page(dst->start);
unsigned long dst_i;
unsigned long src_i;
@ -6028,11 +6004,11 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
while (len > 0) {
dst_off_in_page = offset_in_page(start_offset + dst_offset);
src_off_in_page = offset_in_page(start_offset + src_offset);
dst_off_in_page = offset_in_page(dst_offset);
src_off_in_page = offset_in_page(src_offset);
dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
src_i = (start_offset + src_offset) >> PAGE_SHIFT;
dst_i = dst_offset >> PAGE_SHIFT;
src_i = src_offset >> PAGE_SHIFT;
cur = min(len, (unsigned long)(PAGE_SIZE -
src_off_in_page));
@ -6048,8 +6024,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len)
void memmove_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
struct btrfs_fs_info *fs_info = dst->fs_info;
size_t cur;
@ -6057,7 +6034,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
size_t src_off_in_page;
unsigned long dst_end = dst_offset + len - 1;
unsigned long src_end = src_offset + len - 1;
size_t start_offset = offset_in_page(dst->start);
unsigned long dst_i;
unsigned long src_i;
@ -6078,11 +6054,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
return;
}
while (len > 0) {
dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
src_i = (start_offset + src_end) >> PAGE_SHIFT;
dst_i = dst_end >> PAGE_SHIFT;
src_i = src_end >> PAGE_SHIFT;
dst_off_in_page = offset_in_page(start_offset + dst_end);
src_off_in_page = offset_in_page(start_offset + src_end);
dst_off_in_page = offset_in_page(dst_end);
src_off_in_page = offset_in_page(src_end);
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);

View file

@ -66,6 +66,10 @@ struct btrfs_io_bio;
struct io_failure_record;
struct extent_io_tree;
typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
int mirror_num,
unsigned long bio_flags);
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
@ -74,8 +78,7 @@ struct extent_io_ops {
* The following callbacks must be always defined, the function
* pointer will be called unconditionally.
*/
blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
submit_bio_hook_t *submit_bio_hook;
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
@ -209,7 +212,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
void free_extent_buffer(struct extent_buffer *eb);
@ -227,7 +230,7 @@ static inline int num_extent_pages(const struct extent_buffer *eb)
(eb->start >> PAGE_SHIFT);
}
static inline int extent_buffer_uptodate(struct extent_buffer *eb)
static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
@ -240,37 +243,37 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
int read_extent_buffer_to_user(const struct extent_buffer *eb,
void __user *dst, unsigned long start,
unsigned long len);
void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
void copy_extent_buffer_full(struct extent_buffer *dst,
struct extent_buffer *src);
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
const struct extent_buffer *src,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
void memmove_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len);
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
unsigned long len);
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
unsigned long pos);
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len);
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len);
void clear_extent_buffer_dirty(struct extent_buffer *eb);
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len);
void clear_extent_buffer_dirty(const struct extent_buffer *eb);
bool set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
int map_private_extent_buffer(const struct extent_buffer *eb,
unsigned long offset, unsigned long min_len,
char **map, unsigned long *map_start,
unsigned long *map_len);
int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
@ -289,7 +292,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
/*
* When IO fails, either with EIO or csum verification fails, we
@ -311,12 +314,12 @@ struct io_failure_record {
};
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
struct io_failure_record *failrec, int fail_mirror);
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data);
blk_status_t btrfs_submit_read_repair(struct inode *inode,
struct bio *failed_bio, u64 phy_offset,
struct page *page, unsigned int pgoff,
u64 start, u64 end, int failed_mirror,
submit_bio_hook_t *submit_bio_hook);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,

View file

@ -242,11 +242,13 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
/**
* btrfs_lookup_bio_sums - Look up checksums for a bio.
* @inode: inode that the bio is for.
* @bio: bio embedded in btrfs_io_bio.
* @bio: bio to look up.
* @offset: Unless (u64)-1, look up checksums for this offset in the file.
* If (u64)-1, use the page offsets from the bio instead.
* @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If
* NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead.
* @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
* checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
* NULL, the checksum buffer is allocated and returned in
* btrfs_io_bio(bio)->csum instead.
*
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
*/
@ -256,7 +258,6 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio_vec bvec;
struct bvec_iter iter;
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_path *path;
@ -277,6 +278,8 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
GFP_NOFS);
@ -598,13 +601,12 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
index = 0;
}
crypto_shash_init(shash);
data = kmap_atomic(bvec.bv_page);
crypto_shash_update(shash, data + bvec.bv_offset
crypto_shash_digest(shash, data + bvec.bv_offset
+ (i * fs_info->sectorsize),
fs_info->sectorsize);
fs_info->sectorsize,
sums->sums + index);
kunmap_atomic(data);
crypto_shash_final(shash, (char *)(sums->sums + index));
index += csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;
@ -869,7 +871,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
}
ret = PTR_ERR(item);
if (ret != -EFBIG && ret != -ENOENT)
goto fail_unlock;
goto out;
if (ret == -EFBIG) {
u32 item_size;
@ -887,10 +889,12 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
nritems = btrfs_header_nritems(path->nodes[0]);
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
if (ret == 1)
if (ret < 0) {
goto out;
} else if (ret > 0) {
found_next = 1;
if (ret != 0)
goto insert;
}
slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
@ -905,14 +909,27 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
}
/*
* at this point, we know the tree has an item, but it isn't big
* enough yet to put our csum in. Grow it
* At this point, we know the tree has a checksum item that ends at an
* offset matching the start of the checksum range we want to insert.
* We try to extend that item as much as possible and then add as many
* checksums to it as they fit.
*
* First check if the leaf has enough free space for at least one
* checksum. If it has go directly to the item extension code, otherwise
* release the path and do a search for insertion before the extension.
*/
if (btrfs_leaf_free_space(leaf) >= csum_size) {
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
csum_offset = (bytenr - found_key.offset) >>
fs_info->sb->s_blocksize_bits;
goto extend_csum;
}
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &file_key, path,
csum_size, 1);
if (ret < 0)
goto fail_unlock;
goto out;
if (ret > 0) {
if (path->slots[0] == 0)
@ -931,19 +948,13 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto insert;
}
extend_csum:
if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
csum_size) {
int extend_nr;
u64 tmp;
u32 diff;
u32 free_space;
if (btrfs_leaf_free_space(leaf) <
sizeof(struct btrfs_item) + csum_size * 2)
goto insert;
free_space = btrfs_leaf_free_space(leaf) -
sizeof(struct btrfs_item) - csum_size;
tmp = sums->len - total_bytes;
tmp >>= fs_info->sb->s_blocksize_bits;
WARN_ON(tmp < 1);
@ -954,7 +965,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
diff = min(free_space, diff);
diff = min_t(u32, btrfs_leaf_free_space(leaf), diff);
diff /= csum_size;
diff *= csum_size;
@ -985,9 +996,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
ins_size);
path->leave_spinning = 0;
if (ret < 0)
goto fail_unlock;
goto out;
if (WARN_ON(ret != 0))
goto fail_unlock;
goto out;
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@ -1017,9 +1028,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
return ret;
fail_unlock:
goto out;
}
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,

View file

@ -275,26 +275,18 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *inode_root;
struct inode *inode;
struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
int num_defrag;
int ret;
/* get the inode */
key.objectid = defrag->root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
inode_root = btrfs_get_fs_root(fs_info, &key, true);
inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
if (IS_ERR(inode_root)) {
ret = PTR_ERR(inode_root);
goto cleanup;
}
key.objectid = defrag->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, inode_root);
inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
btrfs_put_root(inode_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
@ -775,7 +767,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
modify_tree = 0;
update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
root == fs_info->tree_root);
while (1) {
recow = 0;
@ -1817,21 +1809,61 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
return num_written ? num_written : ret;
}
static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
const struct iov_iter *iter, loff_t offset)
{
const unsigned int blocksize_mask = fs_info->sectorsize - 1;
if (offset & blocksize_mask)
return -EINVAL;
if (iov_iter_alignment(iter) & blocksize_mask)
return -EINVAL;
return 0;
}
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
loff_t pos;
ssize_t written;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
loff_t pos = iocb->ki_pos;
ssize_t written = 0;
ssize_t written_buffered;
loff_t endbyte;
int err;
size_t count = 0;
bool relock = false;
written = generic_file_direct_write(iocb, from);
if (check_direct_IO(fs_info, from, pos))
goto buffered;
count = iov_iter_count(from);
/*
* If the write DIO is beyond the EOF, we need update the isize, but it
* is protected by i_mutex. So we can not unlock the i_mutex at this
* case.
*/
if (pos + count <= inode->i_size) {
inode_unlock(inode);
relock = true;
} else if (iocb->ki_flags & IOCB_NOWAIT) {
return -EAGAIN;
}
down_read(&BTRFS_I(inode)->dio_sem);
written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dops,
is_sync_kiocb(iocb));
up_read(&BTRFS_I(inode)->dio_sem);
if (relock)
inode_lock(inode);
if (written < 0 || !iov_iter_count(from))
return written;
buffered:
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
@ -1970,7 +2002,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
atomic_inc(&BTRFS_I(inode)->sync_writers);
if (iocb->ki_flags & IOCB_DIRECT) {
num_written = __btrfs_direct_write(iocb, from);
num_written = btrfs_direct_write(iocb, from);
} else {
num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
@ -3484,9 +3516,54 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
return generic_file_open(inode, filp);
}
static int check_direct_read(struct btrfs_fs_info *fs_info,
const struct iov_iter *iter, loff_t offset)
{
int ret;
int i, seg;
ret = check_direct_IO(fs_info, iter, offset);
if (ret < 0)
return ret;
for (seg = 0; seg < iter->nr_segs; seg++)
for (i = seg + 1; i < iter->nr_segs; i++)
if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
return -EINVAL;
return 0;
}
static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
return 0;
inode_lock_shared(inode);
ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dops,
is_sync_kiocb(iocb));
inode_unlock_shared(inode);
return ret;
}
static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
ssize_t ret = 0;
if (iocb->ki_flags & IOCB_DIRECT) {
ret = btrfs_direct_read(iocb, to);
if (ret < 0)
return ret;
}
return generic_file_buffered_read(iocb, to, ret);
}
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = generic_file_read_iter,
.read_iter = btrfs_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,

View file

@ -82,7 +82,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
* sure NOFS is set to keep us from deadlocking.
*/
nofs_flag = memalloc_nofs_save();
inode = btrfs_iget_path(fs_info->sb, &location, root, path);
inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
btrfs_release_path(path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(inode))
@ -1190,13 +1190,10 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
if (ret) {
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
if (block_group) {
#ifdef CONFIG_BTRFS_DEBUG
btrfs_err(root->fs_info,
"failed to write free space cache for block group %llu",
block_group->start);
#endif
}
if (block_group)
btrfs_debug(root->fs_info,
"failed to write free space cache for block group %llu error %d",
block_group->start, ret);
}
btrfs_update_inode(trans, root, inode);
@ -1415,11 +1412,9 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
block_group, &block_group->io_ctl, trans);
if (ret) {
#ifdef CONFIG_BTRFS_DEBUG
btrfs_err(fs_info,
"failed to write free space cache for block group %llu",
block_group->start);
#endif
btrfs_debug(fs_info,
"failed to write free space cache for block group %llu error %d",
block_group->start, ret);
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock);
@ -3762,46 +3757,6 @@ static int trim_bitmaps(struct btrfs_block_group *block_group,
return ret;
}
void btrfs_get_block_group_trimming(struct btrfs_block_group *cache)
{
atomic_inc(&cache->trimming);
}
void btrfs_put_block_group_trimming(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct extent_map_tree *em_tree;
struct extent_map *em;
bool cleanup;
spin_lock(&block_group->lock);
cleanup = (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed);
spin_unlock(&block_group->lock);
if (cleanup) {
mutex_lock(&fs_info->chunk_mutex);
em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->start,
1);
BUG_ON(!em); /* logic error, can't happen */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
mutex_unlock(&fs_info->chunk_mutex);
/* once for us and once for the tree */
free_extent_map(em);
free_extent_map(em);
/*
* We've left one free space entry and other tasks trimming
* this block group have left 1 entry each one. Free them.
*/
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
}
}
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
@ -3816,7 +3771,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
@ -3829,7 +3784,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
if (rem)
reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
out:
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
return ret;
}
@ -3846,11 +3801,11 @@ int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
return ret;
}
@ -3868,13 +3823,13 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
btrfs_freeze_block_group(block_group);
spin_unlock(&block_group->lock);
ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
async);
btrfs_put_block_group_trimming(block_group);
btrfs_unfreeze_block_group(block_group);
return ret;
}
@ -4035,11 +3990,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (release_metadata)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
inode->i_size, true);
#ifdef CONFIG_BTRFS_DEBUG
btrfs_err(fs_info,
"failed to write free ino cache for root %llu",
root->root_key.objectid);
#endif
btrfs_debug(fs_info,
"failed to write free ino cache for root %llu error %d",
root->root_key.objectid, ret);
}
return ret;

File diff suppressed because it is too large Load diff

View file

@ -660,7 +660,7 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
key.offset = (u64)-1;
new_root = btrfs_get_fs_root(fs_info, &key, true);
new_root = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
btrfs_abort_transaction(trans, ret);
@ -748,9 +748,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
bool snapshot_force_cow = false;
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
if (atomic_read(&root->nr_swapfiles)) {
@ -771,27 +770,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
/*
* Force new buffered writes to reserve space even when NOCOW is
* possible. This is to avoid later writeback (running dealloc) to
* fallback to COW mode and unexpectedly fail with ENOSPC.
*/
btrfs_drew_read_lock(&root->snapshot_lock);
ret = btrfs_start_delalloc_snapshot(root);
if (ret)
goto dec_and_free;
/*
* All previous writes have started writeback in NOCOW mode, so now
* we force future writes to fallback to COW mode during snapshot
* creation.
*/
atomic_inc(&root->snapshot_force_cow);
snapshot_force_cow = true;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
/*
@ -806,7 +784,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
&pending_snapshot->block_rsv, 8,
false);
if (ret)
goto dec_and_free;
goto free_pending;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@ -848,11 +826,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
fail:
btrfs_put_root(pending_snapshot->snap);
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
if (snapshot_force_cow)
atomic_dec(&root->snapshot_force_cow);
btrfs_drew_read_unlock(&root->snapshot_lock);
free_pending:
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
@ -983,6 +956,45 @@ static noinline int btrfs_mksubvol(const struct path *parent,
return error;
}
static noinline int btrfs_mksnapshot(const struct path *parent,
const char *name, int namelen,
struct btrfs_root *root,
bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
int ret;
bool snapshot_force_cow = false;
/*
* Force new buffered writes to reserve space even when NOCOW is
* possible. This is to avoid later writeback (running dealloc) to
* fallback to COW mode and unexpectedly fail with ENOSPC.
*/
btrfs_drew_read_lock(&root->snapshot_lock);
ret = btrfs_start_delalloc_snapshot(root);
if (ret)
goto out;
/*
* All previous writes have started writeback in NOCOW mode, so now
* we force future writes to fallback to COW mode during snapshot
* creation.
*/
atomic_inc(&root->snapshot_force_cow);
snapshot_force_cow = true;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
ret = btrfs_mksubvol(parent, name, namelen,
root, readonly, inherit);
out:
if (snapshot_force_cow)
atomic_dec(&root->snapshot_force_cow);
btrfs_drew_read_unlock(&root->snapshot_lock);
return ret;
}
/*
* When we're defragging a range, we don't want to kick it off again
* if it is really just waiting for delalloc to send it down.
@ -1762,7 +1774,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
*/
ret = -EPERM;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
ret = btrfs_mksnapshot(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
readonly, inherit);
}
@ -2127,10 +2139,7 @@ static noinline int search_ioctl(struct inode *inode,
/* search the root of the inode that was passed */
root = btrfs_grab_root(BTRFS_I(inode)->root);
} else {
key.objectid = sk->tree_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(info, &key, true);
root = btrfs_get_fs_root(info, sk->tree_id, true);
if (IS_ERR(root)) {
btrfs_free_path(path);
return PTR_ERR(root);
@ -2263,10 +2272,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
key.objectid = tree_id;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(info, &key, true);
root = btrfs_get_fs_root(info, tree_id, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
root = NULL;
@ -2359,10 +2365,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
if (dirid != upper_limit.objectid) {
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
key.objectid = treeid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(fs_info, &key, true);
root = btrfs_get_fs_root(fs_info, treeid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
@ -2421,7 +2424,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
goto out_put;
}
temp_inode = btrfs_iget(sb, &key2, root);
temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
goto out_put;
@ -2608,9 +2611,7 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
/* Get root_item of inode's subvolume */
key.objectid = BTRFS_I(inode)->root->root_key.objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
root = btrfs_get_fs_root(fs_info, &key, true);
root = btrfs_get_fs_root(fs_info, key.objectid, true);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_free;
@ -3278,7 +3279,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
struct btrfs_dir_item *di;
struct btrfs_trans_handle *trans;
struct btrfs_path *path = NULL;
struct btrfs_key location;
struct btrfs_disk_key disk_key;
u64 objectid = 0;
u64 dir_id;
@ -3299,11 +3299,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
if (!objectid)
objectid = BTRFS_FS_TREE_OBJECTID;
location.objectid = objectid;
location.type = BTRFS_ROOT_ITEM_KEY;
location.offset = (u64)-1;
new_root = btrfs_get_fs_root(fs_info, &location, true);
new_root = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
goto out;

View file

@ -410,6 +410,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
* The rwlock is held for write upon exit.
*/
void btrfs_tree_lock(struct extent_buffer *eb)
__acquires(&eb->lock)
{
u64 start_ns = 0;

View file

@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/rbtree.h>
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
@ -58,4 +59,57 @@ static inline bool has_single_bit_set(u64 n)
return is_power_of_two_u64(n);
}
/*
* Simple bytenr based rb_tree relate structures
*
* Any structure wants to use bytenr as single search index should have their
* structure start with these members.
*/
struct rb_simple_node {
struct rb_node rb_node;
u64 bytenr;
};
static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
{
struct rb_node *node = root->rb_node;
struct rb_simple_node *entry;
while (node) {
entry = rb_entry(node, struct rb_simple_node, rb_node);
if (bytenr < entry->bytenr)
node = node->rb_left;
else if (bytenr > entry->bytenr)
node = node->rb_right;
else
return node;
}
return NULL;
}
static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
struct rb_node *node)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct rb_simple_node *entry;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct rb_simple_node, rb_node);
if (bytenr < entry->bytenr)
p = &(*p)->rb_left;
else if (bytenr > entry->bytenr)
p = &(*p)->rb_right;
else
return parent;
}
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
}
#endif

View file

@ -408,19 +408,14 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
struct btrfs_root *parent_root)
{
struct super_block *sb = root->fs_info->sb;
struct btrfs_key key;
struct inode *parent_inode, *child_inode;
int ret;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
parent_inode = btrfs_iget(sb, &key, parent_root);
parent_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, parent_root);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
child_inode = btrfs_iget(sb, &key, root);
child_inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, root);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);

View file

@ -2622,6 +2622,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
struct btrfs_root *quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
bool need_rescan = false;
u32 level_size = 0;
u64 nums;
@ -2765,6 +2766,13 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
goto unlock;
}
++i_qgroups;
/*
* If we're doing a snapshot, and adding the snapshot to a new
* qgroup, the numbers are guaranteed to be incorrect.
*/
if (srcid)
need_rescan = true;
}
for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
@ -2784,6 +2792,9 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dst->rfer = src->rfer - level_size;
dst->rfer_cmpr = src->rfer_cmpr - level_size;
/* Manually tweaking numbers certainly needs a rescan */
need_rescan = true;
}
for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
struct btrfs_qgroup *src;
@ -2802,6 +2813,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dst->excl = src->excl + level_size;
dst->excl_cmpr = src->excl_cmpr + level_size;
need_rescan = true;
}
unlock:
@ -2809,6 +2821,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
out:
if (!committing)
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (need_rescan)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
return ret;
}

File diff suppressed because it is too large Load diff

View file

@ -210,7 +210,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_key root_key;
struct btrfs_root *root;
int err = 0;
int ret;
@ -223,10 +222,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = 0;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
while (1) {
u64 root_objectid;
ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
if (ret < 0) {
err = ret;
@ -250,10 +248,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
root_key.objectid = key.offset;
root_objectid = key.offset;
key.offset++;
root = btrfs_get_fs_root(fs_info, &root_key, false);
root = btrfs_get_fs_root(fs_info, root_objectid, false);
err = PTR_ERR_OR_ZERO(root);
if (err && err != -ENOENT) {
break;
@ -270,7 +268,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
break;
}
err = btrfs_del_orphan_item(trans, tree_root,
root_key.objectid);
root_objectid);
btrfs_end_transaction(trans);
if (err) {
btrfs_handle_fs_error(fs_info, err,

View file

@ -647,13 +647,9 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
struct inode_fs_paths *ipath = NULL;
struct btrfs_root *local_root;
struct btrfs_key root_key;
struct btrfs_key key;
root_key.objectid = root;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
local_root = btrfs_get_fs_root(fs_info, &root_key, true);
local_root = btrfs_get_fs_root(fs_info, root, true);
if (IS_ERR(local_root)) {
ret = PTR_ERR(local_root);
goto err;
@ -3046,7 +3042,8 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *scrub_dev,
int num, u64 base, u64 length)
int num, u64 base, u64 length,
struct btrfs_block_group *cache)
{
struct btrfs_path *path, *ppath;
struct btrfs_fs_info *fs_info = sctx->fs_info;
@ -3284,6 +3281,20 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
break;
}
/*
* If our block group was removed in the meanwhile, just
* stop scrubbing since there is no point in continuing.
* Continuing would prevent reusing its device extents
* for new block groups for a long time.
*/
spin_lock(&cache->lock);
if (cache->removed) {
spin_unlock(&cache->lock);
ret = 0;
goto out;
}
spin_unlock(&cache->lock);
extent = btrfs_item_ptr(l, slot,
struct btrfs_extent_item);
flags = btrfs_extent_flags(l, extent);
@ -3328,13 +3339,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
&extent_dev,
&extent_mirror_num);
ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
extent_logical +
extent_len - 1,
&sctx->csum_list, 1);
if (ret)
goto out;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
extent_logical + extent_len - 1,
&sctx->csum_list, 1);
if (ret)
goto out;
}
ret = scrub_extent(sctx, map, extent_logical, extent_len,
extent_physical, extent_dev, flags,
@ -3457,7 +3469,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
map->stripes[i].physical == dev_offset) {
ret = scrub_stripe(sctx, map, scrub_dev, i,
chunk_offset, length);
chunk_offset, length, cache);
if (ret)
goto out;
}
@ -3554,6 +3566,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache)
goto skip;
/*
* Make sure that while we are scrubbing the corresponding block
* group doesn't get its logical address and its device extents
* reused for another block group, which can possibly be of a
* different type and different profile. We do this to prevent
* false error detections and crashes due to bogus attempts to
* repair extents.
*/
spin_lock(&cache->lock);
if (cache->removed) {
spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
goto skip;
}
btrfs_freeze_block_group(cache);
spin_unlock(&cache->lock);
/*
* we need call btrfs_inc_block_group_ro() with scrubs_paused,
* to avoid deadlock caused by:
@ -3609,6 +3638,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
} else {
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
scrub_pause_off(fs_info);
break;
@ -3695,6 +3725,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
spin_unlock(&cache->lock);
}
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
if (ret)
break;

View file

@ -23,6 +23,7 @@
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"
#include "xattr.h"
/*
* Maximum number of references an extent can have in order for us to attempt to
@ -4545,6 +4546,10 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key,
struct fs_path *p;
struct posix_acl_xattr_header dummy_acl;
/* Capabilities are emitted by finish_inode_if_needed */
if (!strncmp(name, XATTR_NAME_CAPS, name_len))
return 0;
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@ -4801,17 +4806,12 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
struct inode *inode;
struct page *page;
char *addr;
struct btrfs_key key;
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t last_index;
unsigned pg_offset = offset_in_page(offset);
ssize_t ret = 0;
key.objectid = sctx->cur_ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root);
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -5107,6 +5107,64 @@ static int send_extent_data(struct send_ctx *sctx,
return 0;
}
/*
* Search for a capability xattr related to sctx->cur_ino. If the capability is
* found, call send_set_xattr function to emit it.
*
* Return 0 if there isn't a capability, or when the capability was emitted
* successfully, or < 0 if an error occurred.
*/
static int send_capabilities(struct send_ctx *sctx)
{
struct fs_path *fspath = NULL;
struct btrfs_path *path;
struct btrfs_dir_item *di;
struct extent_buffer *leaf;
unsigned long data_ptr;
char *buf = NULL;
int buf_len;
int ret = 0;
path = alloc_path_for_send();
if (!path)
return -ENOMEM;
di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
if (!di) {
/* There is no xattr for this inode */
goto out;
} else if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
}
leaf = path->nodes[0];
buf_len = btrfs_dir_data_len(leaf, di);
fspath = fs_path_alloc();
buf = kmalloc(buf_len, GFP_KERNEL);
if (!fspath || !buf) {
ret = -ENOMEM;
goto out;
}
ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
if (ret < 0)
goto out;
data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
read_extent_buffer(leaf, buf, data_ptr, buf_len);
ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
strlen(XATTR_NAME_CAPS), buf, buf_len);
out:
kfree(buf);
fs_path_free(fspath);
btrfs_free_path(path);
return ret;
}
static int clone_range(struct send_ctx *sctx,
struct clone_root *clone_root,
const u64 disk_byte,
@ -5972,6 +6030,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
}
ret = send_capabilities(sctx);
if (ret < 0)
goto out;
/*
* If other directory inodes depended on our current directory
* inode's move/rename, now do their move/rename operations.
@ -7021,7 +7083,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_root *clone_root;
struct btrfs_key key;
struct send_ctx *sctx = NULL;
u32 i;
u64 *clone_sources_tmp = NULL;
@ -7143,11 +7204,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
for (i = 0; i < arg->clone_sources_count; i++) {
key.objectid = clone_sources_tmp[i];
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
clone_root = btrfs_get_fs_root(fs_info, &key, true);
clone_root = btrfs_get_fs_root(fs_info,
clone_sources_tmp[i], true);
if (IS_ERR(clone_root)) {
ret = PTR_ERR(clone_root);
goto out;
@ -7178,11 +7236,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
if (arg->parent_root) {
key.objectid = arg->parent_root;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
sctx->parent_root = btrfs_get_fs_root(fs_info, &key, true);
sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
true);
if (IS_ERR(sctx->parent_root)) {
ret = PTR_ERR(sctx->parent_root);
goto out;

View file

@ -626,6 +626,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
struct reserve_ticket *ticket = NULL;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
struct btrfs_trans_handle *trans;
u64 bytes_needed;
u64 reclaim_bytes = 0;
@ -688,6 +689,11 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_refs_rsv->lock);
reclaim_bytes += delayed_refs_rsv->reserved;
spin_unlock(&delayed_refs_rsv->lock);
spin_lock(&trans_rsv->lock);
reclaim_bytes += trans_rsv->reserved;
spin_unlock(&trans_rsv->lock);
if (reclaim_bytes >= bytes_needed)
goto commit;
bytes_needed -= reclaim_bytes;
@ -856,6 +862,34 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
struct reserve_ticket *ticket)
{
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 min_bytes;
if (global_rsv->space_info != space_info)
return false;
spin_lock(&global_rsv->lock);
min_bytes = div_factor(global_rsv->size, 1);
if (global_rsv->reserved < min_bytes + ticket->bytes) {
spin_unlock(&global_rsv->lock);
return false;
}
global_rsv->reserved -= ticket->bytes;
ticket->bytes = 0;
list_del_init(&ticket->list);
wake_up(&ticket->wait);
space_info->tickets_id++;
if (global_rsv->reserved < global_rsv->size)
global_rsv->full = 0;
spin_unlock(&global_rsv->lock);
return true;
}
/*
* maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
* @fs_info - fs_info for this fs
@ -888,6 +922,10 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
if (ticket->steal &&
steal_from_global_rsv(fs_info, space_info, ticket))
return true;
/*
* may_commit_transaction will avoid committing the transaction
* if it doesn't feel like the space reclaimed by the commit
@ -1104,6 +1142,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
switch (flush) {
case BTRFS_RESERVE_FLUSH_ALL:
case BTRFS_RESERVE_FLUSH_ALL_STEAL:
wait_reserve_ticket(fs_info, space_info, ticket);
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
@ -1125,11 +1164,17 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
ret = ticket->error;
if (ticket->bytes || ticket->error) {
/*
* Need to delete here for priority tickets. For regular tickets
* either the async reclaim job deletes the ticket from the list
* or we delete it ourselves at wait_reserve_ticket().
* We were a priority ticket, so we need to delete ourselves
* from the list. Because we could have other priority tickets
* behind us that require less space, run
* btrfs_try_granting_tickets() to see if their reservations can
* now be made.
*/
remove_ticket(space_info, ticket);
if (!list_empty(&ticket->list)) {
remove_ticket(space_info, ticket);
btrfs_try_granting_tickets(fs_info, space_info);
}
if (!ret)
ret = -ENOSPC;
}
@ -1145,6 +1190,16 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
return ret;
}
/*
* This returns true if this flush state will go through the ordinary flushing
* code.
*/
static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
{
return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
(flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
}
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
@ -1175,8 +1230,17 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
ret = -ENOSPC;
used = btrfs_space_info_used(space_info, true);
pending_tickets = !list_empty(&space_info->tickets) ||
!list_empty(&space_info->priority_tickets);
/*
* We don't want NO_FLUSH allocations to jump everybody, they can
* generally handle ENOSPC in a different way, so treat them the same as
* normal flushers when it comes to skipping pending tickets.
*/
if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
pending_tickets = !list_empty(&space_info->tickets) ||
!list_empty(&space_info->priority_tickets);
else
pending_tickets = !list_empty(&space_info->priority_tickets);
/*
* Carry on if we have enough space (short-circuit) OR call
@ -1198,12 +1262,13 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* the list and we will do our own flushing further down.
*/
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
ASSERT(space_info->reclaim_size >= 0);
ticket.bytes = orig_bytes;
ticket.error = 0;
space_info->reclaim_size += ticket.bytes;
init_waitqueue_head(&ticket.wait);
if (flush == BTRFS_RESERVE_FLUSH_ALL) {
ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
if (flush == BTRFS_RESERVE_FLUSH_ALL ||
flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
list_add_tail(&ticket.list, &space_info->tickets);
if (!space_info->flush) {
space_info->flush = 1;

View file

@ -78,6 +78,7 @@ struct btrfs_space_info {
struct reserve_ticket {
u64 bytes;
int error;
bool steal;
struct list_head list;
wait_queue_head_t wait;
};

View file

@ -17,151 +17,152 @@ static inline void put_unaligned_le8(u8 val, void *p)
*(u8 *)p = val;
}
static bool check_setget_bounds(const struct extent_buffer *eb,
const void *ptr, unsigned off, int size)
{
const unsigned long member_offset = (unsigned long)ptr + off;
if (member_offset > eb->len) {
btrfs_warn(eb->fs_info,
"bad eb member start: ptr 0x%lx start %llu member offset %lu size %d",
(unsigned long)ptr, eb->start, member_offset, size);
return false;
}
if (member_offset + size > eb->len) {
btrfs_warn(eb->fs_info,
"bad eb member end: ptr 0x%lx start %llu member offset %lu size %d",
(unsigned long)ptr, eb->start, member_offset, size);
return false;
}
return true;
}
/*
* this is some deeply nasty code.
* Macro templates that define helpers to read/write extent buffer data of a
* given size, that are also used via ctree.h for access to item members by
* specialized helpers.
*
* The end result is that anyone who #includes ctree.h gets a
* declaration for the btrfs_set_foo functions and btrfs_foo functions,
* which are wrappers of btrfs_set_token_#bits functions and
* btrfs_get_token_#bits functions, which are defined in this file.
* Generic helpers:
* - btrfs_set_8 (for 8/16/32/64)
* - btrfs_get_8 (for 8/16/32/64)
*
* These setget functions do all the extent_buffer related mapping
* required to efficiently read and write specific fields in the extent
* buffers. Every pointer to metadata items in btrfs is really just
* an unsigned long offset into the extent buffer which has been
* cast to a specific type. This gives us all the gcc type checking.
* Generic helpers with a token (cached address of the most recently accessed
* page):
* - btrfs_set_token_8 (for 8/16/32/64)
* - btrfs_get_token_8 (for 8/16/32/64)
*
* The extent buffer api is used to do the page spanning work required to
* have a metadata blocksize different from the page size.
* The set/get functions handle data spanning two pages transparently, in case
* metadata block size is larger than page. Every pointer to metadata items is
* an offset into the extent buffer page array, cast to a specific type. This
* gives us all the type checking.
*
* There are 2 variants defined, one with a token pointer and one without.
* The extent buffer pages stored in the array pages do not form a contiguous
* phyusical range, but the API functions assume the linear offset to the range
* from 0 to metadata node size.
*/
#define DEFINE_BTRFS_SETGET_BITS(bits) \
u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off, \
struct btrfs_map_token *token) \
u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
const void *ptr, unsigned long off) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
u##bits res; \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
const unsigned long oip = offset_in_page(member_offset); \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = PAGE_SIZE - oip; \
\
ASSERT(token); \
ASSERT(token->eb == eb); \
\
if (token->kaddr && token->offset <= offset && \
(token->offset + PAGE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
res = get_unaligned_le##bits(p + off); \
return res; \
ASSERT(token->kaddr); \
ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
if (token->offset <= member_offset && \
member_offset + size <= token->offset + PAGE_SIZE) { \
return get_unaligned_le##bits(token->kaddr + oip); \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
token->kaddr = page_address(token->eb->pages[idx]); \
token->offset = idx << PAGE_SHIFT; \
if (oip + size <= PAGE_SIZE) \
return get_unaligned_le##bits(token->kaddr + oip); \
\
read_extent_buffer(eb, &leres, offset, size); \
return le##bits##_to_cpu(leres); \
} \
p = kaddr + part_offset - map_start; \
res = get_unaligned_le##bits(p + off); \
token->kaddr = kaddr; \
token->offset = map_start; \
return res; \
memcpy(lebytes, token->kaddr + oip, part); \
token->kaddr = page_address(token->eb->pages[idx + 1]); \
token->offset = (idx + 1) << PAGE_SHIFT; \
memcpy(lebytes + part, token->kaddr, size - part); \
return get_unaligned_le##bits(lebytes); \
} \
u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
u##bits res; \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long oip = offset_in_page(member_offset); \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
char *kaddr = page_address(eb->pages[idx]); \
const int size = sizeof(u##bits); \
const int part = PAGE_SIZE - oip; \
u8 lebytes[sizeof(u##bits)]; \
\
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
if (oip + size <= PAGE_SIZE) \
return get_unaligned_le##bits(kaddr + oip); \
\
read_extent_buffer(eb, &leres, offset, size); \
return le##bits##_to_cpu(leres); \
} \
p = kaddr + part_offset - map_start; \
res = get_unaligned_le##bits(p + off); \
return res; \
memcpy(lebytes, kaddr + oip, part); \
kaddr = page_address(eb->pages[idx + 1]); \
memcpy(lebytes + part, kaddr, size - part); \
return get_unaligned_le##bits(lebytes); \
} \
void btrfs_set_token_##bits(struct extent_buffer *eb, \
void btrfs_set_token_##bits(struct btrfs_map_token *token, \
const void *ptr, unsigned long off, \
u##bits val, \
struct btrfs_map_token *token) \
u##bits val) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
const unsigned long oip = offset_in_page(member_offset); \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = PAGE_SIZE - oip; \
\
ASSERT(token); \
ASSERT(token->eb == eb); \
\
if (token->kaddr && token->offset <= offset && \
(token->offset + PAGE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
put_unaligned_le##bits(val, p + off); \
ASSERT(token->kaddr); \
ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
if (token->offset <= member_offset && \
member_offset + size <= token->offset + PAGE_SIZE) { \
put_unaligned_le##bits(val, token->kaddr + oip); \
return; \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
\
val2 = cpu_to_le##bits(val); \
write_extent_buffer(eb, &val2, offset, size); \
token->kaddr = page_address(token->eb->pages[idx]); \
token->offset = idx << PAGE_SHIFT; \
if (oip + size <= PAGE_SIZE) { \
put_unaligned_le##bits(val, token->kaddr + oip); \
return; \
} \
p = kaddr + part_offset - map_start; \
put_unaligned_le##bits(val, p + off); \
token->kaddr = kaddr; \
token->offset = map_start; \
put_unaligned_le##bits(val, lebytes); \
memcpy(token->kaddr + oip, lebytes, part); \
token->kaddr = page_address(token->eb->pages[idx + 1]); \
token->offset = (idx + 1) << PAGE_SHIFT; \
memcpy(token->kaddr, lebytes + part, size - part); \
} \
void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long oip = offset_in_page(member_offset); \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
char *kaddr = page_address(eb->pages[idx]); \
const int size = sizeof(u##bits); \
const int part = PAGE_SIZE - oip; \
u8 lebytes[sizeof(u##bits)]; \
\
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
\
val2 = cpu_to_le##bits(val); \
write_extent_buffer(eb, &val2, offset, size); \
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
if (oip + size <= PAGE_SIZE) { \
put_unaligned_le##bits(val, kaddr + oip); \
return; \
} \
p = kaddr + part_offset - map_start; \
put_unaligned_le##bits(val, p + off); \
\
put_unaligned_le##bits(val, lebytes); \
memcpy(kaddr + oip, lebytes, part); \
kaddr = page_address(eb->pages[idx + 1]); \
memcpy(kaddr, lebytes + part, size - part); \
}
DEFINE_BTRFS_SETGET_BITS(8)

View file

@ -72,23 +72,32 @@ const char * __attribute_const__ btrfs_decode_error(int errno)
char *errstr = "unknown";
switch (errno) {
case -EIO:
case -ENOENT: /* -2 */
errstr = "No such entry";
break;
case -EIO: /* -5 */
errstr = "IO failure";
break;
case -ENOMEM:
case -ENOMEM: /* -12*/
errstr = "Out of memory";
break;
case -EROFS:
errstr = "Readonly filesystem";
break;
case -EEXIST:
case -EEXIST: /* -17 */
errstr = "Object already exists";
break;
case -ENOSPC:
case -ENOSPC: /* -28 */
errstr = "No space left";
break;
case -ENOENT:
errstr = "No such entry";
case -EROFS: /* -30 */
errstr = "Readonly filesystem";
break;
case -EOPNOTSUPP: /* -95 */
errstr = "Operation not supported";
break;
case -EUCLEAN: /* -117 */
errstr = "Filesystem corrupted";
break;
case -EDQUOT: /* -122 */
errstr = "Quota exceeded";
break;
}
@ -1093,10 +1102,7 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
btrfs_release_path(path);
key.objectid = subvol_objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
fs_root = btrfs_get_fs_root(fs_info, &key, true);
fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
if (IS_ERR(fs_root)) {
ret = PTR_ERR(fs_root);
fs_root = NULL;
@ -1211,7 +1217,6 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct inode *inode;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_key key;
int err;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@ -1239,10 +1244,7 @@ static int btrfs_fill_super(struct super_block *sb,
return err;
}
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(sb, &key, fs_info->fs_root);
inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail_close;

View file

@ -21,6 +21,7 @@
#include "dev-replace.h"
#include "qgroup.h"
#include "block-group.h"
#include "space-info.h"
#define BTRFS_ROOT_TRANS_TAG 0
@ -141,7 +142,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
struct btrfs_block_group,
bg_list);
list_del_init(&cache->bg_list);
btrfs_put_block_group_trimming(cache);
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
}
WARN_ON(!list_empty(&transaction->dev_update_list));
@ -348,10 +349,10 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
}
/*
* this does all the record keeping required to make sure that a reference
* counted root is properly recorded in a given transaction. This is required
* to make sure the old root from before we joined the transaction is deleted
* when the transaction commits
* This does all the record keeping required to make sure that a shareable root
* is properly recorded in a given transaction. This is required to make sure
* the old root from before we joined the transaction is deleted when the
* transaction commits.
*/
static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@ -359,7 +360,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
root->last_trans < trans->transid) || force) {
WARN_ON(root == fs_info->extent_root);
WARN_ON(!force && root->commit_root != root->node);
@ -438,7 +439,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return 0;
/*
@ -503,7 +504,7 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
struct btrfs_fs_info *fs_info = root->fs_info;
if (!fs_info->reloc_ctl ||
!test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
root->reloc_root)
return false;
@ -523,6 +524,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
u64 num_bytes = 0;
u64 qgroup_reserved = 0;
bool reloc_reserved = false;
bool do_chunk_alloc = false;
int ret;
/* Send isn't supposed to start transactions. */
@ -563,7 +565,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
* refill that amount for whatever is missing in the reserve.
*/
num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
if (delayed_refs_rsv->full == 0) {
if (flush == BTRFS_RESERVE_FLUSH_ALL &&
delayed_refs_rsv->full == 0) {
delayed_refs_bytes = num_bytes;
num_bytes <<= 1;
}
@ -584,6 +587,9 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
delayed_refs_bytes);
num_bytes -= delayed_refs_bytes;
}
if (rsv->space_info->force_alloc)
do_chunk_alloc = true;
} else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
!delayed_refs_rsv->full) {
/*
@ -665,6 +671,19 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
if (!current->journal_info)
current->journal_info = h;
/*
* If the space_info is marked ALLOC_FORCE then we'll get upgraded to
* ALLOC_FORCE the first run through, and then we won't allocate for
* anybody else who races in later. We don't care about the return
* value here.
*/
if (do_chunk_alloc && num_bytes) {
u64 flags = h->block_rsv->space_info->flags;
btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
CHUNK_ALLOC_NO_FORCE);
}
/*
* btrfs_record_root_in_trans() needs to alloc new extents, and may
* call btrfs_join_transaction() while we're also starting a
@ -699,43 +718,10 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
int min_factor)
unsigned int num_items)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
u64 num_bytes;
int ret;
/*
* We have two callers: unlink and block group removal. The
* former should succeed even if we will temporarily exceed
* quota and the latter operates on the extent root so
* qgroup enforcement is ignored anyway.
*/
trans = start_transaction(root, num_items, TRANS_START,
BTRFS_RESERVE_FLUSH_ALL, false);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans))
return trans;
num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
ret = btrfs_cond_migrate_bytes(fs_info, &fs_info->trans_block_rsv,
num_bytes, min_factor);
if (ret) {
btrfs_end_transaction(trans);
return ERR_PTR(ret);
}
trans->block_rsv = &fs_info->trans_block_rsv;
trans->bytes_reserved = num_bytes;
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, num_bytes, 1);
return trans;
return start_transaction(root, num_items, TRANS_START,
BTRFS_RESERVE_FLUSH_ALL_STEAL, false);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
@ -1644,7 +1630,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
key.offset = (u64)-1;
pending->snap = btrfs_get_fs_root(fs_info, &key, true);
pending->snap = btrfs_get_fs_root(fs_info, objectid, true);
if (IS_ERR(pending->snap)) {
ret = PTR_ERR(pending->snap);
btrfs_abort_transaction(trans, ret);

View file

@ -193,8 +193,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items);
struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
struct btrfs_root *root,
unsigned int num_items,
int min_factor);
unsigned int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);

View file

@ -957,10 +957,6 @@ static int check_dev_item(struct extent_buffer *leaf,
return 0;
}
/* Inode item error output has the same format as dir_item_err() */
#define inode_item_err(eb, slot, fmt, ...) \
dir_item_err(eb, slot, fmt, __VA_ARGS__)
static int check_inode_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{

View file

@ -35,7 +35,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
goto out;
path = btrfs_alloc_path();

View file

@ -505,13 +505,8 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
*/
if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
ino_size != 0) {
struct btrfs_map_token token;
btrfs_init_map_token(&token, dst_eb);
btrfs_set_token_inode_size(dst_eb, dst_item,
ino_size, &token);
}
ino_size != 0)
btrfs_set_inode_size(dst_eb, dst_item, ino_size);
goto no_copy;
}
@ -555,13 +550,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
static noinline struct inode *read_one_inode(struct btrfs_root *root,
u64 objectid)
{
struct btrfs_key key;
struct inode *inode;
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &key, root);
inode = btrfs_iget(root->fs_info->sb, objectid, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@ -3299,6 +3290,7 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
extent_io_tree_release(&log->log_csum_range);
btrfs_put_root(log);
}
@ -3816,8 +3808,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
found_key.offset = 0;
found_key.type = 0;
ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
&start_slot);
ret = btrfs_bin_search(path->nodes[0], &found_key, &start_slot);
if (ret < 0)
break;
@ -3853,44 +3844,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
* just to say 'this inode exists' and a logging
* to say 'update this inode with these values'
*/
btrfs_set_token_inode_generation(leaf, item, 0, &token);
btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
btrfs_set_token_inode_generation(&token, item, 0);
btrfs_set_token_inode_size(&token, item, logged_isize);
} else {
btrfs_set_token_inode_generation(leaf, item,
BTRFS_I(inode)->generation,
&token);
btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
btrfs_set_token_inode_generation(&token, item,
BTRFS_I(inode)->generation);
btrfs_set_token_inode_size(&token, item, inode->i_size);
}
btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
btrfs_set_token_inode_mode(&token, item, inode->i_mode);
btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
btrfs_set_token_timespec_sec(leaf, &item->atime,
inode->i_atime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, &item->atime,
inode->i_atime.tv_nsec, &token);
btrfs_set_token_timespec_sec(&token, &item->atime,
inode->i_atime.tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->atime,
inode->i_atime.tv_nsec);
btrfs_set_token_timespec_sec(leaf, &item->mtime,
inode->i_mtime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, &item->mtime,
inode->i_mtime.tv_nsec, &token);
btrfs_set_token_timespec_sec(&token, &item->mtime,
inode->i_mtime.tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->mtime,
inode->i_mtime.tv_nsec);
btrfs_set_token_timespec_sec(leaf, &item->ctime,
inode->i_ctime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, &item->ctime,
inode->i_ctime.tv_nsec, &token);
btrfs_set_token_timespec_sec(&token, &item->ctime,
inode->i_ctime.tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->ctime,
inode->i_ctime.tv_nsec);
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
&token);
btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
btrfs_set_token_inode_sequence(leaf, item,
inode_peek_iversion(inode), &token);
btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
btrfs_set_token_inode_block_group(leaf, item, 0, &token);
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
btrfs_set_token_inode_block_group(&token, item, 0);
}
static int log_inode_item(struct btrfs_trans_handle *trans,
@ -3916,8 +3904,20 @@ static int log_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
const u64 lock_end = sums->bytenr + sums->len - 1;
struct extent_state *cached_state = NULL;
int ret;
/*
* Serialize logging for checksums. This is to avoid racing with the
* same checksum being logged by another task that is logging another
* file which happens to refer to the same extent as well. Such races
* can leave checksum items in the log with overlapping ranges.
*/
ret = lock_extent_bits(&log_root->log_csum_range, sums->bytenr,
lock_end, &cached_state);
if (ret)
return ret;
/*
* Due to extent cloning, we might have logged a csum item that covers a
* subrange of a cloned extent, and later we can end up logging a csum
@ -3928,10 +3928,13 @@ static int log_csums(struct btrfs_trans_handle *trans,
* trim and adjust) any existing csum items in the log for this range.
*/
ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
if (ret)
return ret;
if (!ret)
ret = btrfs_csum_file_blocks(trans, log_root, sums);
return btrfs_csum_file_blocks(trans, log_root, sums);
unlock_extent_cached(&log_root->log_csum_range, sums->bytenr, lock_end,
&cached_state);
return ret;
}
static noinline int copy_items(struct btrfs_trans_handle *trans,
@ -4164,43 +4167,35 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
&token);
btrfs_set_token_file_extent_generation(&token, fi, trans->transid);
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_PREALLOC,
&token);
btrfs_set_token_file_extent_type(&token, fi,
BTRFS_FILE_EXTENT_PREALLOC);
else
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
btrfs_set_token_file_extent_type(&token, fi,
BTRFS_FILE_EXTENT_REG);
block_len = max(em->block_len, em->orig_block_len);
if (em->compress_type != BTRFS_COMPRESS_NONE) {
btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
em->block_start,
&token);
btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
&token);
btrfs_set_token_file_extent_disk_bytenr(&token, fi,
em->block_start);
btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
btrfs_set_token_file_extent_disk_bytenr(&token, fi,
em->block_start -
extent_offset, &token);
btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
&token);
extent_offset);
btrfs_set_token_file_extent_disk_num_bytes(&token, fi, block_len);
} else {
btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
&token);
btrfs_set_token_file_extent_disk_bytenr(&token, fi, 0);
btrfs_set_token_file_extent_disk_num_bytes(&token, fi, 0);
}
btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token);
btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
&token);
btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
btrfs_set_token_file_extent_offset(&token, fi, extent_offset);
btrfs_set_token_file_extent_num_bytes(&token, fi, em->len);
btrfs_set_token_file_extent_ram_bytes(&token, fi, em->ram_bytes);
btrfs_set_token_file_extent_compression(&token, fi, em->compress_type);
btrfs_set_token_file_extent_encryption(&token, fi, 0);
btrfs_set_token_file_extent_other_encoding(&token, fi, 0);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@ -4336,12 +4331,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
}
}
}
if (ins_nr > 0) {
if (ins_nr > 0)
ret = copy_items(trans, inode, dst_path, path,
start_slot, ins_nr, 1, 0);
if (ret > 0)
ret = 0;
}
out:
btrfs_release_path(path);
btrfs_free_path(dst_path);
@ -4835,10 +4827,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root);
inode = btrfs_iget(fs_info->sb, ino, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@ -4847,8 +4836,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
if (ret == -ENOENT) {
key.objectid = parent;
inode = btrfs_iget(fs_info->sb, &key, root);
inode = btrfs_iget(fs_info->sb, parent, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
} else {
@ -5587,7 +5575,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
continue;
btrfs_release_path(path);
di_inode = btrfs_iget(fs_info->sb, &di_key, root);
di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto next_dir_inode;
@ -5713,7 +5701,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
dir_inode = btrfs_iget(fs_info->sb, &inode_key, root);
dir_inode = btrfs_iget(fs_info->sb, inode_key.objectid,
root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@ -5780,14 +5769,17 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
int slot = path->slots[0];
struct btrfs_key search_key;
struct inode *inode;
u64 ino;
int ret = 0;
btrfs_release_path(path);
ino = found_key.offset;
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
inode = btrfs_iget(fs_info->sb, &search_key, root);
inode = btrfs_iget(fs_info->sb, ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@ -6132,7 +6124,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_key tmp_key;
struct btrfs_root *log;
struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
struct walk_control wc = {
@ -6194,11 +6185,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
goto error;
}
tmp_key.objectid = found_key.offset;
tmp_key.type = BTRFS_ROOT_ITEM_KEY;
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_get_fs_root(fs_info, &tmp_key, true);
wc.replay_dest = btrfs_get_fs_root(fs_info, found_key.offset,
true);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);

View file

@ -257,7 +257,6 @@ static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
u8 *uuid, u8 type, u64 subvolid)
{
struct btrfs_key key;
int ret = 0;
struct btrfs_root *subvol_root;
@ -265,10 +264,7 @@ static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
goto out;
key.objectid = subvolid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
subvol_root = btrfs_get_fs_root(fs_info, &key, true);
subvol_root = btrfs_get_fs_root(fs_info, subvolid, true);
if (IS_ERR(subvol_root)) {
ret = PTR_ERR(subvol_root);
if (ret == -ENOENT)

View file

@ -280,10 +280,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* ============
*
* uuid_mutex
* volume_mutex
* device_list_mutex
* chunk_mutex
* balance_mutex
* device_list_mutex
* chunk_mutex
* balance_mutex
*
*
* Exclusive operations, BTRFS_FS_EXCL_OP
@ -1042,6 +1041,8 @@ void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
&device->dev_state)) {
if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
&device->dev_state) &&
!test_bit(BTRFS_DEV_STATE_MISSING,
&device->dev_state) &&
(!latest_dev ||
device->generation > latest_dev->generation)) {
latest_dev = device;
@ -1185,7 +1186,6 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
{
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
int ret = 0;
flags |= FMODE_EXCL;
@ -1198,16 +1198,15 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
device->generation > latest_dev->generation)
latest_dev = device;
}
if (fs_devices->open_devices == 0) {
ret = -EINVAL;
goto out;
}
if (fs_devices->open_devices == 0)
return -EINVAL;
fs_devices->opened = 1;
fs_devices->latest_bdev = latest_dev->bdev;
fs_devices->total_rw_bytes = 0;
fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
out:
return ret;
return 0;
}
static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
@ -1251,49 +1250,48 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
put_page(page);
}
static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
struct page **page,
struct btrfs_super_block **disk_super)
static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
u64 bytenr)
{
struct btrfs_super_block *disk_super;
struct page *page;
void *p;
pgoff_t index;
/* make sure our super fits in the device */
if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
return 1;
return ERR_PTR(-EINVAL);
/* make sure our super fits in the page */
if (sizeof(**disk_super) > PAGE_SIZE)
return 1;
if (sizeof(*disk_super) > PAGE_SIZE)
return ERR_PTR(-EINVAL);
/* make sure our super doesn't straddle pages on disk */
index = bytenr >> PAGE_SHIFT;
if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
return 1;
if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
return ERR_PTR(-EINVAL);
/* pull in the page with our super */
*page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
index, GFP_KERNEL);
page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);
if (IS_ERR(*page))
return 1;
if (IS_ERR(page))
return ERR_CAST(page);
p = page_address(*page);
p = page_address(page);
/* align our pointer to the offset of the super block */
*disk_super = p + offset_in_page(bytenr);
disk_super = p + offset_in_page(bytenr);
if (btrfs_super_bytenr(*disk_super) != bytenr ||
btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
if (btrfs_super_bytenr(disk_super) != bytenr ||
btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
btrfs_release_disk_super(p);
return 1;
return ERR_PTR(-EINVAL);
}
if ((*disk_super)->label[0] &&
(*disk_super)->label[BTRFS_LABEL_SIZE - 1])
(*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';
if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;
return 0;
return disk_super;
}
int btrfs_forget_devices(const char *path)
@ -1319,7 +1317,6 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
bool new_device_added = false;
struct btrfs_device *device = NULL;
struct block_device *bdev;
struct page *page;
u64 bytenr;
lockdep_assert_held(&uuid_mutex);
@ -1337,8 +1334,9 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
if (IS_ERR(bdev))
return ERR_CAST(bdev);
if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
device = ERR_PTR(-EINVAL);
disk_super = btrfs_read_disk_super(bdev, bytenr);
if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super);
goto error_bdev_put;
}
@ -2663,8 +2661,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
ret = btrfs_commit_transaction(trans);
}
/* Update ctime/mtime for libblkid */
/*
* Now that we have written a new super block to this device, check all
* other fs_devices list if device_path alienates any other scanned
* device.
* We can ignore the return value as it typically returns -EINVAL and
* only succeeds if the device was an alien.
*/
btrfs_forget_devices(device_path);
/* Update ctime/mtime for blkid or udev */
update_dev_time(device_path);
return ret;
error_sysfs:

View file

@ -386,25 +386,6 @@ static void dio_bio_end_io(struct bio *bio)
spin_unlock_irqrestore(&dio->bio_lock, flags);
}
/**
* dio_end_io - handle the end io action for the given bio
* @bio: The direct io bio thats being completed
*
* This is meant to be called by any filesystem that uses their own dio_submit_t
* so that the DIO specific endio actions are dealt with after the filesystem
* has done it's completion work.
*/
void dio_end_io(struct bio *bio)
{
struct dio *dio = bio->bi_private;
if (dio->is_async)
dio_bio_end_aio(bio);
else
dio_bio_end_io(bio);
}
EXPORT_SYMBOL_GPL(dio_end_io);
static inline void
dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct block_device *bdev,

View file

@ -59,7 +59,7 @@ int iomap_dio_iopoll(struct kiocb *kiocb, bool spin)
EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
struct bio *bio)
struct bio *bio, loff_t pos)
{
atomic_inc(&dio->ref);
@ -67,7 +67,12 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
bio_set_polled(bio, dio->iocb);
dio->submit.last_queue = bdev_get_queue(iomap->bdev);
dio->submit.cookie = submit_bio(bio);
if (dio->dops && dio->dops->submit_io)
dio->submit.cookie = dio->dops->submit_io(
file_inode(dio->iocb->ki_filp),
iomap, bio, pos);
else
dio->submit.cookie = submit_bio(bio);
}
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
@ -191,7 +196,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
get_page(page);
__bio_add_page(bio, page, len, 0);
bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
iomap_dio_submit_bio(dio, iomap, bio);
iomap_dio_submit_bio(dio, iomap, bio, pos);
}
static loff_t
@ -299,11 +304,11 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
}
dio->size += n;
pos += n;
copied += n;
nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES);
iomap_dio_submit_bio(dio, iomap, bio);
iomap_dio_submit_bio(dio, iomap, bio, pos);
pos += n;
} while (nr_pages);
/*
@ -411,8 +416,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct blk_plug plug;
struct iomap_dio *dio;
lockdep_assert_held(&inode->i_rwsem);
if (!count)
return 0;

View file

@ -169,6 +169,14 @@ static inline void bio_advance_iter(const struct bio *bio,
#define bio_for_each_bvec(bvl, bio, iter) \
__bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter)
/*
* Iterate over all multi-page bvecs. Drivers shouldn't use this version for the
* same reasons as bio_for_each_segment_all().
*/
#define bio_for_each_bvec_all(bvl, bio, i) \
for (i = 0, bvl = bio_first_bvec_all(bio); \
i < (bio)->bi_vcnt; i++, bvl++) \
#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
static inline unsigned bio_segments(struct bio *bio)

View file

@ -3148,6 +3148,8 @@ extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t *count, unsigned int flags);
extern ssize_t generic_file_buffered_read(struct kiocb *iocb,
struct iov_iter *to, ssize_t already_read);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
@ -3209,8 +3211,6 @@ enum {
DIO_SKIP_HOLES = 0x02,
};
void dio_end_io(struct bio *bio);
ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, struct iov_iter *iter,
get_block_t get_block,

View file

@ -251,6 +251,8 @@ int iomap_writepages(struct address_space *mapping,
struct iomap_dio_ops {
int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
unsigned flags);
blk_qc_t (*submit_io)(struct inode *inode, struct iomap *iomap,
struct bio *bio, loff_t file_offset);
};
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,

View file

@ -89,6 +89,7 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
{ IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \
{ IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \
{ IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \
{ IO_TREE_LOG_CSUM_RANGE, "LOG_CSUM_RANGE" }, \
{ IO_TREE_SELFTEST, "SELFTEST" })
#define BTRFS_GROUP_FLAGS \

View file

@ -519,15 +519,6 @@ struct btrfs_extent_inline_ref {
__le64 offset;
} __attribute__ ((__packed__));
/* old style backrefs item */
struct btrfs_extent_ref_v0 {
__le64 root;
__le64 generation;
__le64 objectid;
__le32 count;
} __attribute__ ((__packed__));
/* dev extents record free space on individual devices. The owner
* field points back to the chunk allocation mapping tree that allocated
* the extent. The chunk tree uuid field is a way to double check the owner

View file

@ -1991,7 +1991,7 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
* * total number of bytes copied, including those the were already @written
* * negative error code if nothing was copied
*/
static ssize_t generic_file_buffered_read(struct kiocb *iocb,
ssize_t generic_file_buffered_read(struct kiocb *iocb,
struct iov_iter *iter, ssize_t written)
{
struct file *filp = iocb->ki_filp;
@ -2243,6 +2243,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
file_accessed(filp);
return written ? written : error;
}
EXPORT_SYMBOL_GPL(generic_file_buffered_read);
/**
* generic_file_read_iter - generic filesystem read routine