// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "checksum.h"
#include "compress.h"
#include "extents.h"
#include "super-io.h"

#include <linux/lz4.h>
#include <linux/zlib.h>
#include <linux/zstd.h>

/* Bounce buffer: */
struct bbuf {
	void		*b;
	enum {
		BB_NONE,
		BB_VMAP,
		BB_KMALLOC,
		BB_MEMPOOL,
	}		type;
	int		rw;
};
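
/*
 * Allocate a bounce buffer of @size bytes: try a plain kmalloc() first, then
 * fall back to the preallocated mempool, which guarantees forward progress.
 * @size is bounded by encoded_extent_max, so a mempool element always
 * suffices.
 */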
static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
	void *b;

	BUG_ON(size > c->opts.encoded_extent_max);

	b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };

	BUG();
}
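
/*
 * Returns true if the data covered by @start is physically contiguous in
 * memory, i.e. each bvec begins exactly where the previous one ended - in
 * which case the pages can be used directly instead of vmapped or bounced.
 */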
static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	void *expected_start = NULL;

	__bio_for_each_bvec(bv, bio, iter, start) {
		if (expected_start &&
		    expected_start != page_address(bv.bv_page) + bv.bv_offset)
			return false;

		expected_start = page_address(bv.bv_page) +
			bv.bv_offset + bv.bv_len;
	}

	return true;
}
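
/*
 * Get a linear mapping of the data described by @start: use the pages
 * directly if they're physically contiguous and not highmem, otherwise try
 * to vmap() them, and as a last resort copy into a bounce buffer (filled
 * from the bio only for reads).
 */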
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
				       struct bvec_iter start, int rw)
{
	struct bbuf ret;
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	void *data;

	BUG_ON(start.bi_size > c->opts.encoded_extent_max);

	if (!PageHighMem(bio_iter_page(bio, start)) &&
	    bio_phys_contig(bio, start))
		return (struct bbuf) {
			.b = page_address(bio_iter_page(bio, start)) +
				bio_iter_offset(bio, start),
			.type = BB_NONE, .rw = rw
		};

	/* check if we can map the pages contiguously: */
	__bio_for_each_segment(bv, bio, iter, start) {
		if (iter.bi_size != start.bi_size &&
		    bv.bv_offset)
			goto bounce;

		if (bv.bv_len < iter.bi_size &&
		    bv.bv_offset + bv.bv_len < PAGE_SIZE)
			goto bounce;

		nr_pages++;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (pages != stack_pages)
		kfree(pages);

	if (data)
		return (struct bbuf) {
			.b = data + bio_iter_offset(bio, start),
			.type = BB_VMAP, .rw = rw
		};
bounce:
	ret = __bounce_alloc(c, start.bi_size, rw);

	if (rw == READ)
		memcpy_from_bio(ret.b, bio, start);

	return ret;
}

static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
}

static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
{
	switch (buf.type) {
	case BB_NONE:
		break;
	case BB_VMAP:
		vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
		break;
	case BB_KMALLOC:
		kfree(buf.b);
		break;
	case BB_MEMPOOL:
		mempool_free(buf.b, &c->compression_bounce[buf.rw]);
		break;
	}
}
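
/*
 * The kernel's zlib doesn't allocate internally; callers must supply the
 * workspace via strm->workspace. Userspace zlib has no such field - hence
 * the #ifdef, presumably so this file can also be built as part of the
 * userspace tools.
 */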
static inline void zlib_set_workspace(z_stream *strm, void *workspace)
{
#ifdef __KERNEL__
	strm->workspace = workspace;
#endif
}
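
/*
 * Decompress the contents of @src into the linear buffer @dst_data, using
 * the compression type and sizes recorded in @crc. Returns 0 on success,
 * -EIO on any decompression failure.
 */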
static int __bio_uncompress(struct bch_fs *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc_unpacked crc)
{
	struct bbuf src_data = { NULL };
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc.uncompressed_size << 9;
	void *workspace;
	int ret;

	src_data = bio_map_or_bounce(c, src, READ);

	switch (crc.compression_type) {
	case BCH_COMPRESSION_TYPE_lz4_old:
	case BCH_COMPRESSION_TYPE_lz4:
		ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
						  src_len, dst_len, dst_len);
		if (ret != dst_len)
			goto err;
		break;
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src_data.b,
			.avail_in	= src_len,
			.next_out	= dst_data,
			.avail_out	= dst_len,
		};

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);

		zlib_set_workspace(&strm, workspace);
		zlib_inflateInit2(&strm, -MAX_WBITS);
		ret = zlib_inflate(&strm, Z_FINISH);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != Z_STREAM_END)
			goto err;
		break;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		ZSTD_DCtx *ctx;
		size_t real_src_len = le32_to_cpup(src_data.b);

		if (real_src_len > src_len - 4)
			goto err;

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
		ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());

		ret = zstd_decompress_dctx(ctx,
					   dst_data, dst_len,
					   src_data.b + 4, real_src_len);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != dst_len)
			goto err;
		break;
	}
	default:
		BUG();
	}
	ret = 0;
out:
	bio_unmap_or_unbounce(c, src_data);
	return ret;
err:
	ret = -EIO;
	goto out;
}
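
/*
 * Decompress an extent in place: @bio contains the compressed data on entry
 * and the uncompressed data (copied back via a bounce buffer) on return,
 * with @crc rewritten to describe an uncompressed, unchecksummed extent.
 */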
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
				struct bch_extent_crc_unpacked *crc)
{
	struct bbuf data = { NULL };
	size_t dst_len = crc->uncompressed_size << 9;

	/* bio must own its pages: */
	BUG_ON(!bio->bi_vcnt);
	BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);

	if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
	    crc->compressed_size << 9 > c->opts.encoded_extent_max) {
		bch_err(c, "error rewriting existing data: extent too big");
		return -EIO;
	}

	data = __bounce_alloc(c, dst_len, WRITE);

	if (__bio_uncompress(c, bio, data.b, *crc)) {
		if (!c->opts.no_data_io)
			bch_err(c, "error rewriting existing data: decompression error");
		bio_unmap_or_unbounce(c, data);
		return -EIO;
	}

	/*
	 * XXX: don't have a good way to assert that the bio was allocated with
	 * enough space, we depend on bch2_move_extent doing the right thing
	 */
	bio->bi_iter.bi_size = crc->live_size << 9;

	memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));

	crc->csum_type		= 0;
	crc->compression_type	= 0;
	crc->compressed_size	= crc->live_size;
	crc->uncompressed_size	= crc->live_size;
	crc->offset		= 0;
	crc->csum		= (struct bch_csum) { 0, 0 };

	bio_unmap_or_unbounce(c, data);
	return 0;
}
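
/*
 * Decompress @src into @dst: if @dst is sized for exactly the uncompressed
 * data we decompress straight into its (mapped or bounced) pages; otherwise
 * we decompress into a bounce buffer and copy out just the live range.
 */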
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
			struct bio *dst, struct bvec_iter dst_iter,
			struct bch_extent_crc_unpacked crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc.uncompressed_size << 9;
	int ret;

	if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
	    crc.compressed_size << 9 > c->opts.encoded_extent_max)
		return -EIO;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
		: __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, src, dst_data.b, crc);
	if (ret)
		goto err;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
}
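
/*
 * Returns the compressed size on success, 0 if @src didn't compress into
 * @dst_len bytes, or a negative number whose magnitude is a hint as to how
 * much of the input *would* fit (only LZ4_compress_destSize() can tell us
 * this).
 */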
static int attempt_compress(struct bch_fs *c,
			    void *workspace,
			    void *dst, size_t dst_len,
			    void *src, size_t src_len,
			    struct bch_compression_opt compression)
{
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];

	switch (compression_type) {
	case BCH_COMPRESSION_TYPE_lz4:
		if (compression.level < LZ4HC_MIN_CLEVEL) {
			int len = src_len;
			int ret = LZ4_compress_destSize(
					src, dst,
					&len, dst_len,
					workspace);
			if (len < src_len)
				return -len;

			return ret;
		} else {
			int ret = LZ4_compress_HC(
					src, dst,
					src_len, dst_len,
					compression.level,
					workspace);

			return ret ?: -1;
		}
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src,
			.avail_in	= src_len,
			.next_out	= dst,
			.avail_out	= dst_len,
		};

		zlib_set_workspace(&strm, workspace);
		zlib_deflateInit2(&strm,
				  compression.level
				  ? clamp_t(unsigned, compression.level,
					    Z_BEST_SPEED, Z_BEST_COMPRESSION)
				  : Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
			return 0;

		if (zlib_deflateEnd(&strm) != Z_OK)
			return 0;

		return strm.total_out;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		/*
		 * rescale:
		 * zstd max compression level is 22, our max level is 15
		 */
		unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
		ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
		ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);

		/*
		 * ZSTD requires that when we decompress we pass in the exact
		 * compressed size - rounding it up to the nearest sector
		 * doesn't work, so we use the first 4 bytes of the buffer for
		 * that.
		 *
		 * Additionally, the ZSTD code seems to have a bug where it will
		 * write just past the end of the buffer - so subtract a fudge
		 * factor (7 bytes) from the dst buffer size to account for
		 * that.
		 */
		size_t len = zstd_compress_cctx(ctx,
				dst + 4, dst_len - 4 - 7,
				src, src_len,
				&params);
		if (zstd_is_error(len))
			return 0;

		*((__le32 *) dst) = cpu_to_le32(len);
		return len + 4;
	}
	default:
		BUG();
	}
}
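
/*
 * Compress the data in @src into @dst, retrying with progressively less
 * input (in multiples of the block size) until the output fits. Returns the
 * compression type used, or BCH_COMPRESSION_TYPE_incompressible if we never
 * got a result smaller than the input; on success *src_len and *dst_len are
 * set to the bytes consumed and produced.
 */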
static unsigned __bio_compress(struct bch_fs *c,
			       struct bio *dst, size_t *dst_len,
			       struct bio *src, size_t *src_len,
			       struct bch_compression_opt compression)
{
	struct bbuf src_data = { NULL }, dst_data = { NULL };
	void *workspace;
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];
	unsigned pad;
	int ret = 0;

	BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
	BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));

	/* If it's only one block, don't bother trying to compress: */
	if (src->bi_iter.bi_size <= c->opts.block_size)
		return BCH_COMPRESSION_TYPE_incompressible;

	dst_data = bio_map_or_bounce(c, dst, WRITE);
	src_data = bio_map_or_bounce(c, src, READ);

	workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);

	*src_len = src->bi_iter.bi_size;
	*dst_len = dst->bi_iter.bi_size;

	/*
	 * XXX: this algorithm sucks when the compression code doesn't tell us
	 * how much would fit, like LZ4 does:
	 */
	while (1) {
		if (*src_len <= block_bytes(c)) {
			ret = -1;
			break;
		}

		ret = attempt_compress(c, workspace,
				       dst_data.b, *dst_len,
				       src_data.b, *src_len,
				       compression);
		if (ret > 0) {
			*dst_len = ret;
			ret = 0;
			break;
		}

		/* Didn't fit: should we retry with a smaller amount? */
		if (*src_len <= *dst_len) {
			ret = -1;
			break;
		}

		/*
		 * If ret is negative, it's a hint as to how much data would fit
		 */
		BUG_ON(-ret >= *src_len);

		if (ret < 0)
			*src_len = -ret;
		else
			*src_len -= (*src_len - *dst_len) / 2;
		*src_len = round_down(*src_len, block_bytes(c));
	}

	mempool_free(workspace, &c->compress_workspace[compression_type]);

	if (ret)
		goto err;

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len)
		goto err;

	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data.b + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);

	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
	BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
	BUG_ON(*dst_len & (block_bytes(c) - 1));
	BUG_ON(*src_len & (block_bytes(c) - 1));
	ret = compression_type;
out:
	bio_unmap_or_unbounce(c, src_data);
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
err:
	ret = BCH_COMPRESSION_TYPE_incompressible;
	goto out;
}
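
/*
 * Wrapper around __bio_compress() that clamps how much input we consume (at
 * most encoded_extent_max) and how much output we produce (never bigger than
 * the input), restoring both bios' iterators before returning.
 */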
unsigned bch2_bio_compress(struct bch_fs *c,
			   struct bio *dst, size_t *dst_len,
			   struct bio *src, size_t *src_len,
			   unsigned compression_opt)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;
	unsigned compression_type;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
				     c->opts.encoded_extent_max);
	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	compression_type =
		__bio_compress(c, dst, dst_len, src, src_len,
			       bch2_compression_decode(compression_opt));

	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;
	return compression_type;
}

static int __bch2_fs_compress_init(struct bch_fs *, u64);

#define BCH_FEATURE_none	0

static const unsigned bch2_compression_opt_to_feature[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
	BCH_COMPRESSION_OPTS()
#undef x
};

#undef BCH_FEATURE_none
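
/*
 * Set feature bits @f in the superblock if not already set, first making
 * sure the corresponding compression workspaces are allocated. The feature
 * check is repeated under sb_lock, so concurrent callers only do the work
 * once.
 */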
static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
{
	int ret = 0;

	if ((c->sb.features & f) == f)
		return 0;

	mutex_lock(&c->sb_lock);

	if ((c->sb.features & f) == f) {
		mutex_unlock(&c->sb_lock);
		return 0;
	}

	ret = __bch2_fs_compress_init(c, c->sb.features|f);
	if (ret) {
		mutex_unlock(&c->sb_lock);
		return ret;
	}

	c->disk_sb.sb->features[0] |= cpu_to_le64(f);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_check_set_has_compressed_data(struct bch_fs *c,
				       unsigned compression_opt)
{
	unsigned compression_type = bch2_compression_decode(compression_opt).type;

	BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));

	return compression_type
		? __bch2_check_set_has_compressed_data(c,
				1ULL << bch2_compression_opt_to_feature[compression_type])
		: 0;
}

void bch2_fs_compress_exit(struct bch_fs *c)
{
	unsigned i;

	mempool_exit(&c->decompress_workspace);
	for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
		mempool_exit(&c->compress_workspace[i]);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
}
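
/*
 * Allocate bounce buffers and per-algorithm workspace mempools for every
 * compression type enabled in @features; a no-op for pools that already
 * exist, so this is safe to call again when features are added at runtime.
 */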
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
	size_t decompress_workspace_size = 0;
	ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
						 c->opts.encoded_extent_max);

	c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);

	struct {
		unsigned			feature;
		enum bch_compression_type	type;
		size_t				compress_workspace;
		size_t				decompress_workspace;
	} compression_types[] = {
		{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
			max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
			0 },
		{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
			zlib_inflate_workspacesize(), },
		{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
			c->zstd_workspace_size,
			zstd_dctx_workspace_bound() },
	}, *i;
	bool have_compressed = false;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++)
		have_compressed |= (features & (1 << i->feature)) != 0;

	if (!have_compressed)
		return 0;

	if (!mempool_initialized(&c->compression_bounce[READ]) &&
	    mempool_init_kvmalloc_pool(&c->compression_bounce[READ],
				       1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_read_init;

	if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
	    mempool_init_kvmalloc_pool(&c->compression_bounce[WRITE],
				       1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_write_init;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++) {
		decompress_workspace_size =
			max(decompress_workspace_size, i->decompress_workspace);

		if (!(features & (1 << i->feature)))
			continue;

		if (mempool_initialized(&c->compress_workspace[i->type]))
			continue;

		if (mempool_init_kvmalloc_pool(
				&c->compress_workspace[i->type],
				1, i->compress_workspace))
			return -BCH_ERR_ENOMEM_compression_workspace_init;
	}

	if (!mempool_initialized(&c->decompress_workspace) &&
	    mempool_init_kvmalloc_pool(&c->decompress_workspace,
				       1, decompress_workspace_size))
		return -BCH_ERR_ENOMEM_decompression_workspace_init;

	return 0;
}

static u64 compression_opt_to_feature(unsigned v)
{
	unsigned type = bch2_compression_decode(v).type;

	return BIT_ULL(bch2_compression_opt_to_feature[type]);
}

int bch2_fs_compress_init(struct bch_fs *c)
{
	u64 f = c->sb.features;

	f |= compression_opt_to_feature(c->opts.compression);
	f |= compression_opt_to_feature(c->opts.background_compression);

	return __bch2_fs_compress_init(c, f);
}
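
/*
 * Parse a compression option string of the form "type" or "type:level",
 * e.g. "lz4" or "zstd:15" - levels run from 1 to 15, and "none" doesn't
 * accept a level. On success the encoded option is stored in *res.
 */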
int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
			       struct printbuf *err)
{
	char *val = kstrdup(_val, GFP_KERNEL);
	char *p = val, *type_str, *level_str;
	struct bch_compression_opt opt = { 0 };
	int ret;

	if (!val)
		return -ENOMEM;

	type_str = strsep(&p, ":");
	level_str = p;

	ret = match_string(bch2_compression_opts, -1, type_str);
	if (ret < 0 && err)
		prt_str(err, "invalid compression type");
	if (ret < 0)
		goto err;

	opt.type = ret;

	if (level_str) {
		unsigned level;

		ret = kstrtouint(level_str, 10, &level);
		if (!ret && !opt.type && level)
			ret = -EINVAL;
		if (!ret && level > 15)
			ret = -EINVAL;
		if (ret < 0 && err)
			prt_str(err, "invalid compression level");
		if (ret < 0)
			goto err;

		opt.level = level;
	}

	*res = bch2_compression_encode(opt);
err:
	kfree(val);
	return ret;
}

void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
{
	struct bch_compression_opt opt = bch2_compression_decode(v);

	if (opt.type < BCH_COMPRESSION_OPT_NR)
		prt_str(out, bch2_compression_opts[opt.type]);
	else
		prt_printf(out, "(unknown compression opt %u)", opt.type);
	if (opt.level)
		prt_printf(out, ":%u", opt.level);
}

void bch2_opt_compression_to_text(struct printbuf *out,
				  struct bch_fs *c,
				  struct bch_sb *sb,
				  u64 v)
{
	return bch2_compression_opt_to_text(out, v);
}

int bch2_opt_compression_validate(u64 v, struct printbuf *err)
{
	if (!bch2_compression_opt_valid(v)) {
		prt_printf(err, "invalid compression opt %llu", v);
		return -BCH_ERR_invalid_sb_opt_compression;
	}

	return 0;
}