linux-stable/fs/erofs/compress.h

98 lines
3.1 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2019 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_FS_COMPRESS_H
#define __EROFS_FS_COMPRESS_H
#include "internal.h"
struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;
unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize;
erofs: relaxed temporary buffers allocation on readahead Even with inplace decompression, sometimes very few temporary buffers may be still needed for a single decompression shot (e.g. 16 pages for 64k sliding window or 4 pages for 16k sliding window). In low-memory scenarios, it would be better to try to allocate with GFP_NOWAIT on readahead first. That can help reduce the time spent on page allocation under durative memory pressure. Here are detailed performance numbers under multi-app launch benchmark workload [1] on ARM64 Android devices (8-core CPU and 8GB of memory) running a 5.15 LTS kernel with EROFS of 4k pclusters: +----------------------------------------------+ | LZ4 | vanilla | patched | diff | |----------------+---------+---------+---------| | Average (ms) | 3364 | 2684 | -20.21% | [64k sliding window] |----------------+---------+---------+---------| | Average (ms) | 2079 | 1610 | -22.56% | [16k sliding window] +----------------------------------------------+ The total size of system images for 4k pclusters is almost unchanged: (64k sliding window) 9,117,044 KB (16k sliding window) 9,113,096 KB Therefore, in addition to switch the sliding window from 64k to 16k, after applying this patch, it can eventually save 52.14% (3364 -> 1610) on average with no memory reservation. That is particularly useful for embedded devices with limited resources. [1] https://lore.kernel.org/r/20240109074143.4138783-1-guochunhai@vivo.com Suggested-by: Gao Xiang <xiang@kernel.org> Signed-off-by: Chunhai Guo <guochunhai@vivo.com> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Link: https://lore.kernel.org/r/20240126140142.201718-1-hsiangkao@linux.alibaba.com
2024-01-26 14:01:42 +00:00
unsigned int alg; /* the algorithm for decompression */
bool inplace_io, partial_decoding, fillgaps;
erofs: relaxed temporary buffers allocation on readahead Even with inplace decompression, sometimes very few temporary buffers may be still needed for a single decompression shot (e.g. 16 pages for 64k sliding window or 4 pages for 16k sliding window). In low-memory scenarios, it would be better to try to allocate with GFP_NOWAIT on readahead first. That can help reduce the time spent on page allocation under durative memory pressure. Here are detailed performance numbers under multi-app launch benchmark workload [1] on ARM64 Android devices (8-core CPU and 8GB of memory) running a 5.15 LTS kernel with EROFS of 4k pclusters: +----------------------------------------------+ | LZ4 | vanilla | patched | diff | |----------------+---------+---------+---------| | Average (ms) | 3364 | 2684 | -20.21% | [64k sliding window] |----------------+---------+---------+---------| | Average (ms) | 2079 | 1610 | -22.56% | [16k sliding window] +----------------------------------------------+ The total size of system images for 4k pclusters is almost unchanged: (64k sliding window) 9,117,044 KB (16k sliding window) 9,113,096 KB Therefore, in addition to switch the sliding window from 64k to 16k, after applying this patch, it can eventually save 52.14% (3364 -> 1610) on average with no memory reservation. That is particularly useful for embedded devices with limited resources. [1] https://lore.kernel.org/r/20240109074143.4138783-1-guochunhai@vivo.com Suggested-by: Gao Xiang <xiang@kernel.org> Signed-off-by: Chunhai Guo <guochunhai@vivo.com> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Reviewed-by: Yue Hu <huyue2@coolpad.com> Link: https://lore.kernel.org/r/20240126140142.201718-1-hsiangkao@linux.alibaba.com
2024-01-26 14:01:42 +00:00
gfp_t gfp; /* allocation flags for extra temporary buffers */
};
struct z_erofs_decompressor {
int (*config)(struct super_block *sb, struct erofs_super_block *dsb,
void *data, int size);
int (*decompress)(struct z_erofs_decompress_req *rq,
struct page **pagepool);
char *name;
};
/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
/*
* For all pages in a pcluster, page->private should be one of
* Type Last 2bits page->private
* short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
* preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE
* cached/managed page 00 pointer to z_erofs_pcluster
* online page (file-backed, 01/10/11 sub-index << 2 | count
* some pages can be used for inplace I/O)
*
* page->mapping should be one of
* Type page->mapping
* short-lived page NULL
* preallocated page NULL
* cached/managed page non-NULL or NULL (invalidated/truncated page)
* online page non-NULL
*
* For all managed pages, PG_private should be set with 1 extra refcount,
* which is used for page reclaim / migration.
*/
/*
* short-lived pages are pages directly from buddy system with specific
* page->private (no need to set PagePrivate since these are non-LRU /
* non-movable pages and bypass reclaim / migration code).
*/
static inline bool z_erofs_is_shortlived_page(struct page *page)
{
if (page->private != Z_EROFS_SHORTLIVED_PAGE)
return false;
DBG_BUGON(page->mapping);
return true;
}
static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
struct page *page)
{
if (!z_erofs_is_shortlived_page(page))
return false;
/* short-lived pages should not be used by others at the same time */
if (page_ref_count(page) > 1) {
put_page(page);
} else {
/* follow the pcluster rule above. */
erofs_pagepool_add(pagepool, page);
}
return true;
}
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
extern const struct z_erofs_decompressor erofs_decompressors[];
/* prototypes for specific algorithms */
int z_erofs_load_lzma_config(struct super_block *sb,
struct erofs_super_block *dsb, void *data, int size);
int z_erofs_load_deflate_config(struct super_block *sb,
struct erofs_super_block *dsb, void *data, int size);
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
erofs: DEFLATE compression support Add DEFLATE compression as the 3rd supported algorithm. DEFLATE is a popular generic-purpose compression algorithm for quite long time (many advanced formats like gzip, zlib, zip, png are all based on that) as Apple documentation written "If you require interoperability with non-Apple devices, use COMPRESSION_ZLIB. [1]". Due to its popularity, there are several hardware on-market DEFLATE accelerators, such as (s390) DFLTCC, (Intel) IAA/QAT, (HiSilicon) ZIP accelerator, etc. In addition, there are also several high-performence IP cores and even open-source FPGA approches available for DEFLATE. Therefore, it's useful to support DEFLATE compression in order to find a way to utilize these accelerators for asynchronous I/Os and get benefits from these later. Besides, it's a good choice to trade off between compression ratios and performance compared to LZ4 and LZMA. The DEFLATE core format is simple as well as easy to understand, therefore the code size of its decompressor is small even for the bootloader use cases. The runtime memory consumption is quite limited too (e.g. 32K + ~7K for each zlib stream). As usual, EROFS ourperforms similar approaches too. Alternatively, DEFLATE could still be used for some specific files since EROFS supports multiple compression algorithms in one image. [1] https://developer.apple.com/documentation/compression/compression_algorithm Reviewed-by: Chao Yu <chao@kernel.org> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20230810154859.118330-1-hsiangkao@linux.alibaba.com
2023-08-10 15:48:59 +00:00
int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
#endif