diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 5c1de6d7ad71..b5d763aa8ff0 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -25,6 +25,7 @@
 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE	0x00000008
 #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2	0x00000008
 #define EROFS_FEATURE_INCOMPAT_ZTAILPACKING	0x00000010
+#define EROFS_FEATURE_INCOMPAT_FRAGMENTS	0x00000020
 #define EROFS_ALL_FEATURE_INCOMPAT		\
 	(EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
@@ -32,7 +33,8 @@
 	 EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
 	 EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
 	 EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
-	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
+	 EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
+	 EROFS_FEATURE_INCOMPAT_FRAGMENTS)
 
 #define EROFS_SB_EXTSLOT_SIZE	16
 
@@ -71,7 +73,9 @@ struct erofs_super_block {
 	} __packed u1;
 	__le16 extra_devices;	/* # of devices besides the primary device */
 	__le16 devt_slotoff;	/* startoff = devt_slotoff * devt_slotsize */
-	__u8 reserved2[38];
+	__u8 reserved[6];
+	__le64 packed_nid;	/* nid of the special packed inode */
+	__u8 reserved2[24];
 };
 
 /*
@@ -296,17 +300,26 @@ struct z_erofs_lzma_cfgs {
  * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
  * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
  * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
+ * bit 5 : fragment pcluster (0 - off; 1 - on)
  */
 #define Z_EROFS_ADVISE_COMPACTED_2B		0x0001
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1		0x0002
 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2		0x0004
 #define Z_EROFS_ADVISE_INLINE_PCLUSTER		0x0008
 #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER	0x0010
+#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER	0x0020
 
+#define Z_EROFS_FRAGMENT_INODE_BIT              7
 struct z_erofs_map_header {
-	__le16	h_reserved1;
-	/* indicates the encoded size of tailpacking data */
-	__le16  h_idata_size;
+	union {
+		/* fragment data offset in the packed inode */
+		__le32  h_fragmentoff;
+		struct {
+			__le16  h_reserved1;
+			/* indicates the encoded size of tailpacking data */
+			__le16  h_idata_size;
+		};
+	};
 	__le16	h_advise;
 	/*
 	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
@@ -315,7 +328,8 @@ struct z_erofs_map_header {
 	__u8	h_algorithmtype;
 	/*
 	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
-	 * bit 3-7 : reserved.
+	 * bit 3-6 : reserved;
+	 * bit 7   : move the whole file into packed inode or not.
 	 */
 	__u8	h_clusterbits;
 };
@@ -404,6 +418,10 @@ struct erofs_dirent {
 /* check the EROFS on-disk layout strictly at compile time */
 static inline void erofs_check_ondisk_layout_definitions(void)
 {
+	const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) {
+		.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT
+	};
+
 	BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
 	BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
 	BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
@@ -421,6 +439,9 @@ static inline void erofs_check_ondisk_layout_definitions(void)
 
 	BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
 		     Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
+	/* exclude old compiler versions like gcc 7.5.0 */
+	BUILD_BUG_ON(__builtin_constant_p(fmh) ?
+		     fmh != cpu_to_le64(1ULL << 63) : 0);
 }
 
 #endif
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 8dbfeb5f4f84..9f89c1da6229 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -131,6 +131,7 @@ struct erofs_sb_info {
 	struct inode *managed_cache;
 
 	struct erofs_sb_lz4_info lz4;
+	struct inode *packed_inode;
 #endif	/* CONFIG_EROFS_FS_ZIP */
 	struct erofs_dev_context *devs;
 	struct dax_device *dax_dev;
@@ -289,6 +290,7 @@ EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
 EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
 EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
 EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
+EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
 
 /* atomic flag definitions */
@@ -324,8 +326,13 @@ struct erofs_inode {
 			unsigned char  z_algorithmtype[2];
 			unsigned char  z_logical_clusterbits;
 			unsigned long  z_tailextent_headlcn;
-			erofs_off_t    z_idataoff;
-			unsigned short z_idata_size;
+			union {
+				struct {
+					erofs_off_t    z_idataoff;
+					unsigned short z_idata_size;
+				};
+				erofs_off_t z_fragmentoff;
+			};
 		};
 #endif	/* CONFIG_EROFS_FS_ZIP */
 	};
@@ -384,6 +391,7 @@ extern const struct address_space_operations z_erofs_aops;
 enum {
 	BH_Encoded = BH_PrivateStart,
 	BH_FullMapped,
+	BH_Fragment,
 };
 
 /* Has a disk mapping */
@@ -394,6 +402,8 @@ enum {
 #define EROFS_MAP_ENCODED	(1 << BH_Encoded)
 /* The length of extent is full */
 #define EROFS_MAP_FULL_MAPPED	(1 << BH_FullMapped)
+/* Located in the special packed inode */
+#define EROFS_MAP_FRAGMENT	(1 << BH_Fragment)
 
 struct erofs_map_blocks {
 	struct erofs_buf buf;
@@ -415,7 +425,7 @@ struct erofs_map_blocks {
 #define EROFS_GET_BLOCKS_FIEMAP	0x0002
 /* Used to map the whole extent if non-negligible data is requested for LZMA */
 #define EROFS_GET_BLOCKS_READMORE	0x0004
-/* Used to map tail extent for tailpacking inline pcluster */
+/* Used to map tail extent for tailpacking inline or fragment pcluster */
 #define EROFS_GET_BLOCKS_FINDTAIL	0x0008
 
 enum {
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 9f7fe6c04e65..ce20562ca91f 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -381,6 +381,19 @@ static int erofs_read_superblock(struct super_block *sb)
 #endif
 	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
 	sbi->root_nid = le16_to_cpu(dsb->root_nid);
+#ifdef CONFIG_EROFS_FS_ZIP
+	sbi->packed_inode = NULL;
+	if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) {
+		sbi->packed_inode =
+			erofs_iget(sb, le64_to_cpu(dsb->packed_nid), false);
+		if (IS_ERR(sbi->packed_inode)) {
+			ret = PTR_ERR(sbi->packed_inode);
+			/* don't leave an ERR_PTR behind for a later iput() */
+			sbi->packed_inode = NULL;
+			goto out;
+		}
+	}
+#endif
 	sbi->inos = le64_to_cpu(dsb->inos);
 
 	sbi->build_time = le64_to_cpu(dsb->build_time);
@@ -411,6 +424,8 @@ static int erofs_read_superblock(struct super_block *sb)
 		erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
 	if (erofs_is_fscache_mode(sb))
 		erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
+	if (erofs_sb_has_fragments(sbi))
+		erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
 out:
 	erofs_put_metabuf(&buf);
 	return ret;
@@ -947,6 +962,8 @@ static void erofs_put_super(struct super_block *sb)
 #ifdef CONFIG_EROFS_FS_ZIP
 	iput(sbi->managed_cache);
 	sbi->managed_cache = NULL;
+	iput(sbi->packed_inode);
+	sbi->packed_inode = NULL;
 #endif
 	erofs_fscache_unregister_fs(sb);
 }
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index 341fb43ad587..dd6eb7eccf9a 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -76,6 +76,7 @@ EROFS_ATTR_FEATURE(device_table);
 EROFS_ATTR_FEATURE(compr_head2);
 EROFS_ATTR_FEATURE(sb_chksum);
 EROFS_ATTR_FEATURE(ztailpacking);
+EROFS_ATTR_FEATURE(fragments);
 
 static struct attribute *erofs_feat_attrs[] = {
 	ATTR_LIST(zero_padding),
@@ -86,6 +87,7 @@ static struct attribute *erofs_feat_attrs[] = {
 	ATTR_LIST(compr_head2),
 	ATTR_LIST(sb_chksum),
 	ATTR_LIST(ztailpacking),
+	ATTR_LIST(fragments),
 	NULL,
 };
 ATTRIBUTE_GROUPS(erofs_feat);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 5792ca9e0d5e..c92a72f5bca6 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -650,6 +650,45 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
 		la < fe->headoffset;
 }
 
+/*
+ * Copy @len bytes of fragment data into @page at @pageofs.  @pos is the
+ * offset within this inode's fragment; the data itself is read from the
+ * packed inode starting at z_fragmentoff + @pos.  Returns 0 on success,
+ * -EFSCORRUPTED if no packed inode was loaded, or the erofs_bread() error.
+ */
+static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
+				 struct page *page, unsigned int pageofs,
+				 unsigned int len)
+{
+	struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
+	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+	u8 *src, *dst;
+	unsigned int i, cnt;
+
+	/* the feature bit can be set while packed_nid is 0 (crafted image) */
+	if (!packed_inode)
+		return -EFSCORRUPTED;
+
+	pos += EROFS_I(inode)->z_fragmentoff;
+	for (i = 0; i < len; i += cnt) {
+		cnt = min_t(unsigned int, len - i,
+			    EROFS_BLKSIZ - erofs_blkoff(pos));
+		src = erofs_bread(&buf, packed_inode,
+				  erofs_blknr(pos), EROFS_KMAP);
+		if (IS_ERR(src)) {
+			erofs_put_metabuf(&buf);
+			return PTR_ERR(src);
+		}
+
+		dst = kmap_local_page(page);
+		memcpy(dst + pageofs + i, src + erofs_blkoff(pos), cnt);
+		kunmap_local(dst);
+		pos += cnt;
+	}
+	erofs_put_metabuf(&buf);
+	return 0;
+}
+
 static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 				struct page *page, struct page **pagepool)
 {
@@ -688,7 +727,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 		/* didn't get a valid pcluster previously (very rare) */
 	}
 
-	if (!(map->m_flags & EROFS_MAP_MAPPED))
+	if (!(map->m_flags & EROFS_MAP_MAPPED) ||
+	    map->m_flags & EROFS_MAP_FRAGMENT)
 		goto hitted;
 
 	err = z_erofs_collector_begin(fe);
@@ -735,6 +775,24 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 		zero_user_segment(page, cur, end);
 		goto next_part;
 	}
+	if (map->m_flags & EROFS_MAP_FRAGMENT) {
+		unsigned int pageofs, skip, len;
+
+		if (offset > map->m_la) {
+			pageofs = 0;
+			skip = offset - map->m_la;
+		} else {
+			pageofs = map->m_la & ~PAGE_MASK;
+			skip = 0;
+		}
+		len = min_t(unsigned int, map->m_llen - skip, end - cur);
+		err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
+		if (err)
+			goto out;
+		++spiltted;
+		tight = false;
+		goto next_part;
+	}
 
 	exclusive = (!cur && (!spiltted || tight));
 	if (cur)
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 7196235a441c..d1723910251c 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -17,7 +17,7 @@ int z_erofs_fill_inode(struct inode *inode)
 	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
 
 	if (!erofs_sb_has_big_pcluster(sbi) &&
-	    !erofs_sb_has_ztailpacking(sbi) &&
+	    !erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) &&
 	    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
 		vi->z_advise = 0;
 		vi->z_algorithmtype[0] = 0;
@@ -55,10 +55,6 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
 	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
 		goto out_unlock;
 
-	DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
-		  !erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
-		  vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
-
 	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
 		    vi->xattr_isize, 8);
 	kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
@@ -69,6 +65,16 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
 	}
 
 	h = kaddr + erofs_blkoff(pos);
+	/*
+	 * If the highest bit of the 8-byte map header is set, the whole file
+	 * is stored in the packed inode. The remaining bits hold z_fragmentoff.
+	 */
+	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
+		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+		vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
+		vi->z_tailextent_headlcn = 0;
+		goto unmap_done;
+	}
 	vi->z_advise = le16_to_cpu(h->h_advise);
 	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
 	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
@@ -123,6 +129,20 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
 		if (err < 0)
 			goto out_unlock;
 	}
+
+	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
+	    !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
+		struct erofs_map_blocks map = {
+			.buf = __EROFS_BUF_INITIALIZER
+		};
+
+		vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
+		err = z_erofs_do_map_blocks(inode, &map,
+					    EROFS_GET_BLOCKS_FINDTAIL);
+		erofs_put_metabuf(&map.buf);
+		if (err < 0)
+			goto out_unlock;
+	}
 	/* paired with smp_mb() at the beginning of the function */
 	smp_mb();
 	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
@@ -598,6 +618,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
 {
 	struct erofs_inode *const vi = EROFS_I(inode);
 	bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
+	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
 	struct z_erofs_maprecorder m = {
 		.inode = inode,
 		.map = map,
@@ -666,12 +687,19 @@ static int z_erofs_do_map_blocks(struct inode *inode,
 
 	map->m_llen = end - map->m_la;
 
-	if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
 		vi->z_tailextent_headlcn = m.lcn;
+		/* for non-compact indexes, fragmentoff is 64 bits */
+		if (fragment &&
+		    vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+			vi->z_fragmentoff |= (u64)m.pblk << 32;
+	}
 	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
 		map->m_flags |= EROFS_MAP_META;
 		map->m_pa = vi->z_idataoff;
 		map->m_plen = vi->z_idata_size;
+	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
+		map->m_flags |= EROFS_MAP_FRAGMENT;
 	} else {
 		map->m_pa = blknr_to_addr(m.pblk);
 		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -715,6 +743,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
 			    struct erofs_map_blocks *map,
 			    int flags)
 {
+	struct erofs_inode *const vi = EROFS_I(inode);
 	int err = 0;
 
 	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
@@ -731,6 +760,15 @@ int z_erofs_map_blocks_iter(struct inode *inode,
 	if (err)
 		goto out;
 
+	if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
+	    !vi->z_tailextent_headlcn) {
+		map->m_la = 0;
+		map->m_llen = inode->i_size;
+		map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
+				EROFS_MAP_FRAGMENT;
+		goto out;
+	}
+
 	err = z_erofs_do_map_blocks(inode, map, flags);
 out:
 	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
@@ -757,7 +795,8 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
 	iomap->length = map.m_llen;
 	if (map.m_flags & EROFS_MAP_MAPPED) {
 		iomap->type = IOMAP_MAPPED;
-		iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
+		iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
+			      IOMAP_NULL_ADDR : map.m_pa;
 	} else {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;