linux-stable/fs
Yutian Yang 4bbb6e6a1c memcg: charge fs_context and legacy_fs_context
commit bb902cb47c upstream.

This patch adds accounting flags to fs_context and legacy_fs_context
allocation sites so that kernel could correctly charge these objects.

We have written a PoC to demonstrate the effect of the missing-charging
bugs.  The PoC takes around 1,200MB unaccounted memory, while it is
charged for only 362MB memory usage.  We evaluate the PoC on QEMU x86_64
v5.2.90 + Linux kernel v5.10.19 + Debian buster.  All the limitations
including ulimits and sysctl variables are set as default.  Specifically,
the hard NOFILE limit and nr_open in sysctl are both 1,048,576.

/*------------------------- POC code ----------------------------*/

#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/file.h>
#include <time.h>
#include <sys/wait.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sched.h>
#include <fcntl.h>
#include <linux/mount.h>

#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                        } while (0)

#define STACK_SIZE (8 * 1024)
#ifndef __NR_fsopen
#define __NR_fsopen 430
#endif
static inline int fsopen(const char *fs_name, unsigned int flags)
{
        return syscall(__NR_fsopen, fs_name, flags);
}

static char thread_stack[512][STACK_SIZE];

int thread_fn(void* arg)
{
  for (int i = 0; i< 800000; ++i) {
    int fsfd = fsopen("nfs", FSOPEN_CLOEXEC);
    if (fsfd == -1) {
      errExit("fsopen");
    }
  }
  while(1);
  return 0;
}

int main(int argc, char *argv[]) {
  int thread_pid;
  for (int i = 0; i < 1; ++i) {
    thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, \
      SIGCHLD, NULL);
  }
  while(1);
  return 0;
}

/*-------------------------- end --------------------------------*/

Link: https://lkml.kernel.org/r/1626517201-24086-1-git-send-email-nglaive@gmail.com
Signed-off-by: Yutian Yang <nglaive@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <shenwenbo@zju.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2022-02-08 18:24:29 +01:00
..
9p 9P: Cast to loff_t before multiplying 2020-11-05 11:43:34 +01:00
adfs
affs fs/affs: release old buffer head on error path 2021-03-04 10:26:48 +01:00
afs afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation 2021-09-30 10:09:22 +02:00
autofs autofs: fix a leak in autofs_expire_indirect() 2019-10-25 00:03:11 -04:00
befs
bfs bfs: don't use WARNING: string when it's just info. 2021-01-06 14:48:39 +01:00
btrfs btrfs: fix deadlock between quota disable and qgroup rescan worker 2022-02-08 18:24:28 +01:00
cachefiles cachefiles: Handle readpage error correctly 2020-11-05 11:43:36 +01:00
ceph ceph: fix handling of "meta" errors 2021-10-27 09:54:27 +02:00
cifs smb3: do not error on fsync when readonly 2021-12-01 09:23:34 +01:00
coda y2038: add inode timestamp clamping 2019-09-19 09:42:37 -07:00
configfs fsnotify: fix fsnotify hooks in pseudo filesystems 2022-02-01 17:24:34 +01:00
cramfs cramfs: fix usage on non-MTD device 2019-11-23 21:44:49 -05:00
crypto fscrypt: add fscrypt_symlink_getattr() for computing st_size 2021-09-12 08:56:38 +02:00
debugfs debugfs: lockdown: Allow reading debugfs files that are not world readable 2022-01-27 09:19:36 +01:00
devpts fsnotify: fix fsnotify hooks in pseudo filesystems 2022-02-01 17:24:34 +01:00
dlm fs: dlm: filter user dlm messages for kernel locks 2022-01-27 09:19:40 +01:00
ecryptfs Revert "ecryptfs: replace BUG_ON with error handling code" 2021-05-26 12:05:19 +02:00
efivarfs efivarfs: revert "fix memory leak in efivarfs_create()" 2020-12-02 08:49:53 +01:00
efs
erofs erofs: fix unsafe pagevec reuse of hooked pclusters 2021-11-21 13:38:51 +01:00
exportfs exportfs_decode_fh(): negative pinned may become positive without the parent locked 2019-11-10 11:56:05 -05:00
ext2 ext2: fix sleeping in atomic bugs on error 2021-10-09 14:39:49 +02:00
ext4 ext4: don't use the orphan list when migrating an inode 2022-01-27 09:19:51 +01:00
f2fs f2fs: fix to reserve space for IO align feature 2022-01-27 09:19:53 +01:00
fat fat: don't allow to mount if the FAT length == 0 2020-06-17 16:40:36 +02:00
freevxfs
fscache fscache: Fix cookie key hashing 2021-09-22 12:26:25 +02:00
fuse fuse: Pass correct lend value to filemap_write_and_wait_range() 2022-01-27 09:19:49 +01:00
gfs2 gfs2: Fix length of holes reported at end-of-file 2021-12-08 09:01:08 +01:00
hfs hfs: add lock nesting notation to hfs_find_init 2021-07-31 08:19:38 +02:00
hfsplus hfsplus: prevent corruption in shrinking truncate 2021-05-19 10:08:29 +02:00
hostfs hostfs: fix memory handling in follow_link() 2021-04-14 08:24:14 +02:00
hpfs
hugetlbfs hugetlbfs: fix mount mode command line processing 2021-07-28 13:31:01 +02:00
iomap mm/swap: consider max pages in iomap_swapfile_add_extent 2021-09-15 09:47:35 +02:00
isofs isofs: Fix out of bound access for corrupted isofs image 2021-11-12 14:43:03 +01:00
jbd2 jbd2: fix up sparse warnings in checkpoint code 2020-11-18 19:20:30 +01:00
jffs2 jffs2: GC deadlock reading a page that is used in jffs2_write_begin() 2022-01-27 09:19:45 +01:00
jfs JFS: fix memleak in jfs_mount 2021-11-17 09:48:42 +01:00
kernfs kernfs: do not call fsnotify() with name without a parent 2020-08-19 08:16:12 +02:00
lockd lockd: lockd server-side shouldn't set fl_ops 2021-09-22 12:26:34 +02:00
minix fs/minix: remove expected error message in block_to_path() 2020-08-21 13:05:37 +02:00
nfs NFS: Ensure the server has an up to date ctime before renaming 2022-02-01 17:24:38 +01:00
nfs_common nfs_common: need lock during iterate through the list 2020-12-30 11:51:22 +01:00
nfsd fsnotify: fix fsnotify hooks in pseudo filesystems 2022-02-01 17:24:34 +01:00
nilfs2 nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group 2021-09-26 14:07:13 +02:00
nls
notify fanotify: fix ignore mask logic for events on child and on dir 2020-06-17 16:40:24 +02:00
ntfs ntfs: fix ntfs_test_inode and ntfs_init_locked_inode function type 2021-12-14 14:48:58 +01:00
ocfs2 ocfs2: fix data corruption on truncate 2021-11-17 09:48:17 +01:00
omfs
openpromfs
orangefs orangefs: Fix the size of a memory allocation in orangefs_bufmap_alloc() 2022-01-20 09:19:17 +01:00
overlayfs ovl: fix warning in ovl_create_real() 2021-12-22 09:29:40 +01:00
proc proc/vmcore: fix clearing user buffer by properly using clear_user() 2021-12-01 09:23:31 +01:00
pstore pstore: Fix typo in compression option name 2021-03-04 10:26:45 +01:00
qnx4 qnx4: work around gcc false positive warning bug 2021-09-30 10:09:26 +02:00
qnx6
quota quota: correct error number in free_dqentry() 2021-11-17 09:48:26 +01:00
ramfs ramfs: fix nommu mmap with gaps in the page cache 2020-10-29 09:57:53 +01:00
reiserfs reiserfs: check directory items on read from disk 2021-08-12 13:21:05 +02:00
romfs romfs: fix uninitialized memory leak in romfs_dev_read() 2020-08-26 10:40:51 +02:00
squashfs squashfs: fix divide error in calculate_skip() 2021-05-19 10:08:29 +02:00
sysfs sysfs: Add sysfs_emit and sysfs_emit_at to format sysfs output 2021-03-07 12:20:48 +01:00
sysv
tracefs tracefs: Set all files to the same group ownership as the mount option 2021-12-14 14:49:02 +01:00
ubifs ubifs: Error path in ubifs_remount_rw() seems to wrongly free write buffers 2022-01-27 09:19:49 +01:00
udf udf: Fix NULL ptr deref when converting from inline format 2022-02-01 17:24:34 +01:00
ufs fs/ufs: avoid potential u32 multiplication overflow 2020-08-21 13:05:37 +02:00
unicode unicode: make array 'token' static const, makes object smaller 2019-09-17 11:48:24 -04:00
verity fs-verity: fix signed integer overflow with i_size near S64_MAX 2021-10-06 15:42:30 +02:00
xfs xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate 2022-01-11 15:23:32 +01:00
Kconfig fs-verity for 5.4 2019-09-18 16:59:14 -07:00
Kconfig.binfmt
Makefile fs-verity for 5.4 2019-09-18 16:59:14 -07:00
aio.c aio: fix use-after-free due to missing POLLFREE handling 2021-12-14 14:49:02 +01:00
anon_inodes.c
attr.c utimes: Clamp the timestamps in notify_change() 2020-02-11 04:35:12 -08:00
bad_inode.c
binfmt_aout.c
binfmt_elf.c elf: don't use MAP_FIXED_NOREPLACE for elf interpreter mappings 2021-10-06 15:42:35 +02:00
binfmt_elf_fdpic.c
binfmt_em86.c
binfmt_flat.c binfmt_flat: revert "binfmt_flat: don't offset the data start" 2020-09-03 11:26:39 +02:00
binfmt_misc.c binfmt_misc: fix possible deadlock in bm_register_write 2021-03-17 17:03:57 +01:00
binfmt_script.c
block_dev.c block: reexpand iov_iter after read/write 2021-05-22 11:38:29 +02:00
buffer.c fs: Don't invalidate page buffers in block_write_full_page() 2020-11-05 11:43:24 +01:00
char_dev.c chardev: Avoid potential use-after-free in 'chrdev_open()' 2020-01-14 20:08:18 +01:00
compat.c
compat_binfmt_elf.c
compat_ioctl.c fix compat handling of FICLONERANGE, FIDEDUPERANGE and FS_IOC_FIEMAP 2020-01-09 10:20:05 +01:00
coredump.c coredump: fix core_pattern parse error 2020-12-11 13:23:30 +01:00
d_path.c fs: fix NULL dereference due to data race in prepend_path() 2020-10-29 09:57:45 +01:00
dax.c dax: fix ENOMEM handling in grab_mapping_entry() 2021-07-14 16:53:25 +02:00
dcache.c fix dget_parent() fastpath race 2020-10-01 13:17:19 +02:00
dcookies.c
direct-io.c fs: direct-io: fix missing sdio->boundary 2021-04-14 08:24:11 +02:00
drop_caches.c fs: avoid softlockups in s_inodes iterators 2020-01-12 12:21:37 +01:00
eventfd.c eventfd: track eventfd_signal() recursion depth 2020-02-11 04:35:37 -08:00
eventpoll.c ep_create_wakeup_source(): dentry name can change under you... 2020-10-07 08:01:31 +02:00
exec.c vfs: check fd has read access in kernel_read_file_from_fd() 2021-10-27 09:54:27 +02:00
fcntl.c fcntl: fix potential deadlock for &fasync_struct.fa_lock 2021-09-15 09:47:28 +02:00
fhandle.c
file.c fget: check that the fd still exists after getting a ref to it 2021-12-08 09:01:11 +01:00
file_table.c
filesystems.c fs/filesystems.c: downgrade user-reachable WARN_ONCE() to pr_warn_once() 2020-04-17 10:50:21 +02:00
fs-writeback.c writeback: fix obtain a reference to a freeing memcg css 2021-07-14 16:53:35 +02:00
fs_context.c memcg: charge fs_context and legacy_fs_context 2022-02-08 18:24:29 +01:00
fs_parser.c
fs_pin.c
fs_struct.c
fs_types.c
fsopen.c
inode.c futex: Fix inode life-time issue 2020-03-25 08:25:58 +01:00
internal.h cgroup1: fix leaked context root causing sporadic NULL deref in LTP 2021-07-31 08:19:37 +02:00
io_uring.c io_uring: Fix current->fs handling in io_sq_wq_submit_work() 2021-01-30 13:54:10 +01:00
ioctl.c compat_ioctl: add compat_ptr_ioctl() 2019-12-17 19:55:30 +01:00
libfs.c libfs: fix error cast of negative value in simple_attr_write() 2020-11-24 13:29:19 +01:00
locks.c locks: reinstate locks_delete_block optimization 2020-03-25 08:25:41 +01:00
mbcache.c
mount.h
mpage.c fs: move guard_bio_eod() after bio_set_op_attrs 2020-01-17 19:48:21 +01:00
namei.c fsnotify: invalidate dcache before IN_DELETE event 2022-02-01 17:24:39 +01:00
namespace.c fs: warn about impending deprecation of mandatory locks 2021-08-26 08:36:22 -04:00
no-block.c
nsfs.c
open.c cifs_atomic_open(): fix double-put on late allocation failure 2020-03-18 07:17:51 +01:00
pipe.c pipe: increase minimum default pipe size to 2 pages 2021-08-12 13:21:02 +02:00
pnode.c propagate_one(): mnt_set_mountpoint() needs mount_lock 2020-05-02 08:48:44 +02:00
pnode.h mount: fix mounting of detached mounts onto targets that reside on shared mounts 2021-03-17 17:03:33 +01:00
posix_acl.c
proc_namespace.c
read_write.c fs: allow deduplication of eof block into the end of the destination file 2020-02-11 04:35:23 -08:00
readdir.c readdir: make sure to verify directory entry for legacy interfaces too 2021-04-21 12:56:16 +02:00
select.c select: Fix indefinitely sleeping task in poll_schedule_timeout() 2022-01-29 10:25:11 +01:00
seq_file.c seq_file: disallow extremely large seq buffer allocations 2021-07-20 16:10:54 +02:00
signalfd.c signalfd: use wake_up_pollfree() 2021-12-14 14:49:02 +01:00
splice.c splice: only read in as much information as there is pipe buffer space 2019-12-17 19:56:52 +01:00
stack.c
stat.c
statfs.c vfs: Fix EOVERFLOW testing in put_compat_statfs64 2019-10-03 14:21:35 -07:00
super.c devtmpfs regression fix: reconfigure on each mount 2022-01-20 09:19:17 +01:00
sync.c
timerfd.c
userfaultfd.c userfaultfd: prevent concurrent API initialization 2021-09-22 12:26:26 +02:00
utimes.c utimes: Clamp the timestamps in notify_change() 2020-02-11 04:35:12 -08:00
xattr.c xattr: break delegations in {set,remove}xattr 2020-08-11 15:33:39 +02:00