vfs-6.12.procfs

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZuQEwAAKCRCRxhvAZXjc
 onI2AQDXa5XhIx0VpLWE9uVImVy3QuUKc/5pI1e1DKMgxLhKCgEAh15a4ETqmVaw
 Zp3ZSzoLD8Ez1WwWb6cWQuHFYRSjtwU=
 =+LKG
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.12.procfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull procfs updates from Christian Brauner:
 "This contains the following changes for procfs:

   - Add config options and parameters to block forcing memory writes.

     This adds a Kconfig option and boot param to allow removing the
     FOLL_FORCE flag from /proc/<pid>/mem write calls as this can be
     used in various attacks.

     The traditional forcing behavior is kept as default because it can
     break GDB and some other use cases.

     This is the simpler version that you had requested.

   - Restrict overmounting of ephemeral entities.

     It is currently possible to mount on top of various ephemeral
     entities in procfs. This specifically includes magic links. To
     recap, magic links are links of the form /proc/<pid>/fd/<nr>. They
     serve as references to a target file and during path lookup they
     cause a jump to the target path. Such magic links disappear if the
     corresponding file descriptor is closed.

     Currently it is possible to overmount such magic links. This is
     mostly interesting for an attacker that wants to somehow trick a
     process into e.g., reopening something that it didn't intend to
     reopen or to hide a malicious file descriptor.

     But also it risks leaking mounts for long-running processes. When
     overmounting a magic link like above, the mount will not be
     detached when the file descriptor is closed. Only the target
     mountpoint will disappear, which has the consequence of making it
     impossible to unmount that mount afterwards. So the mount will
     stick around until the process exits and the /proc/<pid>/ directory
     is cleaned up during proc_flush_pid() when the dentries are pruned
     and invalidated.

     That in turn means it's possible for a program to accidentally leak
     mounts and it's also possible to make a task leak mounts without
     its knowledge if the attacker just keeps overmounting things under
     /proc/<pid>/fd/<nr>.

     Disallow overmounting of such ephemeral entities.

   - Clean up the readdir method naming in some procfs file operations.

   - Replace kmalloc() and strcpy() with a simple kmemdup() call"

* tag 'vfs-6.12.procfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  proc: fold kmalloc() + strcpy() into kmemdup()
  proc: block mounting on top of /proc/<pid>/fdinfo/*
  proc: block mounting on top of /proc/<pid>/fd/*
  proc: block mounting on top of /proc/<pid>/map_files/*
  proc: add proc_splice_unmountable()
  proc: proc_readfdinfo() -> proc_fdinfo_iterate()
  proc: proc_readfd() -> proc_fd_iterate()
  proc: add config & param to block forcing mem writes
This commit is contained in:
Linus Torvalds 2024-09-16 09:36:59 +02:00
commit e8fc317dfc
6 changed files with 127 additions and 13 deletions

View file

@ -4788,6 +4788,16 @@
printk.time= Show timing data prefixed to each printk message line printk.time= Show timing data prefixed to each printk message line
Format: <bool> (1/Y/y=enable, 0/N/n=disable) Format: <bool> (1/Y/y=enable, 0/N/n=disable)
proc_mem.force_override= [KNL]
Format: {always | ptrace | never}
Traditionally /proc/pid/mem allows memory permissions to be
overridden without restrictions. This option may be set to
restrict that. Can be one of:
- 'always': traditional behavior always allows mem overrides.
- 'ptrace': only allow mem overrides for active ptracers.
- 'never': never allow mem overrides.
If not specified, default is the CONFIG_PROC_MEM_* choice.
processor.max_cstate= [HW,ACPI] processor.max_cstate= [HW,ACPI]
Limit processor to maximum C-state Limit processor to maximum C-state
max_cstate=9 overrides any DMI blacklist limit. max_cstate=9 overrides any DMI blacklist limit.

View file

@ -85,6 +85,7 @@
#include <linux/elf.h> #include <linux/elf.h>
#include <linux/pid_namespace.h> #include <linux/pid_namespace.h>
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/fs_parser.h>
#include <linux/fs_struct.h> #include <linux/fs_struct.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/sched/autogroup.h> #include <linux/sched/autogroup.h>
@ -117,6 +118,40 @@
static u8 nlink_tid __ro_after_init; static u8 nlink_tid __ro_after_init;
static u8 nlink_tgid __ro_after_init; static u8 nlink_tgid __ro_after_init;
enum proc_mem_force {
PROC_MEM_FORCE_ALWAYS,
PROC_MEM_FORCE_PTRACE,
PROC_MEM_FORCE_NEVER
};
/*
 * Currently selected override policy. The built-in default follows the
 * Kconfig choice: CONFIG_PROC_MEM_NO_FORCE selects NEVER, otherwise
 * CONFIG_PROC_MEM_FORCE_PTRACE selects PTRACE, otherwise ALWAYS.
 * Marked __ro_after_init; the only assignment visible here is in
 * early_proc_mem_force_override().
 */
static enum proc_mem_force proc_mem_force_override __ro_after_init =
IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
PROC_MEM_FORCE_ALWAYS;
/*
 * Maps the strings accepted by proc_mem.force_override= to their
 * enum proc_mem_force values; consumed by lookup_constant().
 */
static const struct constant_table proc_mem_force_table[] __initconst = {
{ "always", PROC_MEM_FORCE_ALWAYS },
{ "ptrace", PROC_MEM_FORCE_PTRACE },
{ "never", PROC_MEM_FORCE_NEVER },
{ } /* empty last entry; presumably the terminator lookup_constant() stops at */
};
/*
 * Handler for the "proc_mem.force_override=" early boot parameter.
 *
 * Returns -EINVAL when no value was supplied, 0 otherwise. The value is
 * looked up in proc_mem_force_table with the current setting passed as the
 * fallback, so an invalid string preserves the initial Kconfig choice.
 */
static int __init early_proc_mem_force_override(char *buf)
{
	int mode;

	if (buf == NULL)
		return -EINVAL;

	mode = lookup_constant(proc_mem_force_table, buf,
			       proc_mem_force_override);
	proc_mem_force_override = mode;

	return 0;
}
early_param("proc_mem.force_override", early_proc_mem_force_override);
struct pid_entry { struct pid_entry {
const char *name; const char *name;
unsigned int len; unsigned int len;
@ -832,6 +867,28 @@ static int mem_open(struct inode *inode, struct file *file)
return __mem_open(inode, file, PTRACE_MODE_ATTACH); return __mem_open(inode, file, PTRACE_MODE_ATTACH);
} }
/*
 * Decide whether an access through this /proc/<pid>/mem file may override
 * memory permissions, according to proc_mem_force_override.
 *
 * @file: the opened mem file; its inode identifies the target task
 * @mm:   the mm the access is going to operate on
 *
 * NEVER yields false and any policy other than PTRACE yields true. Under
 * PTRACE the result is true only if the target task can be looked up, has
 * a non-zero ptrace field, still uses @mm, and has the calling task as its
 * parent.
 */
static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm)
{
	struct task_struct *tsk;
	bool traced_by_us;

	if (proc_mem_force_override == PROC_MEM_FORCE_NEVER)
		return false;
	if (proc_mem_force_override != PROC_MEM_FORCE_PTRACE)
		return true;

	tsk = get_proc_task(file_inode(file));
	if (!tsk)
		return false;

	traced_by_us = READ_ONCE(tsk->ptrace) &&
		       READ_ONCE(tsk->mm) == mm &&
		       READ_ONCE(tsk->parent) == current;
	/* Drop the reference taken by get_proc_task(). */
	put_task_struct(tsk);

	return traced_by_us;
}
static ssize_t mem_rw(struct file *file, char __user *buf, static ssize_t mem_rw(struct file *file, char __user *buf,
size_t count, loff_t *ppos, int write) size_t count, loff_t *ppos, int write)
{ {
@ -852,7 +909,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!mmget_not_zero(mm)) if (!mmget_not_zero(mm))
goto free; goto free;
flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); flags = write ? FOLL_WRITE : 0;
if (proc_mem_foll_force(file, mm))
flags |= FOLL_FORCE;
while (count > 0) { while (count > 0) {
size_t this_len = min_t(size_t, count, PAGE_SIZE); size_t this_len = min_t(size_t, count, PAGE_SIZE);
@ -2274,8 +2333,8 @@ proc_map_files_instantiate(struct dentry *dentry,
inode->i_op = &proc_map_files_link_inode_operations; inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64; inode->i_size = 64;
d_set_d_op(dentry, &tid_map_files_dentry_operations); return proc_splice_unmountable(inode, dentry,
return d_splice_alias(inode, dentry); &tid_map_files_dentry_operations);
} }
static struct dentry *proc_map_files_lookup(struct inode *dir, static struct dentry *proc_map_files_lookup(struct inode *dir,

View file

@ -220,8 +220,8 @@ static struct dentry *proc_fd_instantiate(struct dentry *dentry,
ei->op.proc_get_link = proc_fd_link; ei->op.proc_get_link = proc_fd_link;
tid_fd_update_inode(task, inode, data->mode); tid_fd_update_inode(task, inode, data->mode);
d_set_d_op(dentry, &tid_fd_dentry_operations); return proc_splice_unmountable(inode, dentry,
return d_splice_alias(inode, dentry); &tid_fd_dentry_operations);
} }
static struct dentry *proc_lookupfd_common(struct inode *dir, static struct dentry *proc_lookupfd_common(struct inode *dir,
@ -312,14 +312,14 @@ static int proc_readfd_count(struct inode *inode, loff_t *count)
return 0; return 0;
} }
static int proc_readfd(struct file *file, struct dir_context *ctx) static int proc_fd_iterate(struct file *file, struct dir_context *ctx)
{ {
return proc_readfd_common(file, ctx, proc_fd_instantiate); return proc_readfd_common(file, ctx, proc_fd_instantiate);
} }
const struct file_operations proc_fd_operations = { const struct file_operations proc_fd_operations = {
.read = generic_read_dir, .read = generic_read_dir,
.iterate_shared = proc_readfd, .iterate_shared = proc_fd_iterate,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
}; };
@ -397,8 +397,8 @@ static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry,
inode->i_fop = &proc_fdinfo_file_operations; inode->i_fop = &proc_fdinfo_file_operations;
tid_fd_update_inode(task, inode, 0); tid_fd_update_inode(task, inode, 0);
d_set_d_op(dentry, &tid_fd_dentry_operations); return proc_splice_unmountable(inode, dentry,
return d_splice_alias(inode, dentry); &tid_fd_dentry_operations);
} }
static struct dentry * static struct dentry *
@ -407,7 +407,7 @@ proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
} }
static int proc_readfdinfo(struct file *file, struct dir_context *ctx) static int proc_fdinfo_iterate(struct file *file, struct dir_context *ctx)
{ {
return proc_readfd_common(file, ctx, return proc_readfd_common(file, ctx,
proc_fdinfo_instantiate); proc_fdinfo_instantiate);
@ -421,6 +421,6 @@ const struct inode_operations proc_fdinfo_inode_operations = {
const struct file_operations proc_fdinfo_operations = { const struct file_operations proc_fdinfo_operations = {
.read = generic_read_dir, .read = generic_read_dir,
.iterate_shared = proc_readfdinfo, .iterate_shared = proc_fdinfo_iterate,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
}; };

View file

@ -464,9 +464,9 @@ struct proc_dir_entry *proc_symlink(const char *name,
(S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
if (ent) { if (ent) {
ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); ent->size = strlen(dest);
ent->data = kmemdup(dest, ent->size + 1, GFP_KERNEL);
if (ent->data) { if (ent->data) {
strcpy((char*)ent->data,dest);
ent->proc_iops = &proc_link_inode_operations; ent->proc_iops = &proc_link_inode_operations;
ent = proc_register(parent, ent); ent = proc_register(parent, ent);
} else { } else {

View file

@ -349,3 +349,16 @@ static inline void pde_force_lookup(struct proc_dir_entry *pde)
/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
pde->proc_dops = &proc_net_dentry_ops; pde->proc_dops = &proc_net_dentry_ops;
} }
/*
 * Add a new procfs dentry that can't serve as a mountpoint. That should
 * encompass anything that is ephemeral and can just disappear while the
 * process is still around.
 *
 * @inode:  inode passed on to d_splice_alias() together with @dentry
 * @dentry: dentry being set up
 * @d_ops:  dentry operations installed on @dentry via d_set_d_op()
 *
 * Returns whatever d_splice_alias() returns.
 */
static inline struct dentry *proc_splice_unmountable(struct inode *inode,
struct dentry *dentry, const struct dentry_operations *d_ops)
{
d_set_d_op(dentry, d_ops);
dont_mount(dentry); /* this is the step that makes the dentry unusable as a mountpoint */
return d_splice_alias(inode, dentry);
}

View file

@ -19,6 +19,38 @@ config SECURITY_DMESG_RESTRICT
If you are unsure how to answer this question, answer N. If you are unsure how to answer this question, answer N.
choice
prompt "Allow /proc/pid/mem access override"
default PROC_MEM_ALWAYS_FORCE
help
Traditionally /proc/pid/mem allows users to override memory
permissions for users like ptrace, assuming they have ptrace
capability.
This allows people to limit that - either never override, or
require actual active ptrace attachment.
Defaults to the traditional behavior (for now)
config PROC_MEM_ALWAYS_FORCE
bool "Traditional /proc/pid/mem behavior"
help
This allows /proc/pid/mem accesses to override memory mapping
permissions if you have ptrace access rights.
config PROC_MEM_FORCE_PTRACE
bool "Require active ptrace() use for access override"
help
This allows /proc/pid/mem accesses to override memory mapping
permissions for active ptracers like gdb.
config PROC_MEM_NO_FORCE
bool "Never"
help
Never override memory mapping permissions
endchoice
config SECURITY config SECURITY
bool "Enable different security models" bool "Enable different security models"
depends on SYSFS depends on SYSFS