capability: just use a 'u64' instead of a 'u32[2]' array

Back in 2008 we extended the capability bits from 32 to 64, and we did
it by extending the single 32-bit capability word from one word to an
array of two words.  It was then obfuscated by hiding the "2" behind two
macro expansions, with the reasoning being that maybe it gets extended
further some day.

That reasoning may have been valid at the time, but the last thing we
want to do is to extend the capability set any more.  And the array of
values not only causes source code oddities (with loops to deal with
it), but also results in worse code generation.  It's a lose-lose
situation.

So just change the 'u32[2]' into a 'u64' and be done with it.
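
To make it concrete, the type itself goes from

	typedef struct kernel_cap_struct {
		__u32 cap[_KERNEL_CAPABILITY_U32S];
	} kernel_cap_t;

to

	typedef struct { u64 val; } kernel_cap_t;

and the bit-test helpers become plain 64-bit operations like

	#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0)

(see the <linux/capability.h> changes below).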

We still have to deal with the fact that the user space interface is
designed around an array of these 32-bit values, but that was the case
before too, since the array layouts were different (ie user space
doesn't use an array of 32-bit values for individual capability masks,
but an array of 32-bit slices of multiple masks).

So that marshalling of data is actually simplified too, even if it does
remain somewhat obscure and odd.
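
Concretely, capget() now hand-splits the single 64-bit value into the
two legacy 32-bit words, and capset() rebuilds it with a small helper
(both visible in the capget()/capset() hunks further down):

	kdata[0].effective = pE.val; kdata[1].effective = pE.val >> 32;

	static kernel_cap_t mk_kernel_cap(u32 low, u32 high)
	{
		return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK };
	}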

This was all triggered by my reaction to the new "cap_isidentical()"
introduced recently.  By just using a saner data structure, it went from

	unsigned __capi;
	CAP_FOR_EACH_U32(__capi) {
		if (a.cap[__capi] != b.cap[__capi])
			return false;
	}
	return true;

to just being

	return a.val == b.val;

instead.  Which is rather more obvious both to humans and to compilers.

Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Paul Moore <paul@paul-moore.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Linus Torvalds 2023-02-28 11:39:09 -08:00
parent 1d2aea1bcf
commit f122a08b19
9 changed files with 148 additions and 237 deletions


@@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 static void render_cap_t(struct seq_file *m, const char *header,
                        kernel_cap_t *a)
 {
-        unsigned __capi;
-
         seq_puts(m, header);
-        CAP_FOR_EACH_U32(__capi) {
-                seq_put_hex_ll(m, NULL,
-                           a->cap[CAP_LAST_U32 - __capi], 8);
-        }
+        seq_put_hex_ll(m, NULL, a->val, 16);
         seq_putc(m, '\n');
 }


@@ -15,28 +15,25 @@
 #include <uapi/linux/capability.h>
 #include <linux/uidgid.h>
+#include <linux/bits.h>
 
 #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
-#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3
 
 extern int file_caps_enabled;
 
-typedef struct kernel_cap_struct {
-        __u32 cap[_KERNEL_CAPABILITY_U32S];
-} kernel_cap_t;
+typedef struct { u64 val; } kernel_cap_t;
 
 /* same as vfs_ns_cap_data but in cpu endian and always filled completely */
 struct cpu_vfs_cap_data {
         __u32 magic_etc;
+        kuid_t rootid;
         kernel_cap_t permitted;
         kernel_cap_t inheritable;
-        kuid_t rootid;
 };
 
 #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct))
 #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t))
 
 struct file;
 struct inode;
 struct dentry;
@@ -44,16 +41,6 @@ struct task_struct;
 struct user_namespace;
 struct mnt_idmap;
 
-extern const kernel_cap_t __cap_empty_set;
-extern const kernel_cap_t __cap_init_eff_set;
-
-/*
- * Internal kernel functions only
- */
-
-#define CAP_FOR_EACH_U32(__capi) \
-        for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi)
-
 /*
  * CAP_FS_MASK and CAP_NFSD_MASKS:
  *
@@ -67,104 +54,52 @@ extern const kernel_cap_t __cap_init_eff_set;
  * 2. The security.* and trusted.* xattrs are fs-related MAC permissions
  */
 
-# define CAP_FS_MASK_B0     (CAP_TO_MASK(CAP_CHOWN)             \
-                            | CAP_TO_MASK(CAP_MKNOD)            \
-                            | CAP_TO_MASK(CAP_DAC_OVERRIDE)     \
-                            | CAP_TO_MASK(CAP_DAC_READ_SEARCH)  \
-                            | CAP_TO_MASK(CAP_FOWNER)           \
-                            | CAP_TO_MASK(CAP_FSETID))
+# define CAP_FS_MASK   (BIT_ULL(CAP_CHOWN)              \
+                        | BIT_ULL(CAP_MKNOD)            \
+                        | BIT_ULL(CAP_DAC_OVERRIDE)     \
+                        | BIT_ULL(CAP_DAC_READ_SEARCH)  \
+                        | BIT_ULL(CAP_FOWNER)           \
+                        | BIT_ULL(CAP_FSETID)           \
+                        | BIT_ULL(CAP_MAC_OVERRIDE))
+#define CAP_VALID_MASK (BIT_ULL(CAP_LAST_CAP+1)-1)
 
-# define CAP_FS_MASK_B1     (CAP_TO_MASK(CAP_MAC_OVERRIDE))
+# define CAP_EMPTY_SET  ((kernel_cap_t) { 0 })
+# define CAP_FULL_SET   ((kernel_cap_t) { CAP_VALID_MASK })
+# define CAP_FS_SET     ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) })
+# define CAP_NFSD_SET   ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) })
 
-#if _KERNEL_CAPABILITY_U32S != 2
-# error Fix up hand-coded capability macro initializers
-#else /* HAND-CODED capability initializers */
+# define cap_clear(c)   do { (c).val = 0; } while (0)
 
-#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1)
-#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
-
-# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }})
-# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
-# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \
-                                    | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \
-                                    CAP_FS_MASK_B1 } })
-# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \
-                                    | CAP_TO_MASK(CAP_SYS_RESOURCE), \
-                                    CAP_FS_MASK_B1 } })
-
-#endif /* _KERNEL_CAPABILITY_U32S != 2 */
-
-# define cap_clear(c) do { (c) = __cap_empty_set; } while (0)
-
-#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
-#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag))
-#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag))
-
-#define CAP_BOP_ALL(c, a, b, OP)                                    \
-do {                                                                \
-        unsigned __capi;                                            \
-        CAP_FOR_EACH_U32(__capi) {                                  \
-                c.cap[__capi] = a.cap[__capi] OP b.cap[__capi];     \
-        }                                                           \
-} while (0)
-
-#define CAP_UOP_ALL(c, a, OP)                                       \
-do {                                                                \
-        unsigned __capi;                                            \
-        CAP_FOR_EACH_U32(__capi) {                                  \
-                c.cap[__capi] = OP a.cap[__capi];                   \
-        }                                                           \
-} while (0)
+#define cap_raise(c, flag)  ((c).val |= BIT_ULL(flag))
+#define cap_lower(c, flag)  ((c).val &= ~BIT_ULL(flag))
+#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0)
 
 static inline kernel_cap_t cap_combine(const kernel_cap_t a,
                                        const kernel_cap_t b)
 {
-        kernel_cap_t dest;
-        CAP_BOP_ALL(dest, a, b, |);
-        return dest;
+        return (kernel_cap_t) { a.val | b.val };
 }
 
 static inline kernel_cap_t cap_intersect(const kernel_cap_t a,
                                          const kernel_cap_t b)
 {
-        kernel_cap_t dest;
-        CAP_BOP_ALL(dest, a, b, &);
-        return dest;
+        return (kernel_cap_t) { a.val & b.val };
 }
 
 static inline kernel_cap_t cap_drop(const kernel_cap_t a,
                                     const kernel_cap_t drop)
 {
-        kernel_cap_t dest;
-        CAP_BOP_ALL(dest, a, drop, &~);
-        return dest;
-}
-
-static inline kernel_cap_t cap_invert(const kernel_cap_t c)
-{
-        kernel_cap_t dest;
-        CAP_UOP_ALL(dest, c, ~);
-        return dest;
+        return (kernel_cap_t) { a.val &~ drop.val };
 }
 
 static inline bool cap_isclear(const kernel_cap_t a)
 {
-        unsigned __capi;
-        CAP_FOR_EACH_U32(__capi) {
-                if (a.cap[__capi] != 0)
-                        return false;
-        }
-        return true;
+        return !a.val;
 }
 
 static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
 {
-        unsigned __capi;
-        CAP_FOR_EACH_U32(__capi) {
-                if (a.cap[__capi] != b.cap[__capi])
-                        return false;
-        }
-        return true;
+        return a.val == b.val;
 }
 
 /*
@@ -176,39 +111,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
  */
 static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set)
 {
-        kernel_cap_t dest;
-        dest = cap_drop(a, set);
-        return cap_isclear(dest);
+        return !(a.val & ~set.val);
 }
 
 /* Used to decide between falling back on the old suser() or fsuser(). */
 static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a)
 {
-        const kernel_cap_t __cap_fs_set = CAP_FS_SET;
-        return cap_drop(a, __cap_fs_set);
+        return cap_drop(a, CAP_FS_SET);
 }
 
 static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a,
                                             const kernel_cap_t permitted)
 {
-        const kernel_cap_t __cap_fs_set = CAP_FS_SET;
-        return cap_combine(a,
-                           cap_intersect(permitted, __cap_fs_set));
+        return cap_combine(a, cap_intersect(permitted, CAP_FS_SET));
 }
 
 static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a)
 {
-        const kernel_cap_t __cap_fs_set = CAP_NFSD_SET;
-        return cap_drop(a, __cap_fs_set);
+        return cap_drop(a, CAP_NFSD_SET);
 }
 
 static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a,
                                               const kernel_cap_t permitted)
 {
-        const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET;
-        return cap_combine(a,
-                           cap_intersect(permitted, __cap_nfsd_set));
+        return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET));
 }
 
 #ifdef CONFIG_MULTIUSER


@@ -22,7 +22,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
         struct user_namespace *uns = seq_user_ns(m);
         struct group_info *gi;
         kernel_cap_t cap;
-        unsigned __capi;
         int g;
 
         seq_printf(m, "%5d\n", id);
@@ -42,8 +41,7 @@
         }
         seq_puts(m, "\n\tCapEff:\t");
         cap = cred->cap_effective;
-        CAP_FOR_EACH_U32(__capi)
-                seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
+        seq_put_hex_ll(m, NULL, cap.val, 16);
         seq_putc(m, '\n');
         return 0;
 }


@@ -1295,15 +1295,11 @@ out:
 static void audit_log_cap(struct audit_buffer *ab, char *prefix,
                           kernel_cap_t *cap)
 {
-        int i;
-
         if (cap_isclear(*cap)) {
                 audit_log_format(ab, " %s=0", prefix);
                 return;
         }
-        audit_log_format(ab, " %s=", prefix);
-        CAP_FOR_EACH_U32(i)
-                audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
+        audit_log_format(ab, " %s=%016llx", prefix, cap->val);
 }
 
 static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)


@@ -20,13 +20,6 @@
 #include <linux/user_namespace.h>
 #include <linux/uaccess.h>
 
-/*
- * Leveraged for setting/resetting capabilities
- */
-
-const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
-EXPORT_SYMBOL(__cap_empty_set);
-
 int file_caps_enabled = 1;
 
 static int __init file_caps_disable(char *str)
@@ -151,6 +144,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
         pid_t pid;
         unsigned tocopy;
         kernel_cap_t pE, pI, pP;
+        struct __user_cap_data_struct kdata[2];
 
         ret = cap_validate_magic(header, &tocopy);
         if ((dataptr == NULL) || (ret != 0))
@@ -163,42 +157,46 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
                 return -EINVAL;
 
         ret = cap_get_target_pid(pid, &pE, &pI, &pP);
-        if (!ret) {
-                struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
-                unsigned i;
+        if (ret)
+                return ret;
 
-                for (i = 0; i < tocopy; i++) {
-                        kdata[i].effective = pE.cap[i];
-                        kdata[i].permitted = pP.cap[i];
-                        kdata[i].inheritable = pI.cap[i];
-                }
+        /*
+         * Annoying legacy format with 64-bit capabilities exposed
+         * as two sets of 32-bit fields, so we need to split the
+         * capability values up.
+         */
+        kdata[0].effective   = pE.val; kdata[1].effective   = pE.val >> 32;
+        kdata[0].permitted   = pP.val; kdata[1].permitted   = pP.val >> 32;
+        kdata[0].inheritable = pI.val; kdata[1].inheritable = pI.val >> 32;
 
-                /*
-                 * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
-                 * we silently drop the upper capabilities here. This
-                 * has the effect of making older libcap
-                 * implementations implicitly drop upper capability
-                 * bits when they perform a: capget/modify/capset
-                 * sequence.
-                 *
-                 * This behavior is considered fail-safe
-                 * behavior. Upgrading the application to a newer
-                 * version of libcap will enable access to the newer
-                 * capabilities.
-                 *
-                 * An alternative would be to return an error here
-                 * (-ERANGE), but that causes legacy applications to
-                 * unexpectedly fail; the capget/modify/capset aborts
-                 * before modification is attempted and the application
-                 * fails.
-                 */
-                if (copy_to_user(dataptr, kdata, tocopy
-                                 * sizeof(struct __user_cap_data_struct))) {
-                        return -EFAULT;
-                }
-        }
+        /*
+         * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
+         * we silently drop the upper capabilities here. This
+         * has the effect of making older libcap
+         * implementations implicitly drop upper capability
+         * bits when they perform a: capget/modify/capset
+         * sequence.
+         *
+         * This behavior is considered fail-safe
+         * behavior. Upgrading the application to a newer
+         * version of libcap will enable access to the newer
+         * capabilities.
+         *
+         * An alternative would be to return an error here
+         * (-ERANGE), but that causes legacy applications to
+         * unexpectedly fail; the capget/modify/capset aborts
+         * before modification is attempted and the application
+         * fails.
+         */
+        if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0])))
+                return -EFAULT;
 
-        return ret;
+        return 0;
 }
 
+static kernel_cap_t mk_kernel_cap(u32 low, u32 high)
+{
+        return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK };
+}
+
 /**
@@ -221,8 +219,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
  */
 SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 {
-        struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
-        unsigned i, tocopy, copybytes;
+        struct __user_cap_data_struct kdata[2] = { { 0, }, };
+        unsigned tocopy, copybytes;
         kernel_cap_t inheritable, permitted, effective;
         struct cred *new;
         int ret;
@@ -246,21 +244,9 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
         if (copy_from_user(&kdata, data, copybytes))
                 return -EFAULT;
 
-        for (i = 0; i < tocopy; i++) {
-                effective.cap[i] = kdata[i].effective;
-                permitted.cap[i] = kdata[i].permitted;
-                inheritable.cap[i] = kdata[i].inheritable;
-        }
-        while (i < _KERNEL_CAPABILITY_U32S) {
-                effective.cap[i] = 0;
-                permitted.cap[i] = 0;
-                inheritable.cap[i] = 0;
-                i++;
-        }
-
-        effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-        permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-        inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+        effective   = mk_kernel_cap(kdata[0].effective,   kdata[1].effective);
+        permitted   = mk_kernel_cap(kdata[0].permitted,   kdata[1].permitted);
+        inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable);
 
         new = prepare_creds();
         if (!new)


@@ -501,9 +501,9 @@ static int proc_cap_handler(struct ctl_table *table, int write,
                          void *buffer, size_t *lenp, loff_t *ppos)
 {
         struct ctl_table t;
-        unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
-        kernel_cap_t new_cap;
-        int err, i;
+        unsigned long cap_array[2];
+        kernel_cap_t new_cap, *cap;
+        int err;
 
         if (write && (!capable(CAP_SETPCAP) ||
                       !capable(CAP_SYS_MODULE)))
@@ -514,14 +514,16 @@
          * userspace if this is a read.
          */
         spin_lock(&umh_sysctl_lock);
-        for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) {
-                if (table->data == CAP_BSET)
-                        cap_array[i] = usermodehelper_bset.cap[i];
-                else if (table->data == CAP_PI)
-                        cap_array[i] = usermodehelper_inheritable.cap[i];
-                else
-                        BUG();
-        }
+        if (table->data == CAP_BSET)
+                cap = &usermodehelper_bset;
+        else if (table->data == CAP_PI)
+                cap = &usermodehelper_inheritable;
+        else
+                BUG();
+
+        /* Legacy format: capabilities are exposed as two 32-bit values */
+        cap_array[0] = (u32) cap->val;
+        cap_array[1] = cap->val >> 32;
         spin_unlock(&umh_sysctl_lock);
 
         t = *table;
@@ -535,22 +537,15 @@
         if (err < 0)
                 return err;
 
-        /*
-         * convert from the sysctl array of ulongs to the kernel_cap_t
-         * internal representation
-         */
-        for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
-                new_cap.cap[i] = cap_array[i];
+        new_cap.val = (u32)cap_array[0];
+        new_cap.val += (u64)cap_array[1] << 32;
 
         /*
          * Drop everything not in the new_cap (but don't add things)
         */
         if (write) {
                 spin_lock(&umh_sysctl_lock);
-                if (table->data == CAP_BSET)
-                        usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
-                if (table->data == CAP_PI)
-                        usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
+                *cap = cap_intersect(*cap, new_cap);
                 spin_unlock(&umh_sysctl_lock);
         }
 
@@ -561,14 +556,14 @@ struct ctl_table usermodehelper_table[] = {
         {
                 .procname     = "bset",
                 .data         = CAP_BSET,
-                .maxlen       = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+                .maxlen       = 2 * sizeof(unsigned long),
                 .mode         = 0600,
                 .proc_handler = proc_cap_handler,
         },
         {
                 .procname     = "inheritable",
                 .data         = CAP_PI,
-                .maxlen       = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+                .maxlen       = 2 * sizeof(unsigned long),
                 .mode         = 0600,
                 .proc_handler = proc_cap_handler,
         },


@@ -304,6 +304,26 @@ fail:
 }
 EXPORT_SYMBOL_IF_KUNIT(aa_unpack_u64);
 
+static bool aa_unpack_cap_low(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+        u32 val;
+
+        if (!aa_unpack_u32(e, &val, name))
+                return false;
+        data->val = val;
+        return true;
+}
+
+static bool aa_unpack_cap_high(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+        u32 val;
+
+        if (!aa_unpack_u32(e, &val, name))
+                return false;
+        data->val = (u32)data->val | ((u64)val << 32);
+        return true;
+}
+
 VISIBLE_IF_KUNIT bool aa_unpack_array(struct aa_ext *e, const char *name, u16 *size)
 {
         void *pos = e->pos;
@@ -897,25 +917,25 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
                 profile->path_flags = PATH_MEDIATE_DELETED;
 
         info = "failed to unpack profile capabilities";
-        if (!aa_unpack_u32(e, &(rules->caps.allow.cap[0]), NULL))
+        if (!aa_unpack_cap_low(e, &rules->caps.allow, NULL))
                 goto fail;
-        if (!aa_unpack_u32(e, &(rules->caps.audit.cap[0]), NULL))
+        if (!aa_unpack_cap_low(e, &rules->caps.audit, NULL))
                 goto fail;
-        if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL))
+        if (!aa_unpack_cap_low(e, &rules->caps.quiet, NULL))
                 goto fail;
-        if (!aa_unpack_u32(e, &tmpcap.cap[0], NULL))
+        if (!aa_unpack_cap_low(e, &tmpcap, NULL))
                 goto fail;
 
         info = "failed to unpack upper profile capabilities";
         if (aa_unpack_nameX(e, AA_STRUCT, "caps64")) {
                 /* optional upper half of 64 bit caps */
-                if (!aa_unpack_u32(e, &(rules->caps.allow.cap[1]), NULL))
+                if (!aa_unpack_cap_high(e, &rules->caps.allow, NULL))
                         goto fail;
-                if (!aa_unpack_u32(e, &(rules->caps.audit.cap[1]), NULL))
+                if (!aa_unpack_cap_high(e, &rules->caps.audit, NULL))
                         goto fail;
-                if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL))
+                if (!aa_unpack_cap_high(e, &rules->caps.quiet, NULL))
                         goto fail;
-                if (!aa_unpack_u32(e, &(tmpcap.cap[1]), NULL))
+                if (!aa_unpack_cap_high(e, &tmpcap, NULL))
                         goto fail;
                 if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
                         goto fail;
@@ -924,9 +944,9 @@
         info = "failed to unpack extended profile capabilities";
         if (aa_unpack_nameX(e, AA_STRUCT, "capsx")) {
                 /* optional extended caps mediation mask */
-                if (!aa_unpack_u32(e, &(rules->caps.extended.cap[0]), NULL))
+                if (!aa_unpack_cap_low(e, &rules->caps.extended, NULL))
                         goto fail;
-                if (!aa_unpack_u32(e, &(rules->caps.extended.cap[1]), NULL))
+                if (!aa_unpack_cap_high(e, &rules->caps.extended, NULL))
                         goto fail;
                 if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
                         goto fail;


@@ -589,7 +589,6 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
                                           bool *has_fcap)
 {
         struct cred *new = bprm->cred;
-        unsigned i;
         int ret = 0;
 
         if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
@@ -598,22 +597,17 @@
         if (caps->magic_etc & VFS_CAP_REVISION_MASK)
                 *has_fcap = true;
 
-        CAP_FOR_EACH_U32(i) {
-                __u32 permitted = caps->permitted.cap[i];
-                __u32 inheritable = caps->inheritable.cap[i];
+        /*
+         * pP' = (X & fP) | (pI & fI)
+         * The addition of pA' is handled later.
+         */
+        new->cap_permitted.val =
+                (new->cap_bset.val & caps->permitted.val) |
+                (new->cap_inheritable.val & caps->inheritable.val);
 
-                /*
-                 * pP' = (X & fP) | (pI & fI)
-                 * The addition of pA' is handled later.
-                 */
-                new->cap_permitted.cap[i] =
-                        (new->cap_bset.cap[i] & permitted) |
-                        (new->cap_inheritable.cap[i] & inheritable);
-
-                if (permitted & ~new->cap_permitted.cap[i])
-                        /* insufficient to execute correctly */
-                        ret = -EPERM;
-        }
+        if (caps->permitted.val & ~new->cap_permitted.val)
+                /* insufficient to execute correctly */
+                ret = -EPERM;
 
         /*
          * For legacy apps, with no internal support for recognizing they
@@ -644,7 +638,6 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
 {
         struct inode *inode = d_backing_inode(dentry);
         __u32 magic_etc;
-        unsigned tocopy, i;
         int size;
         struct vfs_ns_cap_data data, *nscaps = &data;
         struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
@@ -677,17 +670,14 @@
         case VFS_CAP_REVISION_1:
                 if (size != XATTR_CAPS_SZ_1)
                         return -EINVAL;
-                tocopy = VFS_CAP_U32_1;
                 break;
         case VFS_CAP_REVISION_2:
                 if (size != XATTR_CAPS_SZ_2)
                         return -EINVAL;
-                tocopy = VFS_CAP_U32_2;
                 break;
         case VFS_CAP_REVISION_3:
                 if (size != XATTR_CAPS_SZ_3)
                         return -EINVAL;
-                tocopy = VFS_CAP_U32_3;
                 rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
                 break;
 
@@ -705,15 +695,20 @@
         if (!rootid_owns_currentns(rootvfsuid))
                 return -ENODATA;
 
-        CAP_FOR_EACH_U32(i) {
-                if (i >= tocopy)
-                        break;
-                cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
-                cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
+        cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
+        cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
+
+        /*
+         * Rev1 had just a single 32-bit word, later expanded
+         * to a second one for the high bits
+         */
+        if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
+                cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
+                cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
         }
 
-        cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-        cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+        cpu_caps->permitted.val &= CAP_VALID_MASK;
+        cpu_caps->inheritable.val &= CAP_VALID_MASK;
 
         cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);


@@ -6,7 +6,7 @@
 #include <linux/capability.h>
 
 struct kernel_cap_struct {
-        __u32 cap[_LINUX_CAPABILITY_U32S_3];
+        __u64 val;
 } __attribute__((preserve_access_index));
 
 struct cred {
@@ -19,14 +19,13 @@ SEC("lsm.s/userns_create")
 int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
 {
         struct kernel_cap_struct caps = cred->cap_effective;
-        int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
-        __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
+        __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN);
 
         if (ret)
                 return 0;
 
         ret = -EPERM;
-        if (caps.cap[cap_index] & cap_mask)
+        if (caps.val & cap_mask)
                 return 0;
 
         return -EPERM;