linux-stable/security/landlock/ruleset.c
Konstantin Meskhidze fff69fb03d
landlock: Support network rules with TCP bind and connect
Add network rules support in the ruleset management helpers and the
landlock_create_ruleset() syscall. Extend user space API to support
network actions:
* Add new network access rights: LANDLOCK_ACCESS_NET_BIND_TCP and
  LANDLOCK_ACCESS_NET_CONNECT_TCP.
* Add a new network rule type: LANDLOCK_RULE_NET_PORT tied to struct
  landlock_net_port_attr. The allowed_access field contains the network
  access rights, and the port field contains the port value according to
  the controlled protocol. This field can take up to a 64-bit value
  but the maximum value depends on the related protocol (e.g. 16-bit
  value for TCP). Network port is in host endianness [1].
* Add a new handled_access_net field to struct landlock_ruleset_attr
  that contains network access rights.
* Increment the Landlock ABI version to 4.

Implement socket_bind() and socket_connect() LSM hooks, which enable
to control TCP socket binding and connection to specific ports.

Expand access_masks_t from u16 to u32 to be able to store network access
rights alongside filesystem access rights for rulesets' handled access
rights.

Access rights are not tied to socket file descriptors but checked at
bind() or connect() call time against the caller's Landlock domain. For
the filesystem, a file descriptor is a direct access to a file/data.
However, for network sockets, we cannot identify for which data or peer
a newly created socket will give access to. Indeed, we need to wait for
a connect or bind request to identify the use case for this socket.
Likewise a directory file descriptor may enable to open another file
(i.e. a new data item), but this opening is also restricted by the
caller's domain, not the file descriptor's access rights [2].

[1] https://lore.kernel.org/r/278ab07f-7583-a4e0-3d37-1bacd091531d@digikod.net
[2] https://lore.kernel.org/r/263c1eb3-602f-57fe-8450-3f138581bee7@digikod.net

Signed-off-by: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
Link: https://lore.kernel.org/r/20231026014751.414649-9-konstantin.meskhidze@huawei.com
[mic: Extend commit message, fix typo in comments, and specify
endianness in the documentation]
Co-developed-by: Mickaël Salaün <mic@digikod.net>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
2023-10-26 21:07:15 +02:00

738 lines
19 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Landlock LSM - Ruleset management
*
* Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
* Copyright © 2018-2020 ANSSI
*/
#include <linux/bits.h>
#include <linux/bug.h>
#include <linux/compiler_types.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/overflow.h>
#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include "limits.h"
#include "object.h"
#include "ruleset.h"
static struct landlock_ruleset *create_ruleset(const u32 num_layers)
{
struct landlock_ruleset *new_ruleset;
new_ruleset =
kzalloc(struct_size(new_ruleset, access_masks, num_layers),
GFP_KERNEL_ACCOUNT);
if (!new_ruleset)
return ERR_PTR(-ENOMEM);
refcount_set(&new_ruleset->usage, 1);
mutex_init(&new_ruleset->lock);
new_ruleset->root_inode = RB_ROOT;
#if IS_ENABLED(CONFIG_INET)
new_ruleset->root_net_port = RB_ROOT;
#endif /* IS_ENABLED(CONFIG_INET) */
new_ruleset->num_layers = num_layers;
/*
* hierarchy = NULL
* num_rules = 0
* access_masks[] = 0
*/
return new_ruleset;
}
struct landlock_ruleset *
landlock_create_ruleset(const access_mask_t fs_access_mask,
const access_mask_t net_access_mask)
{
struct landlock_ruleset *new_ruleset;
/* Informs about useless ruleset. */
if (!fs_access_mask && !net_access_mask)
return ERR_PTR(-ENOMSG);
new_ruleset = create_ruleset(1);
if (IS_ERR(new_ruleset))
return new_ruleset;
if (fs_access_mask)
landlock_add_fs_access_mask(new_ruleset, fs_access_mask, 0);
if (net_access_mask)
landlock_add_net_access_mask(new_ruleset, net_access_mask, 0);
return new_ruleset;
}
static void build_check_rule(void)
{
const struct landlock_rule rule = {
.num_layers = ~0,
};
BUILD_BUG_ON(rule.num_layers < LANDLOCK_MAX_NUM_LAYERS);
}
static bool is_object_pointer(const enum landlock_key_type key_type)
{
switch (key_type) {
case LANDLOCK_KEY_INODE:
return true;
#if IS_ENABLED(CONFIG_INET)
case LANDLOCK_KEY_NET_PORT:
return false;
#endif /* IS_ENABLED(CONFIG_INET) */
default:
WARN_ON_ONCE(1);
return false;
}
}
static struct landlock_rule *
create_rule(const struct landlock_id id,
const struct landlock_layer (*const layers)[], const u32 num_layers,
const struct landlock_layer *const new_layer)
{
struct landlock_rule *new_rule;
u32 new_num_layers;
build_check_rule();
if (new_layer) {
/* Should already be checked by landlock_merge_ruleset(). */
if (WARN_ON_ONCE(num_layers >= LANDLOCK_MAX_NUM_LAYERS))
return ERR_PTR(-E2BIG);
new_num_layers = num_layers + 1;
} else {
new_num_layers = num_layers;
}
new_rule = kzalloc(struct_size(new_rule, layers, new_num_layers),
GFP_KERNEL_ACCOUNT);
if (!new_rule)
return ERR_PTR(-ENOMEM);
RB_CLEAR_NODE(&new_rule->node);
if (is_object_pointer(id.type)) {
/* This should be catched by insert_rule(). */
WARN_ON_ONCE(!id.key.object);
landlock_get_object(id.key.object);
}
new_rule->key = id.key;
new_rule->num_layers = new_num_layers;
/* Copies the original layer stack. */
memcpy(new_rule->layers, layers,
flex_array_size(new_rule, layers, num_layers));
if (new_layer)
/* Adds a copy of @new_layer on the layer stack. */
new_rule->layers[new_rule->num_layers - 1] = *new_layer;
return new_rule;
}
static struct rb_root *get_root(struct landlock_ruleset *const ruleset,
const enum landlock_key_type key_type)
{
switch (key_type) {
case LANDLOCK_KEY_INODE:
return &ruleset->root_inode;
#if IS_ENABLED(CONFIG_INET)
case LANDLOCK_KEY_NET_PORT:
return &ruleset->root_net_port;
#endif /* IS_ENABLED(CONFIG_INET) */
default:
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
}
}
static void free_rule(struct landlock_rule *const rule,
const enum landlock_key_type key_type)
{
might_sleep();
if (!rule)
return;
if (is_object_pointer(key_type))
landlock_put_object(rule->key.object);
kfree(rule);
}
static void build_check_ruleset(void)
{
const struct landlock_ruleset ruleset = {
.num_rules = ~0,
.num_layers = ~0,
};
typeof(ruleset.access_masks[0]) access_masks = ~0;
BUILD_BUG_ON(ruleset.num_rules < LANDLOCK_MAX_NUM_RULES);
BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
BUILD_BUG_ON(access_masks <
((LANDLOCK_MASK_ACCESS_FS << LANDLOCK_SHIFT_ACCESS_FS) |
(LANDLOCK_MASK_ACCESS_NET << LANDLOCK_SHIFT_ACCESS_NET)));
}
/**
* insert_rule - Create and insert a rule in a ruleset
*
* @ruleset: The ruleset to be updated.
* @id: The ID to build the new rule with. The underlying kernel object, if
* any, must be held by the caller.
* @layers: One or multiple layers to be copied into the new rule.
* @num_layers: The number of @layers entries.
*
* When user space requests to add a new rule to a ruleset, @layers only
* contains one entry and this entry is not assigned to any level. In this
* case, the new rule will extend @ruleset, similarly to a boolean OR between
* access rights.
*
* When merging a ruleset in a domain, or copying a domain, @layers will be
* added to @ruleset as new constraints, similarly to a boolean AND between
* access rights.
*/
static int insert_rule(struct landlock_ruleset *const ruleset,
const struct landlock_id id,
const struct landlock_layer (*const layers)[],
const size_t num_layers)
{
struct rb_node **walker_node;
struct rb_node *parent_node = NULL;
struct landlock_rule *new_rule;
struct rb_root *root;
might_sleep();
lockdep_assert_held(&ruleset->lock);
if (WARN_ON_ONCE(!layers))
return -ENOENT;
if (is_object_pointer(id.type) && WARN_ON_ONCE(!id.key.object))
return -ENOENT;
root = get_root(ruleset, id.type);
if (IS_ERR(root))
return PTR_ERR(root);
walker_node = &root->rb_node;
while (*walker_node) {
struct landlock_rule *const this =
rb_entry(*walker_node, struct landlock_rule, node);
if (this->key.data != id.key.data) {
parent_node = *walker_node;
if (this->key.data < id.key.data)
walker_node = &((*walker_node)->rb_right);
else
walker_node = &((*walker_node)->rb_left);
continue;
}
/* Only a single-level layer should match an existing rule. */
if (WARN_ON_ONCE(num_layers != 1))
return -EINVAL;
/* If there is a matching rule, updates it. */
if ((*layers)[0].level == 0) {
/*
* Extends access rights when the request comes from
* landlock_add_rule(2), i.e. @ruleset is not a domain.
*/
if (WARN_ON_ONCE(this->num_layers != 1))
return -EINVAL;
if (WARN_ON_ONCE(this->layers[0].level != 0))
return -EINVAL;
this->layers[0].access |= (*layers)[0].access;
return 0;
}
if (WARN_ON_ONCE(this->layers[0].level == 0))
return -EINVAL;
/*
* Intersects access rights when it is a merge between a
* ruleset and a domain.
*/
new_rule = create_rule(id, &this->layers, this->num_layers,
&(*layers)[0]);
if (IS_ERR(new_rule))
return PTR_ERR(new_rule);
rb_replace_node(&this->node, &new_rule->node, root);
free_rule(this, id.type);
return 0;
}
/* There is no match for @id. */
build_check_ruleset();
if (ruleset->num_rules >= LANDLOCK_MAX_NUM_RULES)
return -E2BIG;
new_rule = create_rule(id, layers, num_layers, NULL);
if (IS_ERR(new_rule))
return PTR_ERR(new_rule);
rb_link_node(&new_rule->node, parent_node, walker_node);
rb_insert_color(&new_rule->node, root);
ruleset->num_rules++;
return 0;
}
static void build_check_layer(void)
{
const struct landlock_layer layer = {
.level = ~0,
.access = ~0,
};
BUILD_BUG_ON(layer.level < LANDLOCK_MAX_NUM_LAYERS);
BUILD_BUG_ON(layer.access < LANDLOCK_MASK_ACCESS_FS);
}
/* @ruleset must be locked by the caller. */
int landlock_insert_rule(struct landlock_ruleset *const ruleset,
const struct landlock_id id,
const access_mask_t access)
{
struct landlock_layer layers[] = { {
.access = access,
/* When @level is zero, insert_rule() extends @ruleset. */
.level = 0,
} };
build_check_layer();
return insert_rule(ruleset, id, &layers, ARRAY_SIZE(layers));
}
static inline void get_hierarchy(struct landlock_hierarchy *const hierarchy)
{
if (hierarchy)
refcount_inc(&hierarchy->usage);
}
static void put_hierarchy(struct landlock_hierarchy *hierarchy)
{
while (hierarchy && refcount_dec_and_test(&hierarchy->usage)) {
const struct landlock_hierarchy *const freeme = hierarchy;
hierarchy = hierarchy->parent;
kfree(freeme);
}
}
static int merge_tree(struct landlock_ruleset *const dst,
struct landlock_ruleset *const src,
const enum landlock_key_type key_type)
{
struct landlock_rule *walker_rule, *next_rule;
struct rb_root *src_root;
int err = 0;
might_sleep();
lockdep_assert_held(&dst->lock);
lockdep_assert_held(&src->lock);
src_root = get_root(src, key_type);
if (IS_ERR(src_root))
return PTR_ERR(src_root);
/* Merges the @src tree. */
rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, src_root,
node) {
struct landlock_layer layers[] = { {
.level = dst->num_layers,
} };
const struct landlock_id id = {
.key = walker_rule->key,
.type = key_type,
};
if (WARN_ON_ONCE(walker_rule->num_layers != 1))
return -EINVAL;
if (WARN_ON_ONCE(walker_rule->layers[0].level != 0))
return -EINVAL;
layers[0].access = walker_rule->layers[0].access;
err = insert_rule(dst, id, &layers, ARRAY_SIZE(layers));
if (err)
return err;
}
return err;
}
static int merge_ruleset(struct landlock_ruleset *const dst,
struct landlock_ruleset *const src)
{
int err = 0;
might_sleep();
/* Should already be checked by landlock_merge_ruleset() */
if (WARN_ON_ONCE(!src))
return 0;
/* Only merge into a domain. */
if (WARN_ON_ONCE(!dst || !dst->hierarchy))
return -EINVAL;
/* Locks @dst first because we are its only owner. */
mutex_lock(&dst->lock);
mutex_lock_nested(&src->lock, SINGLE_DEPTH_NESTING);
/* Stacks the new layer. */
if (WARN_ON_ONCE(src->num_layers != 1 || dst->num_layers < 1)) {
err = -EINVAL;
goto out_unlock;
}
dst->access_masks[dst->num_layers - 1] = src->access_masks[0];
/* Merges the @src inode tree. */
err = merge_tree(dst, src, LANDLOCK_KEY_INODE);
if (err)
goto out_unlock;
#if IS_ENABLED(CONFIG_INET)
/* Merges the @src network port tree. */
err = merge_tree(dst, src, LANDLOCK_KEY_NET_PORT);
if (err)
goto out_unlock;
#endif /* IS_ENABLED(CONFIG_INET) */
out_unlock:
mutex_unlock(&src->lock);
mutex_unlock(&dst->lock);
return err;
}
static int inherit_tree(struct landlock_ruleset *const parent,
struct landlock_ruleset *const child,
const enum landlock_key_type key_type)
{
struct landlock_rule *walker_rule, *next_rule;
struct rb_root *parent_root;
int err = 0;
might_sleep();
lockdep_assert_held(&parent->lock);
lockdep_assert_held(&child->lock);
parent_root = get_root(parent, key_type);
if (IS_ERR(parent_root))
return PTR_ERR(parent_root);
/* Copies the @parent inode or network tree. */
rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
parent_root, node) {
const struct landlock_id id = {
.key = walker_rule->key,
.type = key_type,
};
err = insert_rule(child, id, &walker_rule->layers,
walker_rule->num_layers);
if (err)
return err;
}
return err;
}
static int inherit_ruleset(struct landlock_ruleset *const parent,
struct landlock_ruleset *const child)
{
int err = 0;
might_sleep();
if (!parent)
return 0;
/* Locks @child first because we are its only owner. */
mutex_lock(&child->lock);
mutex_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
/* Copies the @parent inode tree. */
err = inherit_tree(parent, child, LANDLOCK_KEY_INODE);
if (err)
goto out_unlock;
#if IS_ENABLED(CONFIG_INET)
/* Copies the @parent network port tree. */
err = inherit_tree(parent, child, LANDLOCK_KEY_NET_PORT);
if (err)
goto out_unlock;
#endif /* IS_ENABLED(CONFIG_INET) */
if (WARN_ON_ONCE(child->num_layers <= parent->num_layers)) {
err = -EINVAL;
goto out_unlock;
}
/* Copies the parent layer stack and leaves a space for the new layer. */
memcpy(child->access_masks, parent->access_masks,
flex_array_size(parent, access_masks, parent->num_layers));
if (WARN_ON_ONCE(!parent->hierarchy)) {
err = -EINVAL;
goto out_unlock;
}
get_hierarchy(parent->hierarchy);
child->hierarchy->parent = parent->hierarchy;
out_unlock:
mutex_unlock(&parent->lock);
mutex_unlock(&child->lock);
return err;
}
static void free_ruleset(struct landlock_ruleset *const ruleset)
{
struct landlock_rule *freeme, *next;
might_sleep();
rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root_inode,
node)
free_rule(freeme, LANDLOCK_KEY_INODE);
#if IS_ENABLED(CONFIG_INET)
rbtree_postorder_for_each_entry_safe(freeme, next,
&ruleset->root_net_port, node)
free_rule(freeme, LANDLOCK_KEY_NET_PORT);
#endif /* IS_ENABLED(CONFIG_INET) */
put_hierarchy(ruleset->hierarchy);
kfree(ruleset);
}
void landlock_put_ruleset(struct landlock_ruleset *const ruleset)
{
might_sleep();
if (ruleset && refcount_dec_and_test(&ruleset->usage))
free_ruleset(ruleset);
}
static void free_ruleset_work(struct work_struct *const work)
{
struct landlock_ruleset *ruleset;
ruleset = container_of(work, struct landlock_ruleset, work_free);
free_ruleset(ruleset);
}
void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
{
if (ruleset && refcount_dec_and_test(&ruleset->usage)) {
INIT_WORK(&ruleset->work_free, free_ruleset_work);
schedule_work(&ruleset->work_free);
}
}
/**
* landlock_merge_ruleset - Merge a ruleset with a domain
*
* @parent: Parent domain.
* @ruleset: New ruleset to be merged.
*
* Returns the intersection of @parent and @ruleset, or returns @parent if
* @ruleset is empty, or returns a duplicate of @ruleset if @parent is empty.
*/
struct landlock_ruleset *
landlock_merge_ruleset(struct landlock_ruleset *const parent,
struct landlock_ruleset *const ruleset)
{
struct landlock_ruleset *new_dom;
u32 num_layers;
int err;
might_sleep();
if (WARN_ON_ONCE(!ruleset || parent == ruleset))
return ERR_PTR(-EINVAL);
if (parent) {
if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
return ERR_PTR(-E2BIG);
num_layers = parent->num_layers + 1;
} else {
num_layers = 1;
}
/* Creates a new domain... */
new_dom = create_ruleset(num_layers);
if (IS_ERR(new_dom))
return new_dom;
new_dom->hierarchy =
kzalloc(sizeof(*new_dom->hierarchy), GFP_KERNEL_ACCOUNT);
if (!new_dom->hierarchy) {
err = -ENOMEM;
goto out_put_dom;
}
refcount_set(&new_dom->hierarchy->usage, 1);
/* ...as a child of @parent... */
err = inherit_ruleset(parent, new_dom);
if (err)
goto out_put_dom;
/* ...and including @ruleset. */
err = merge_ruleset(new_dom, ruleset);
if (err)
goto out_put_dom;
return new_dom;
out_put_dom:
landlock_put_ruleset(new_dom);
return ERR_PTR(err);
}
/*
* The returned access has the same lifetime as @ruleset.
*/
const struct landlock_rule *
landlock_find_rule(const struct landlock_ruleset *const ruleset,
const struct landlock_id id)
{
const struct rb_root *root;
const struct rb_node *node;
root = get_root((struct landlock_ruleset *)ruleset, id.type);
if (IS_ERR(root))
return NULL;
node = root->rb_node;
while (node) {
struct landlock_rule *this =
rb_entry(node, struct landlock_rule, node);
if (this->key.data == id.key.data)
return this;
if (this->key.data < id.key.data)
node = node->rb_right;
else
node = node->rb_left;
}
return NULL;
}
/*
* @layer_masks is read and may be updated according to the access request and
* the matching rule.
* @masks_array_size must be equal to ARRAY_SIZE(*layer_masks).
*
* Returns true if the request is allowed (i.e. relevant layer masks for the
* request are empty).
*/
bool landlock_unmask_layers(const struct landlock_rule *const rule,
const access_mask_t access_request,
layer_mask_t (*const layer_masks)[],
const size_t masks_array_size)
{
size_t layer_level;
if (!access_request || !layer_masks)
return true;
if (!rule)
return false;
/*
* An access is granted if, for each policy layer, at least one rule
* encountered on the pathwalk grants the requested access,
* regardless of its position in the layer stack. We must then check
* the remaining layers for each inode, from the first added layer to
* the last one. When there is multiple requested accesses, for each
* policy layer, the full set of requested accesses may not be granted
* by only one rule, but by the union (binary OR) of multiple rules.
* E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
*/
for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
const struct landlock_layer *const layer =
&rule->layers[layer_level];
const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
const unsigned long access_req = access_request;
unsigned long access_bit;
bool is_empty;
/*
* Records in @layer_masks which layer grants access to each
* requested access.
*/
is_empty = true;
for_each_set_bit(access_bit, &access_req, masks_array_size) {
if (layer->access & BIT_ULL(access_bit))
(*layer_masks)[access_bit] &= ~layer_bit;
is_empty = is_empty && !(*layer_masks)[access_bit];
}
if (is_empty)
return true;
}
return false;
}
typedef access_mask_t
get_access_mask_t(const struct landlock_ruleset *const ruleset,
const u16 layer_level);
/**
* landlock_init_layer_masks - Initialize layer masks from an access request
*
* Populates @layer_masks such that for each access right in @access_request,
* the bits for all the layers are set where this access right is handled.
*
* @domain: The domain that defines the current restrictions.
* @access_request: The requested access rights to check.
* @layer_masks: It must contain %LANDLOCK_NUM_ACCESS_FS or
* %LANDLOCK_NUM_ACCESS_NET elements according to @key_type.
* @key_type: The key type to switch between access masks of different types.
*
* Returns: An access mask where each access right bit is set which is handled
* in any of the active layers in @domain.
*/
access_mask_t
landlock_init_layer_masks(const struct landlock_ruleset *const domain,
const access_mask_t access_request,
layer_mask_t (*const layer_masks)[],
const enum landlock_key_type key_type)
{
access_mask_t handled_accesses = 0;
size_t layer_level, num_access;
get_access_mask_t *get_access_mask;
switch (key_type) {
case LANDLOCK_KEY_INODE:
get_access_mask = landlock_get_fs_access_mask;
num_access = LANDLOCK_NUM_ACCESS_FS;
break;
#if IS_ENABLED(CONFIG_INET)
case LANDLOCK_KEY_NET_PORT:
get_access_mask = landlock_get_net_access_mask;
num_access = LANDLOCK_NUM_ACCESS_NET;
break;
#endif /* IS_ENABLED(CONFIG_INET) */
default:
WARN_ON_ONCE(1);
return 0;
}
memset(layer_masks, 0,
array_size(sizeof((*layer_masks)[0]), num_access));
/* An empty access request can happen because of O_WRONLY | O_RDWR. */
if (!access_request)
return 0;
/* Saves all handled accesses per layer. */
for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
const unsigned long access_req = access_request;
unsigned long access_bit;
for_each_set_bit(access_bit, &access_req, num_access) {
if (BIT_ULL(access_bit) &
get_access_mask(domain, layer_level)) {
(*layer_masks)[access_bit] |=
BIT_ULL(layer_level);
handled_accesses |= BIT_ULL(access_bit);
}
}
}
return handled_accesses;
}