linux-stable/include/uapi/linux/neighbour.h
Daniel Borkmann 7482e3841d net, neigh: Add NTF_MANAGED flag for managed neighbor entries
Allow a user space control plane to insert entries with a new NTF_EXT_MANAGED
flag. The flag then indicates to the kernel that the neighbor entry should be
periodically probed for keeping the entry in NUD_REACHABLE state iff possible.

The use case for this is targeting XDP or tc BPF load-balancers which use
the bpf_fib_lookup() BPF helper in order to piggyback on neighbor resolution
for their backends. Given they cannot be resolved in fast-path, a control
plane inserts the L3 (without L2) entries manually into the neighbor table
and lets the kernel do the neighbor resolution either on the gateway or on
the backend directly in case the latter resides in the same L2. This avoids
to deal with L2 in the control plane and to rebuild what the kernel already
does best anyway.

NTF_EXT_MANAGED can be combined with NTF_EXT_LEARNED in order to avoid GC
eviction. The kernel then adds NTF_MANAGED flagged entries to a per-neighbor
table which gets triggered by the system work queue to periodically call
neigh_event_send() for performing the resolution. The implementation allows
migration from/to NTF_MANAGED neighbor entries, so that already existing
entries can be converted by the control plane if needed. Potentially, we could
make the interval for periodically calling neigh_event_send() configurable;
right now it's set to DELAY_PROBE_TIME which is also in line with mlxsw which
has similar driver-internal infrastructure c723c735fa ("mlxsw: spectrum_router:
Periodically update the kernel's neigh table"). In future, the latter could
possibly reuse the NTF_MANAGED neighbors as well.

Example:

  # ./ip/ip n replace 192.168.178.30 dev enp5s0 managed extern_learn
  # ./ip/ip n
  192.168.178.30 dev enp5s0 lladdr f4:8c:50:5e:71:9a managed extern_learn REACHABLE
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Link: https://linuxplumbersconf.org/event/11/contributions/953/
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-12 11:27:47 +01:00

215 lines
5.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __LINUX_NEIGHBOUR_H
#define __LINUX_NEIGHBOUR_H
#include <linux/types.h>
#include <linux/netlink.h>
struct ndmsg {
__u8 ndm_family;
__u8 ndm_pad1;
__u16 ndm_pad2;
__s32 ndm_ifindex;
__u16 ndm_state;
__u8 ndm_flags;
__u8 ndm_type;
};
enum {
NDA_UNSPEC,
NDA_DST,
NDA_LLADDR,
NDA_CACHEINFO,
NDA_PROBES,
NDA_VLAN,
NDA_PORT,
NDA_VNI,
NDA_IFINDEX,
NDA_MASTER,
NDA_LINK_NETNSID,
NDA_SRC_VNI,
NDA_PROTOCOL, /* Originator of entry */
NDA_NH_ID,
NDA_FDB_EXT_ATTRS,
NDA_FLAGS_EXT,
__NDA_MAX
};
#define NDA_MAX (__NDA_MAX - 1)
/*
* Neighbor Cache Entry Flags
*/
#define NTF_USE (1 << 0)
#define NTF_SELF (1 << 1)
#define NTF_MASTER (1 << 2)
#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
#define NTF_EXT_LEARNED (1 << 4)
#define NTF_OFFLOADED (1 << 5)
#define NTF_STICKY (1 << 6)
#define NTF_ROUTER (1 << 7)
/* Extended flags under NDA_FLAGS_EXT: */
#define NTF_EXT_MANAGED (1 << 0)
/*
* Neighbor Cache Entry States.
*/
#define NUD_INCOMPLETE 0x01
#define NUD_REACHABLE 0x02
#define NUD_STALE 0x04
#define NUD_DELAY 0x08
#define NUD_PROBE 0x10
#define NUD_FAILED 0x20
/* Dummy states */
#define NUD_NOARP 0x40
#define NUD_PERMANENT 0x80
#define NUD_NONE 0x00
/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
* address resolution or NUD.
*
* NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
* for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
* down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
* flagged entries explicitly are (which is also consistent with the routing
* subsystem).
*
* When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
* states don't make sense and thus are ignored. Such entries don't age and
* can roam.
*
* NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
* of a user space control plane, and automatically refreshed so that (if
* possible) they remain in NUD_REACHABLE state.
*/
struct nda_cacheinfo {
__u32 ndm_confirmed;
__u32 ndm_used;
__u32 ndm_updated;
__u32 ndm_refcnt;
};
/*****************************************************************
* Neighbour tables specific messages.
*
* To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
* NLM_F_DUMP flag set. Every neighbour table configuration is
* spread over multiple messages to avoid running into message
* size limits on systems with many interfaces. The first message
* in the sequence transports all not device specific data such as
* statistics, configuration, and the default parameter set.
* This message is followed by 0..n messages carrying device
* specific parameter sets.
* Although the ordering should be sufficient, NDTA_NAME can be
* used to identify sequences. The initial message can be identified
* by checking for NDTA_CONFIG. The device specific messages do
* not contain this TLV but have NDTPA_IFINDEX set to the
* corresponding interface index.
*
* To change neighbour table attributes, send RTM_SETNEIGHTBL
* with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
* NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
* otherwise. Device specific parameter sets can be changed by
* setting NDTPA_IFINDEX to the interface index of the corresponding
* device.
****/
struct ndt_stats {
__u64 ndts_allocs;
__u64 ndts_destroys;
__u64 ndts_hash_grows;
__u64 ndts_res_failed;
__u64 ndts_lookups;
__u64 ndts_hits;
__u64 ndts_rcv_probes_mcast;
__u64 ndts_rcv_probes_ucast;
__u64 ndts_periodic_gc_runs;
__u64 ndts_forced_gc_runs;
__u64 ndts_table_fulls;
};
enum {
NDTPA_UNSPEC,
NDTPA_IFINDEX, /* u32, unchangeable */
NDTPA_REFCNT, /* u32, read-only */
NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */
NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */
NDTPA_RETRANS_TIME, /* u64, msecs */
NDTPA_GC_STALETIME, /* u64, msecs */
NDTPA_DELAY_PROBE_TIME, /* u64, msecs */
NDTPA_QUEUE_LEN, /* u32 */
NDTPA_APP_PROBES, /* u32 */
NDTPA_UCAST_PROBES, /* u32 */
NDTPA_MCAST_PROBES, /* u32 */
NDTPA_ANYCAST_DELAY, /* u64, msecs */
NDTPA_PROXY_DELAY, /* u64, msecs */
NDTPA_PROXY_QLEN, /* u32 */
NDTPA_LOCKTIME, /* u64, msecs */
NDTPA_QUEUE_LENBYTES, /* u32 */
NDTPA_MCAST_REPROBES, /* u32 */
NDTPA_PAD,
__NDTPA_MAX
};
#define NDTPA_MAX (__NDTPA_MAX - 1)
struct ndtmsg {
__u8 ndtm_family;
__u8 ndtm_pad1;
__u16 ndtm_pad2;
};
struct ndt_config {
__u16 ndtc_key_len;
__u16 ndtc_entry_size;
__u32 ndtc_entries;
__u32 ndtc_last_flush; /* delta to now in msecs */
__u32 ndtc_last_rand; /* delta to now in msecs */
__u32 ndtc_hash_rnd;
__u32 ndtc_hash_mask;
__u32 ndtc_hash_chain_gc;
__u32 ndtc_proxy_qlen;
};
enum {
NDTA_UNSPEC,
NDTA_NAME, /* char *, unchangeable */
NDTA_THRESH1, /* u32 */
NDTA_THRESH2, /* u32 */
NDTA_THRESH3, /* u32 */
NDTA_CONFIG, /* struct ndt_config, read-only */
NDTA_PARMS, /* nested TLV NDTPA_* */
NDTA_STATS, /* struct ndt_stats, read-only */
NDTA_GC_INTERVAL, /* u64, msecs */
NDTA_PAD,
__NDTA_MAX
};
#define NDTA_MAX (__NDTA_MAX - 1)
/* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY:
* - FDB_NOTIFY_BIT - notify on activity/expire for any entry
* - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications
*/
enum {
FDB_NOTIFY_BIT = (1 << 0),
FDB_NOTIFY_INACTIVE_BIT = (1 << 1)
};
/* embedded into NDA_FDB_EXT_ATTRS:
* [NDA_FDB_EXT_ATTRS] = {
* [NFEA_ACTIVITY_NOTIFY]
* ...
* }
*/
enum {
NFEA_UNSPEC,
NFEA_ACTIVITY_NOTIFY,
NFEA_DONT_REFRESH,
__NFEA_MAX
};
#define NFEA_MAX (__NFEA_MAX - 1)
#endif