linux-stable/include/linux/mlx5/eswitch.h

211 lines
6.4 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
/*
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
*/
#ifndef _MLX5_ESWITCH_
#define _MLX5_ESWITCH_
#include <linux/mlx5/driver.h>
#include <net/devlink.h>
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
enum {
MLX5_ESWITCH_LEGACY,
MLX5_ESWITCH_OFFLOADS
};
enum {
REP_ETH,
REP_IB,
NUM_REP_TYPES,
};
enum {
REP_UNREGISTERED,
REP_REGISTERED,
REP_LOADED,
};
enum mlx5_switchdev_event {
MLX5_SWITCHDEV_EVENT_PAIR,
MLX5_SWITCHDEV_EVENT_UNPAIR,
};
struct mlx5_eswitch_rep;
struct mlx5_eswitch_rep_ops {
int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep);
void (*unload)(struct mlx5_eswitch_rep *rep);
void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
int (*event)(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep,
enum mlx5_switchdev_event event,
void *data);
};
struct mlx5_eswitch_rep_data {
void *priv;
atomic_t state;
};
struct mlx5_eswitch_rep {
struct mlx5_eswitch_rep_data rep_data[NUM_REP_TYPES];
u16 vport;
u16 vlan;
/* Only IB rep is using vport_index */
u16 vport_index;
u32 vlan_refcount;
struct mlx5_eswitch *esw;
};
void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
const struct mlx5_eswitch_rep_ops *ops,
u8 rep_type);
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type);
void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
u16 vport_num,
u8 rep_type);
struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
u16 vport_num);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
struct mlx5_eswitch *from_esw,
struct mlx5_eswitch_rep *rep, u32 sqn);
#ifdef CONFIG_MLX5_ESWITCH
enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw);
bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
/* Reg C0 usage:
* Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_REG_C0_OBJ(16) >
*
* Highest 4 bits of the reg c0 is the PF_NUM (range 0-15), 12 bits of
* unique non-zero vport id (range 1-4095). The rest (lowest 16 bits) is left
* for user data objects managed by a common mapping context.
* PFNUM + VPORT comprise the SOURCE_PORT matching.
*/
#define ESW_VPORT_BITS 12
#define ESW_PFNUM_BITS 4
#define ESW_SOURCE_PORT_METADATA_BITS (ESW_PFNUM_BITS + ESW_VPORT_BITS)
#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS)
#define ESW_REG_C0_USER_DATA_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
#define ESW_REG_C0_USER_DATA_METADATA_MASK GENMASK(ESW_REG_C0_USER_DATA_METADATA_BITS - 1, 0)
static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
{
return GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS);
}
u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
u16 vport_num);
net/mlx5e: VF tunnel TX traffic offloading When tunnel endpoint is on VF, driver still assumes that endpoint is on uplink and incorrectly configures encap rule offload according to that assumption. As a result, traffic is sent directly to the uplink and rules installed on representor of tunnel endpoint VF are ignored. Implement following changes to allow offloading tx traffic with tunnel endpoint on VF: - For tunneling flows perform route lookup on route and out devices pair. If out device is uplink and route device is VF of same physical port, then modify packet reg_c_0 metadata register (source port) with the value of VF vport. Use eswitch vhca_id->vport mapping introduced in one of previous patches in the series to obtain vport from route netdevice. - Recirculate encapsulated packets to VF vport in order to apply any flow rules installed on VF representor that match on encapsulated traffic. Only enable support for this functionality when all following conditions are true: - Hardware advertises capability to preserve reg_c_0 value on packet recirculation. - Vport metadata matching is enabled. - Termination tables are to be used by the flow. Example TC rules for VF tunnel traffic: 1. Rule that redirects packets from UL to VF rep that has the tunnel endpoint IP address: $ tc -s filter show dev enp8s0f0 ingress filter protocol ip pref 4 flower chain 0 filter protocol ip pref 4 flower chain 0 handle 0x1 dst_mac 16:c9:a0:2d:69:2c src_mac 0c:42:a1:58:ab:e4 eth_type ipv4 ip_flags nofrag in_hw in_hw_count 1 action order 1: mirred (Egress Redirect to device enp8s0f0_0) stolen index 3 ref 1 bind 1 installed 377 sec used 0 sec Action statistics: Sent 114096 bytes 952 pkt (dropped 0, overlimits 0 requeues 0) Sent software 0 bytes 0 pkt Sent hardware 114096 bytes 952 pkt backlog 0b 0p requeues 0 cookie 878fa48d8c423fc08c3b6ca599b50a97 no_percpu used_hw_stats delayed 2. Rule that decapsulates the tunneled flow and redirects to destination VF representor: $ tc -s filter show dev vxlan_sys_4789 ingress filter protocol ip pref 4 flower chain 0 filter protocol ip pref 4 flower chain 0 handle 0x1 dst_mac ca:2e:a7:3f:f5:0f src_mac 0a:40:bd:30:89:99 eth_type ipv4 enc_dst_ip 7.7.7.5 enc_src_ip 7.7.7.1 enc_key_id 98 enc_dst_port 4789 enc_tos 0 ip_flags nofrag in_hw in_hw_count 1 action order 1: tunnel_key unset pipe index 2 ref 1 bind 1 installed 434 sec used 434 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 used_hw_stats delayed action order 2: mirred (Egress Redirect to device enp8s0f0_1) stolen index 4 ref 1 bind 1 installed 434 sec used 0 sec Action statistics: Sent 129936 bytes 1082 pkt (dropped 0, overlimits 0 requeues 0) Sent software 0 bytes 0 pkt Sent hardware 129936 bytes 1082 pkt backlog 0b 0p requeues 0 cookie ac17cf398c4c69e4a5b2f7aabd1b88ff no_percpu used_hw_stats delayed Co-developed-by: Dmytro Linkin <dlinkin@nvidia.com> Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com> Signed-off-by: Vlad Buslov <vladbu@nvidia.com> Reviewed-by: Roi Dayan <roid@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
2021-01-21 17:41:52 +00:00
u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
u16 vport_num);
/* Reg C1 usage:
* Reg C1 = < Reserved(1) | ESW_TUN_ID(12) | ESW_TUN_OPTS(11) | ESW_ZONE_ID(8) >
*
* Highest bit is reserved for other offloads as marker bit, next 12 bits of reg c1
* is the encapsulation tunnel id, next 11 bits is encapsulation tunnel options,
* and the lowest 8 bits are used for zone id.
*
* Zone id is used to restore CT flow when packet misses on chain.
*
* Tunnel id and options are used together to restore the tunnel info metadata
* on miss and to support inner header rewrite by means of implicit chain 0
* flows.
*/
#define ESW_RESERVED_BITS 1
#define ESW_ZONE_ID_BITS 8
#define ESW_TUN_OPTS_BITS 11
#define ESW_TUN_ID_BITS 12
#define ESW_TUN_OPTS_OFFSET ESW_ZONE_ID_BITS
#define ESW_TUN_OFFSET ESW_TUN_OPTS_OFFSET
#define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0)
#define ESW_TUN_OPTS_MASK GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, ESW_TUN_OPTS_OFFSET)
#define ESW_TUN_MASK GENMASK(31 - ESW_RESERVED_BITS, ESW_TUN_OFFSET)
#define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */
#define ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT
/* 0x7FF is a reserved mapping */
#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT GENMASK(ESW_TUN_OPTS_BITS - 1, 0)
#define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \
ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT)
#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
/* 0x7FE is a reserved mapping for bridge ingress push vlan mark */
#define ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN (ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT - 1)
#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN ((ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN << \
ESW_TUN_OPTS_BITS) | \
ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN)
#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK \
GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
ESW_TUN_OPTS_OFFSET + 1)
u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
{
return MLX5_ESWITCH_LEGACY;
}
static inline enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
{
return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
}
static inline bool
mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
{
return false;
};
static inline bool
mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
{
return false;
};
static inline u32
mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num)
{
return 0;
};
static inline u32
mlx5_eswitch_get_vport_metadata_mask(void)
{
return 0;
}
static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
{
return 0;
}
static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
{
return NULL;
}
#endif /* CONFIG_MLX5_ESWITCH */
static inline bool is_mdev_legacy_mode(struct mlx5_core_dev *dev)
{
return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_LEGACY;
}
static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
{
return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS;
}
#endif