Merge branch 'mlxsw-multi-level-qdisc-offload'

Ido Schimmel says:

====================
mlxsw: Multi-level qdisc offload

Petr says:

Currently, mlxsw admits for offload a suitable root qdisc, and its
children. Thus up to two levels of hierarchy are offloaded. Often, this is
enough: one can configure TCs with RED and TCs with a shaper on, and can
even see counters for each TC by looking at a qdisc at a sufficiently
shallow position.

While simple, the system has obvious shortcomings. It is not possible to
configure both RED and shaping on one TC. It is not possible to place a
PRIO below root TBF, which would then be offloaded as port shaper. FIFOs
are only offloaded at root or directly below, which is confusing to users,
because RED and TBF of course have their own FIFO.

This patch set lifts assumptions that prevent offloading multi-level qdisc
trees.

In patch #1, offload of a graft operation is added to TBF. Grafts are
issued as another qdisc is linked to the qdisc in question, and give
drivers a chance to react to the linking. The absence of this event was not
a major issue so far, because TBF was not considered classful, which
changes with this patchset.

The codebase currently assumes that ETS and PRIO are the only classful
qdiscs. The following patches gradually lift this assumption.

In patch #2, calculation of traffic class and priomap of a qdisc is fixed.

Patch #3 fixes handling of future FIFOs. Child FIFO qdiscs may be created
and notified before their parent qdisc exists and therefore need special
handling.

Patches #4, #5 and #6 unify, respectively, child destruction, child
grafting, and cleanup of statistics.

Patch #7 adds a function that validates whether a given qdisc topology is
offloadable.

Finally in patch #8, TBF and RED become classful. At this point, FIFO
qdiscs grafted to an offloaded qdisc should always be offloaded.

Patch #9 adds a selftest to verify some offloadable and unoffloadable qdisc
trees.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-10-19 12:24:52 +01:00
commit 641a305b88
4 changed files with 636 additions and 85 deletions

View file

@ -50,12 +50,24 @@ struct mlxsw_sp_qdisc_ops {
struct mlxsw_sp_qdisc *(*find_class)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
u32 parent);
unsigned int num_classes;
u8 (*get_prio_bitmap)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct mlxsw_sp_qdisc *child);
int (*get_tclass_num)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct mlxsw_sp_qdisc *child);
};
/* Per-band configuration kept by an offloaded PRIO / ETS qdisc. */
struct mlxsw_sp_qdisc_ets_band {
/* Bit i is set when priority i is mapped to this band. */
u8 prio_bitmap;
/* Traffic class that backs this band. */
int tclass_num;
};
/* Private data of a PRIO / ETS qdisc: one entry per band, indexed by band
 * number.
 */
struct mlxsw_sp_qdisc_ets_data {
struct mlxsw_sp_qdisc_ets_band bands[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_qdisc {
u32 handle;
int tclass_num;
u8 prio_bitmap;
union {
struct red_stats red;
} xstats_base;
@ -67,6 +79,10 @@ struct mlxsw_sp_qdisc {
u64 backlog;
} stats_base;
union {
struct mlxsw_sp_qdisc_ets_data *ets_data;
};
struct mlxsw_sp_qdisc_ops *ops;
struct mlxsw_sp_qdisc *parent;
struct mlxsw_sp_qdisc *qdiscs;
@ -141,8 +157,7 @@ mlxsw_sp_qdisc_walk_cb_find(struct mlxsw_sp_qdisc *qdisc, void *data)
}
static struct mlxsw_sp_qdisc *
mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
bool root_only)
mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent)
{
struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
@ -150,8 +165,6 @@ mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
return NULL;
if (parent == TC_H_ROOT)
return &qdisc_state->root_qdisc;
if (root_only)
return NULL;
return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc,
mlxsw_sp_qdisc_walk_cb_find, &parent);
}
@ -187,6 +200,32 @@ mlxsw_sp_qdisc_reduce_parent_backlog(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
tmp->stats_base.backlog -= mlxsw_sp_qdisc->stats_base.backlog;
}
/* Resolve the priority bitmap that applies to @mlxsw_sp_qdisc.
 *
 * Climb the parent chain until an ancestor provides a get_prio_bitmap()
 * callback and ask it about the node through which we arrived. When the top of
 * the chain is reached without finding one, all priorities (0xff) apply.
 */
static u8 mlxsw_sp_qdisc_get_prio_bitmap(struct mlxsw_sp_port *mlxsw_sp_port,
					 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
	struct mlxsw_sp_qdisc *node = mlxsw_sp_qdisc;

	while (node->parent) {
		struct mlxsw_sp_qdisc *ancestor = node->parent;

		if (ancestor->ops->get_prio_bitmap)
			return ancestor->ops->get_prio_bitmap(ancestor, node);

		/* This ancestor has no say; keep climbing. */
		node = ancestor;
	}

	return 0xff;
}
#define MLXSW_SP_PORT_DEFAULT_TCLASS 0
/* Resolve the traffic class that @mlxsw_sp_qdisc is attached to.
 *
 * Climb the parent chain until an ancestor provides a get_tclass_num()
 * callback and ask it about the node through which we arrived. When the top of
 * the chain is reached without finding one, the port default traffic class is
 * used.
 */
static int mlxsw_sp_qdisc_get_tclass_num(struct mlxsw_sp_port *mlxsw_sp_port,
					 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
	struct mlxsw_sp_qdisc *node = mlxsw_sp_qdisc;

	while (node->parent) {
		struct mlxsw_sp_qdisc *ancestor = node->parent;

		if (ancestor->ops->get_tclass_num)
			return ancestor->ops->get_tclass_num(ancestor, node);

		/* This ancestor has no say; keep climbing. */
		node = ancestor;
	}

	return MLXSW_SP_PORT_DEFAULT_TCLASS;
}
static int
mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@ -194,6 +233,7 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc;
int err_hdroom = 0;
int err = 0;
int i;
if (!mlxsw_sp_qdisc)
return 0;
@ -211,6 +251,9 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
if (!mlxsw_sp_qdisc->ops)
return 0;
for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++)
mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
&mlxsw_sp_qdisc->qdiscs[i]);
mlxsw_sp_qdisc_reduce_parent_backlog(mlxsw_sp_qdisc);
if (mlxsw_sp_qdisc->ops->destroy)
err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port,
@ -226,6 +269,78 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
return err_hdroom ?: err;
}
/* Restrictions accumulated while descending the qdisc tree during validation.
 * The structure is passed by value, so what one subtree forbids does not leak
 * into its siblings.
 */
struct mlxsw_sp_qdisc_tree_validate {
/* A PRIO / ETS qdisc is not allowed at or below this point. */
bool forbid_ets;
/* A TBF qdisc is not allowed at or below this point. */
bool forbid_tbf;
/* A RED qdisc is not allowed at or below this point. */
bool forbid_red;
};
static int
__mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
			       struct mlxsw_sp_qdisc_tree_validate validate);

/* Validate every class of @mlxsw_sp_qdisc against the restrictions in
 * @validate. Stops at, and returns, the first error; returns 0 when all
 * children are acceptable.
 */
static int
mlxsw_sp_qdisc_tree_validate_children(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
				      struct mlxsw_sp_qdisc_tree_validate validate)
{
	unsigned int band = 0;
	int err = 0;

	while (!err && band < mlxsw_sp_qdisc->num_classes) {
		err = __mlxsw_sp_qdisc_tree_validate(&mlxsw_sp_qdisc->qdiscs[band],
						     validate);
		band++;
	}

	return err;
}
static int
__mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct mlxsw_sp_qdisc_tree_validate validate)
{
if (!mlxsw_sp_qdisc->ops)
return 0;
switch (mlxsw_sp_qdisc->ops->type) {
case MLXSW_SP_QDISC_FIFO:
break;
case MLXSW_SP_QDISC_RED:
if (validate.forbid_red)
return -EINVAL;
validate.forbid_red = true;
validate.forbid_ets = true;
break;
case MLXSW_SP_QDISC_TBF:
if (validate.forbid_tbf)
return -EINVAL;
validate.forbid_tbf = true;
validate.forbid_ets = true;
break;
case MLXSW_SP_QDISC_PRIO:
case MLXSW_SP_QDISC_ETS:
if (validate.forbid_ets)
return -EINVAL;
validate.forbid_ets = true;
break;
default:
WARN_ON(1);
return -EINVAL;
}
return mlxsw_sp_qdisc_tree_validate_children(mlxsw_sp_qdisc, validate);
}
static int mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_port *mlxsw_sp_port)
{
struct mlxsw_sp_qdisc_tree_validate validate = {};
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
mlxsw_sp_qdisc = &mlxsw_sp_port->qdisc->root_qdisc;
return __mlxsw_sp_qdisc_tree_validate(mlxsw_sp_qdisc, validate);
}
static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port,
u32 handle,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
@ -268,6 +383,10 @@ static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_qdisc->num_classes = ops->num_classes;
mlxsw_sp_qdisc->ops = ops;
mlxsw_sp_qdisc->handle = handle;
err = mlxsw_sp_qdisc_tree_validate(mlxsw_sp_port);
if (err)
goto err_replace;
err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params);
if (err)
goto err_replace;
@ -406,13 +525,17 @@ mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u64 *p_tx_bytes, u64 *p_tx_packets,
u64 *p_drops, u64 *p_backlog)
{
int tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_port_xstats *xstats;
u64 tx_bytes, tx_packets;
u8 prio_bitmap;
int tclass_num;
prio_bitmap = mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port,
mlxsw_sp_qdisc);
tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
mlxsw_sp_qdisc->prio_bitmap,
mlxsw_sp_qdisc_bstats_per_priority_get(xstats, prio_bitmap,
&tx_packets, &tx_bytes);
*p_tx_packets += tx_packets;
@ -506,17 +629,21 @@ static void
mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
int tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
struct red_stats *red_base;
u8 prio_bitmap;
int tclass_num;
prio_bitmap = mlxsw_sp_qdisc_get_prio_bitmap(mlxsw_sp_port,
mlxsw_sp_qdisc);
tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
stats_base = &mlxsw_sp_qdisc->stats_base;
red_base = &mlxsw_sp_qdisc->xstats_base.red;
mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
mlxsw_sp_qdisc->prio_bitmap,
mlxsw_sp_qdisc_bstats_per_priority_get(xstats, prio_bitmap,
&stats_base->tx_packets,
&stats_base->tx_bytes);
red_base->prob_mark = xstats->tc_ecn[tclass_num];
@ -533,8 +660,10 @@ static int
mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
mlxsw_sp_qdisc->tclass_num);
int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, tclass_num);
}
static int
@ -564,6 +693,14 @@ mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
return 0;
}
static int
mlxsw_sp_qdisc_future_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port,
u32 handle, unsigned int band,
struct mlxsw_sp_qdisc *child_qdisc);
static void
mlxsw_sp_qdisc_future_fifos_init(struct mlxsw_sp_port *mlxsw_sp_port,
u32 handle);
static int
mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
@ -571,9 +708,19 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct tc_red_qopt_offload_params *p = params;
int tclass_num = mlxsw_sp_qdisc->tclass_num;
int tclass_num;
u32 min, max;
u64 prob;
int err;
err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, 0,
&mlxsw_sp_qdisc->qdiscs[0]);
if (err)
return err;
mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC);
tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
/* calculate probability in percentage */
prob = p->probability;
@ -616,11 +763,13 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
void *xstats_ptr)
{
struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red;
int tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_port_xstats *xstats;
struct red_stats *res = xstats_ptr;
int early_drops, marks, pdrops;
int tclass_num;
tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop;
@ -643,11 +792,13 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct tc_qopt_offload_stats *stats_ptr)
{
int tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
u64 overlimits;
int tclass_num;
tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
stats_base = &mlxsw_sp_qdisc->stats_base;
@ -665,11 +816,12 @@ static struct mlxsw_sp_qdisc *
mlxsw_sp_qdisc_leaf_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
u32 parent)
{
return NULL;
/* RED and TBF are formally classful qdiscs, but all class references,
* including X:0, just refer to the same one class.
*/
return &mlxsw_sp_qdisc->qdiscs[0];
}
#define MLXSW_SP_PORT_DEFAULT_TCLASS 0
static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = {
.type = MLXSW_SP_QDISC_RED,
.check_params = mlxsw_sp_qdisc_red_check_params,
@ -680,14 +832,19 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = {
.get_xstats = mlxsw_sp_qdisc_get_red_xstats,
.clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats,
.find_class = mlxsw_sp_qdisc_leaf_find_class,
.num_classes = 1,
};
static int mlxsw_sp_qdisc_graft(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
u8 band, u32 child_handle);
static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_red_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent);
if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
@ -709,6 +866,9 @@ static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
case TC_RED_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
case TC_RED_GRAFT:
return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, 0,
p->child_handle);
default:
return -EOPNOTSUPP;
}
@ -749,9 +909,12 @@ static int
mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
mlxsw_sp_qdisc->tclass_num, 0,
tclass_num, 0,
MLXSW_REG_QEEC_MAS_DIS, 0);
}
@ -835,9 +998,19 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
{
struct tc_tbf_qopt_offload_replace_params *p = params;
u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p);
int tclass_num;
u8 burst_size;
int err;
err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle, 0,
&mlxsw_sp_qdisc->qdiscs[0]);
if (err)
return err;
mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC);
tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc);
err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size);
if (WARN_ON_ONCE(err))
/* check_params above was supposed to reject this value. */
@ -853,7 +1026,7 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
*/
return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
mlxsw_sp_qdisc->tclass_num, 0,
tclass_num, 0,
rate_kbps, burst_size);
}
@ -886,6 +1059,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = {
.get_stats = mlxsw_sp_qdisc_get_tbf_stats,
.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
.find_class = mlxsw_sp_qdisc_leaf_find_class,
.num_classes = 1,
};
static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
@ -893,7 +1067,7 @@ static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent);
if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
@ -912,6 +1086,9 @@ static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
case TC_TBF_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
case TC_TBF_GRAFT:
return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc, 0,
p->child_handle);
default:
return -EOPNOTSUPP;
}
@ -962,6 +1139,32 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
};
/* Offload a FIFO that was announced for @band of qdisc @handle before that
 * qdisc itself existed.
 *
 * If the future-FIFO cache was recorded for @handle and has an entry for
 * @band, @child_qdisc is replaced with a FIFO under handle TC_H_UNSPEC and the
 * result of that replace is returned. Otherwise nothing is done and 0 is
 * returned.
 */
static int
mlxsw_sp_qdisc_future_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port,
				   u32 handle, unsigned int band,
				   struct mlxsw_sp_qdisc *child_qdisc)
{
	struct mlxsw_sp_qdisc_state *state = mlxsw_sp_port->qdisc;

	/* The cache describes a different qdisc. */
	if (handle != state->future_handle)
		return 0;

	/* No FIFO was announced for this band. */
	if (!state->future_fifos[band])
		return 0;

	return mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, child_qdisc,
				      &mlxsw_sp_qdisc_ops_fifo, NULL);
}
/* Reset the future-FIFO cache of @mlxsw_sp_port: forget all recorded bands and
 * make the cache refer to qdisc @handle from now on.
 */
static void
mlxsw_sp_qdisc_future_fifos_init(struct mlxsw_sp_port *mlxsw_sp_port,
				 u32 handle)
{
	struct mlxsw_sp_qdisc_state *state = mlxsw_sp_port->qdisc;

	memset(state->future_fifos, 0, sizeof(state->future_fifos));
	state->future_handle = handle;
}
static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_fifo_qopt_offload *p)
{
@ -970,16 +1173,15 @@ static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
unsigned int band;
u32 parent_handle;
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent);
if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) {
parent_handle = TC_H_MAJ(p->parent);
if (parent_handle != qdisc_state->future_handle) {
/* This notification is for a different Qdisc than
* previously. Wipe the future cache.
*/
memset(qdisc_state->future_fifos, 0,
sizeof(qdisc_state->future_fifos));
qdisc_state->future_handle = parent_handle;
mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port,
parent_handle);
}
band = TC_H_MIN(p->parent) - 1;
@ -1038,11 +1240,10 @@ static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_port_ets_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0, false, 0);
mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
&mlxsw_sp_qdisc->qdiscs[i]);
mlxsw_sp_qdisc->qdiscs[i].prio_bitmap = 0;
}
kfree(mlxsw_sp_qdisc->ets_data);
mlxsw_sp_qdisc->ets_data = NULL;
return 0;
}
@ -1071,6 +1272,31 @@ mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
return __mlxsw_sp_qdisc_ets_check_params(p->bands);
}
/* Tree-walk callback that resets the statistics base of one qdisc while
 * keeping its recorded backlog intact. @mlxsw_sp_port is the opaque walk
 * argument and is handed to the clean_stats() callback as the port.
 *
 * Always returns NULL.
 */
static struct mlxsw_sp_qdisc *
mlxsw_sp_qdisc_walk_cb_clean_stats(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
				   void *mlxsw_sp_port)
{
	u64 saved_backlog;

	/* Nothing is offloaded in this slot. */
	if (!mlxsw_sp_qdisc->ops)
		return NULL;

	saved_backlog = mlxsw_sp_qdisc->stats_base.backlog;
	if (mlxsw_sp_qdisc->ops->clean_stats)
		mlxsw_sp_qdisc->ops->clean_stats(mlxsw_sp_port,
						 mlxsw_sp_qdisc);
	/* Restore the backlog base saved before clean_stats() ran. */
	mlxsw_sp_qdisc->stats_base.backlog = saved_backlog;

	return NULL;
}
/* Reset the statistics base of every qdisc visited by a walk that starts at
 * @mlxsw_sp_qdisc, preserving each one's backlog.
 */
static void
mlxsw_sp_qdisc_tree_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
	void *walk_arg = mlxsw_sp_port;

	mlxsw_sp_qdisc_walk(mlxsw_sp_qdisc,
			    mlxsw_sp_qdisc_walk_cb_clean_stats, walk_arg);
}
static int
__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
@ -1079,69 +1305,80 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port,
const unsigned int *weights,
const u8 *priomap)
{
struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
struct mlxsw_sp_qdisc_ets_data *ets_data = mlxsw_sp_qdisc->ets_data;
struct mlxsw_sp_qdisc_ets_band *ets_band;
struct mlxsw_sp_qdisc *child_qdisc;
int tclass, i, band, backlog;
u8 old_priomap;
u8 old_priomap, new_priomap;
int i, band;
int err;
if (!ets_data) {
ets_data = kzalloc(sizeof(*ets_data), GFP_KERNEL);
if (!ets_data)
return -ENOMEM;
mlxsw_sp_qdisc->ets_data = ets_data;
for (band = 0; band < mlxsw_sp_qdisc->num_classes; band++) {
int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
ets_band = &ets_data->bands[band];
ets_band->tclass_num = tclass_num;
}
}
for (band = 0; band < nbands; band++) {
tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
int tclass_num;
child_qdisc = &mlxsw_sp_qdisc->qdiscs[band];
old_priomap = child_qdisc->prio_bitmap;
child_qdisc->prio_bitmap = 0;
ets_band = &ets_data->bands[band];
tclass_num = ets_band->tclass_num;
old_priomap = ets_band->prio_bitmap;
new_priomap = 0;
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
tclass, 0, !!quanta[band],
tclass_num, 0, !!quanta[band],
weights[band]);
if (err)
return err;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
if (priomap[i] == band) {
child_qdisc->prio_bitmap |= BIT(i);
new_priomap |= BIT(i);
if (BIT(i) & old_priomap)
continue;
err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
i, tclass);
i, tclass_num);
if (err)
return err;
}
}
child_qdisc->tclass_num = tclass;
ets_band->prio_bitmap = new_priomap;
if (old_priomap != child_qdisc->prio_bitmap &&
child_qdisc->ops && child_qdisc->ops->clean_stats) {
backlog = child_qdisc->stats_base.backlog;
child_qdisc->ops->clean_stats(mlxsw_sp_port,
child_qdisc);
child_qdisc->stats_base.backlog = backlog;
}
if (old_priomap != new_priomap)
mlxsw_sp_qdisc_tree_clean_stats(mlxsw_sp_port,
child_qdisc);
if (handle == qdisc_state->future_handle &&
qdisc_state->future_fifos[band]) {
err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC,
child_qdisc,
&mlxsw_sp_qdisc_ops_fifo,
NULL);
if (err)
return err;
}
err = mlxsw_sp_qdisc_future_fifo_replace(mlxsw_sp_port, handle,
band, child_qdisc);
if (err)
return err;
}
for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
ets_band = &ets_data->bands[band];
ets_band->prio_bitmap = 0;
child_qdisc = &mlxsw_sp_qdisc->qdiscs[band];
child_qdisc->prio_bitmap = 0;
mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
mlxsw_sp_port_ets_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
tclass, 0, false, 0);
ets_band->tclass_num, 0, false, 0);
}
qdisc_state->future_handle = TC_H_UNSPEC;
memset(qdisc_state->future_fifos, 0, sizeof(qdisc_state->future_fifos));
mlxsw_sp_qdisc_future_fifos_init(mlxsw_sp_port, TC_H_UNSPEC);
return 0;
}
@ -1243,6 +1480,31 @@ mlxsw_sp_qdisc_prio_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
return &mlxsw_sp_qdisc->qdiscs[band];
}
/* Map @child, an element of @mlxsw_sp_qdisc's class array, to the band record
 * of the same index. An index outside the band array triggers a warning and
 * falls back to band 0.
 */
static struct mlxsw_sp_qdisc_ets_band *
mlxsw_sp_qdisc_ets_get_band(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
			    struct mlxsw_sp_qdisc *child)
{
	/* The child's position in the class array is its band number. */
	unsigned int idx = child - mlxsw_sp_qdisc->qdiscs;

	idx = WARN_ON(idx >= IEEE_8021QAZ_MAX_TCS) ? 0 : idx;

	return &mlxsw_sp_qdisc->ets_data->bands[idx];
}
static u8
mlxsw_sp_qdisc_ets_get_prio_bitmap(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct mlxsw_sp_qdisc *child)
{
return mlxsw_sp_qdisc_ets_get_band(mlxsw_sp_qdisc, child)->prio_bitmap;
}
static int
mlxsw_sp_qdisc_ets_get_tclass_num(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct mlxsw_sp_qdisc *child)
{
return mlxsw_sp_qdisc_ets_get_band(mlxsw_sp_qdisc, child)->tclass_num;
}
static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.type = MLXSW_SP_QDISC_PRIO,
.check_params = mlxsw_sp_qdisc_prio_check_params,
@ -1253,6 +1515,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
.find_class = mlxsw_sp_qdisc_prio_find_class,
.num_classes = IEEE_8021QAZ_MAX_TCS,
.get_prio_bitmap = mlxsw_sp_qdisc_ets_get_prio_bitmap,
.get_tclass_num = mlxsw_sp_qdisc_ets_get_tclass_num,
};
static int
@ -1304,6 +1568,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
.find_class = mlxsw_sp_qdisc_prio_find_class,
.num_classes = IEEE_8021QAZ_MAX_TCS,
.get_prio_bitmap = mlxsw_sp_qdisc_ets_get_prio_bitmap,
.get_tclass_num = mlxsw_sp_qdisc_ets_get_tclass_num,
};
/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting
@ -1331,10 +1597,9 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = {
* grafted corresponds to the parent handle. If the two don't match, we
* unoffload the child.
*/
static int
__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
u8 band, u32 child_handle)
static int mlxsw_sp_qdisc_graft(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
u8 band, u32 child_handle)
{
struct mlxsw_sp_qdisc *old_qdisc;
u32 parent;
@ -1367,21 +1632,12 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
return -EOPNOTSUPP;
}
static int
mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct tc_prio_qopt_offload_graft_params *p)
{
return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
p->band, p->child_handle);
}
static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent);
if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
@ -1401,8 +1657,9 @@ static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
case TC_PRIO_GRAFT:
return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->graft_params);
return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
p->graft_params.band,
p->graft_params.child_handle);
default:
return -EOPNOTSUPP;
}
@ -1425,7 +1682,7 @@ static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent);
if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
@ -1445,9 +1702,9 @@ static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
case TC_ETS_GRAFT:
return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
p->graft_params.band,
p->graft_params.child_handle);
return mlxsw_sp_qdisc_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
p->graft_params.band,
p->graft_params.child_handle);
default:
return -EOPNOTSUPP;
}
@ -1902,6 +2159,7 @@ mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_port,
struct flow_block_cb *block_cb;
struct mlxsw_sp_qdisc *qdisc;
bool register_block = false;
int tclass_num;
int err;
block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_qevent_block_cb, mlxsw_sp);
@ -1934,9 +2192,10 @@ mlxsw_sp_setup_tc_block_qevent_bind(struct mlxsw_sp_port *mlxsw_sp_port,
goto err_binding_exists;
}
tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, qdisc);
qevent_binding = mlxsw_sp_qevent_binding_create(mlxsw_sp_port,
f->sch->handle,
qdisc->tclass_num,
tclass_num,
span_trigger,
action_mask);
if (IS_ERR(qevent_binding)) {
@ -2048,8 +2307,6 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
return -ENOMEM;
mutex_init(&qdisc_state->lock);
qdisc_state->root_qdisc.prio_bitmap = 0xff;
qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
mlxsw_sp_port->qdisc = qdisc_state;
return 0;
}

View file

@ -977,6 +977,7 @@ enum tc_tbf_command {
TC_TBF_REPLACE,
TC_TBF_DESTROY,
TC_TBF_STATS,
TC_TBF_GRAFT,
};
struct tc_tbf_qopt_offload_replace_params {
@ -992,6 +993,7 @@ struct tc_tbf_qopt_offload {
union {
struct tc_tbf_qopt_offload_replace_params replace_params;
struct tc_qopt_offload_stats stats;
u32 child_handle;
};
};

View file

@ -184,6 +184,20 @@ static int tbf_offload_dump(struct Qdisc *sch)
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}
/* Notify the device behind @sch that @new has been linked as the child of
 * this TBF qdisc in place of @old, by issuing a TC_TBF_GRAFT offload command
 * through the generic graft helper.
 */
static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new,
			      struct Qdisc *old, struct netlink_ext_ack *extack)
{
	struct tc_tbf_qopt_offload graft_offload = {
		.command	= TC_TBF_GRAFT,
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_TBF, &graft_offload,
				   extack);
}
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
@ -547,6 +561,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->qdisc);
tbf_offload_graft(sch, new, *old, extack);
return 0;
}

View file

@ -0,0 +1,276 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test qdisc offload indication
# Names of the test functions defined below; presumably consumed by tests_run
# from the sourced lib.sh -- confirm against that library.
ALL_TESTS="
test_root
test_etsprio
"
# NOTE(review): looks like the number of interfaces lib.sh should set up for
# this test -- confirm against lib.sh.
NUM_NETIFS=1
# Pull in the shared forwarding-selftest helpers, located relative to this
# script.
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/lib.sh
# Check that the qdisc with the given handle exists on $h1 and is NOT reported
# as offloaded.
#   $1 - qdisc handle, e.g. "1:"
check_not_offloaded()
{
	local handle=$1; shift
	local seen_handle
	local is_offloaded

	# The qdisc has to exist for the offload check to be meaningful.
	seen_handle=$(qdisc_stats_get $h1 "$handle" .handle)
	[[ $seen_handle == '"'$handle'"' ]]
	check_err $? "Qdisc with handle $handle does not exist"

	is_offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
	[[ $is_offloaded == true ]]
	check_fail $? "Qdisc with handle $handle offloaded, but should not be"
}
# Check that every qdisc on $h1 is reported as offloaded, then run pre_cleanup.
#   $1 - optional handle of a qdisc to check explicitly; may be empty
check_all_offloaded()
{
	local handle=$1; shift

	if [[ -n $handle ]]; then
		local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)

		[[ $offloaded == true ]]
		check_err $? "Qdisc with handle $handle not offloaded"
	fi

	# Collect field 5 (the parent) of every listed qdisc, invisible ones
	# included, whose line lacks the "offloaded" flag. "root" is padded to
	# "parent root" so the field lands in the same column for all lines.
	local unoffloaded=$(tc q sh dev $h1 invisible |
			    grep -v offloaded |
			    sed s/root/parent\ root/ |
			    cut -d' ' -f 5)

	[[ -z $unoffloaded ]]
	check_err $? "Qdiscs with following parents not offloaded: $unoffloaded"

	pre_cleanup
}
# Install an 8-band ETS qdisc on $h1, run the remaining arguments as a command
# while it is in place, then delete it again.
#   $1 - handle for the new qdisc
#   $2 - attachment point ("root" or "parent X:Y")
#   $@ - command to run
with_ets()
{
	local qdisc_handle=$1; shift
	local attach_point=$1; shift

	tc qdisc add dev $h1 $attach_point handle $qdisc_handle \
		ets bands 8 priomap 7 6 5 4 3 2 1 0

	"$@"

	tc qdisc del dev $h1 $attach_point
}
# Install an 8-band PRIO qdisc on $h1, run the remaining arguments as a command
# while it is in place, then delete it again.
#   $1 - handle for the new qdisc
#   $2 - attachment point ("root" or "parent X:Y")
#   $@ - command to run
with_prio()
{
	local qdisc_handle=$1; shift
	local attach_point=$1; shift

	tc qdisc add dev $h1 $attach_point handle $qdisc_handle \
		prio bands 8 priomap 7 6 5 4 3 2 1 0

	"$@"

	tc qdisc del dev $h1 $attach_point
}
# Install a RED qdisc on $h1, run the remaining arguments as a command while it
# is in place, then delete it again.
#   $1 - handle for the new qdisc
#   $2 - attachment point ("root" or "parent X:Y")
#   $@ - command to run
with_red()
{
	local qdisc_handle=$1; shift
	local attach_point=$1; shift

	tc qdisc add dev $h1 $attach_point handle $qdisc_handle \
		red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500

	"$@"

	tc qdisc del dev $h1 $attach_point
}
# Install a TBF qdisc on $h1, run the remaining arguments as a command while it
# is in place, then delete it again.
#   $1 - handle for the new qdisc
#   $2 - attachment point ("root" or "parent X:Y")
#   $@ - command to run
with_tbf()
{
	local qdisc_handle=$1; shift
	local attach_point=$1; shift

	tc qdisc add dev $h1 $attach_point handle $qdisc_handle \
		tbf rate 400Mbit burst 128K limit 1M

	"$@"

	tc qdisc del dev $h1 $attach_point
}
# Install a pfifo qdisc on $h1, run the remaining arguments as a command while
# it is in place, then delete it again.
#   $1 - handle for the new qdisc
#   $2 - attachment point ("root" or "parent X:Y")
#   $@ - command to run
with_pfifo()
{
	local qdisc_handle=$1; shift
	local attach_point=$1; shift

	tc qdisc add dev $h1 $attach_point handle $qdisc_handle pfifo limit 100K

	"$@"

	tc qdisc del dev $h1 $attach_point
}
# Install a bfifo qdisc on $h1, run the remaining arguments as a command while
# it is in place, then delete it again.
#   $1 - handle for the new qdisc
#   $2 - attachment point ("root" or "parent X:Y")
#   $@ - command to run
with_bfifo()
{
	local qdisc_handle=$1; shift
	local attach_point=$1; shift

	tc qdisc add dev $h1 $attach_point handle $qdisc_handle bfifo limit 100K

	"$@"

	tc qdisc del dev $h1 $attach_point
}
# Install a DRR qdisc on $h1, run the remaining arguments as a command while it
# is in place, then delete it again.
#   $1 - handle for the new qdisc
#   $2 - attachment point ("root" or "parent X:Y")
#   $@ - command to run
with_drr()
{
	local qdisc_handle=$1; shift
	local attach_point=$1; shift

	tc qdisc add dev $h1 $attach_point handle $qdisc_handle drr

	"$@"

	tc qdisc del dev $h1 $attach_point
}
# Build a chain of nested qdiscs and run a command at the bottom of it.
#   $1 - numeric handle for the next qdisc to create
#   $2 - numeric handle of its parent (0 means attach at root)
#   $3... - qdisc kinds, outermost first, then "--", then the command to run
#
# Each level is created through with_<kind>, which recurses back here with the
# handle doubled. Once "--" is reached, the command is invoked with the handle
# of the innermost qdisc ("<hex>:") followed by any remaining arguments.
with_qdiscs()
{
	local handle=$1; shift
	local parent=$1; shift
	local kind=$1; shift
	local next_handle=$((handle * 2))
	local locus;

	if [[ $kind != "--" ]]; then
		if ((parent != 0)); then
			locus=$(printf "parent %x:1" $parent)
		else
			locus=root
		fi

		with_$kind $(printf %x: $handle) "$locus" \
			with_qdiscs $next_handle $handle "$@"
	else
		local cmd=$1; shift

		$cmd $(printf %x: $parent) "$@"
	fi
}
# Compose a test name describing a qdisc chain.
#   $1 - numeric handle of the qdisc the chain hangs off (0 for root)
#   $@ - qdisc kinds in the chain
#
# Prints "root" followed by one "-<KIND>" per chain element, upper-cased. When
# the parent is not 0, the kind of the parent qdisc as reported on $h1 is
# inserted first.
get_name()
{
	local parent=$1; shift
	local name=$(echo "" "${@^^}" | tr ' ' -)
	# Keep the scratch variable local so that callers' (or a global) "kind"
	# is not clobbered when this function runs outside a subshell.
	local kind

	if ((parent != 0)); then
		kind=$(qdisc_stats_get $h1 $parent: .kind)
		# Strip the surrounding double quotes from the reported value.
		kind=${kind%\"}
		kind=${kind#\"}
		name="-${kind^^}$name"
	fi

	echo root$name
}
# Build the given qdisc chain and check that all of it is offloaded.
#   $1 - numeric handle for the first qdisc
#   $2 - numeric handle of the parent (0 for root)
#   $@ - qdisc kinds, outermost first
do_test_offloaded()
{
	local handle=$1; shift
	local parent=$1; shift
	local title

	RET=0
	with_qdiscs $handle $parent "$@" -- check_all_offloaded

	title=$(get_name $parent "$@")
	log_test $title" offloaded"
}
# Build the given qdisc chain and check that its innermost qdisc exists but is
# not offloaded.
#   $1 - numeric handle for the first qdisc
#   $2 - numeric handle of the parent (0 for root)
#   $@ - qdisc kinds, outermost first
do_test_nooffload()
{
	local handle=$1; shift
	local parent=$1; shift

	# The test name comes from get_name below; no local copy is needed.
	RET=0
	with_qdiscs $handle $parent "$@" -- check_not_offloaded
	log_test $(get_name $parent "$@")" not offloaded"
}
# Run the offload checks over a matrix of qdisc chains.
#   $1 - numeric handle for the first qdisc of each chain
#   $2 - numeric handle of the parent (0 for root)
#
# Expected to be offloaded: an optional container (ets / prio), followed by an
# optional RED/TBF stack with each kind used at most once, followed by an
# optional FIFO. Expected not to be offloaded: chains that repeat RED or TBF
# under a container, and DRR.
do_test_combinations()
{
	local handle=$1; shift
	local parent=$1; shift

	local cont
	local leaf
	local fifo

	for cont in "" ets prio; do
		for leaf in "" red tbf "red tbf" "tbf red"; do
			for fifo in "" pfifo bfifo; do
				# An empty chain is not a test case.
				[[ -n "$cont$leaf$fifo" ]] || continue

				# Deliberately unquoted: empty elements vanish
				# and "red tbf" splits into two kinds.
				do_test_offloaded $handle $parent \
						  $cont $leaf $fifo
			done
		done
	done

	for cont in ets prio; do
		for leaf in red tbf; do
			do_test_nooffload $handle $parent $cont red tbf $leaf
			do_test_nooffload $handle $parent $cont tbf red $leaf
		done

		for leaf in "red red" "tbf tbf"; do
			do_test_nooffload $handle $parent $cont $leaf
		done
	done

	do_test_nooffload $handle $parent drr
}
# Run the combination matrix with chains attached at the root: the first qdisc
# gets handle 1 and parent 0.
test_root()
{
do_test_combinations 1 0
}
# Check offload of trees where several bands of one ETS or PRIO qdisc carry
# their own RED / TBF / FIFO sub-chains.
#   $1 - attachment point of the container qdisc ("root" or "parent X:Y")
#   $2 - infix for the test names describing what sits above the container
#
# Each scenario runs once per container kind, and the test name carries that
# kind so that the ETS and PRIO results can be told apart.
do_test_etsprio()
{
	local parent=$1; shift
	local tbfpfx=$1; shift
	local cont
	local cont_name

	for cont in ets prio; do
		cont_name=${cont^^}

		RET=0
		with_$cont 8: "$parent" \
			with_red 11: "parent 8:1" \
			with_red 12: "parent 8:2" \
			with_tbf 13: "parent 8:3" \
			with_tbf 14: "parent 8:4" \
			check_all_offloaded
		log_test "root$tbfpfx-$cont_name-{RED,TBF} offloaded"

		RET=0
		with_$cont 8: "$parent" \
			with_red 81: "parent 8:1" \
			with_tbf 811: "parent 81:1" \
			with_tbf 84: "parent 8:4" \
			with_red 841: "parent 84:1" \
			check_all_offloaded
		log_test "root$tbfpfx-$cont_name-{RED-TBF,TBF-RED} offloaded"

		RET=0
		with_$cont 8: "$parent" \
			with_red 81: "parent 8:1" \
			with_tbf 811: "parent 81:1" \
			with_bfifo 8111: "parent 811:1" \
			with_tbf 82: "parent 8:2" \
			with_red 821: "parent 82:1" \
			with_bfifo 8211: "parent 821:1" \
			check_all_offloaded
		log_test "root$tbfpfx-$cont_name-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded"
	done
}
# Run the multi-band ETS / PRIO scenarios with the container attached at root
# and no extra infix in the test names.
test_etsprio()
{
do_test_etsprio root ""
}
# Delete the root qdisc of $h1. Output and errors are discarded, so this is
# harmless when no qdisc is installed.
cleanup()
{
tc qdisc del dev $h1 root &>/dev/null
}
# Run cleanup whenever the script exits.
trap cleanup EXIT
# The single interface under test, taken from the NETIFS array (presumably
# populated by lib.sh -- confirm).
h1=${NETIFS[p1]}
# tests_run is not defined in this file; presumably the lib.sh runner that
# executes the functions listed in ALL_TESTS.
tests_run
exit $EXIT_STATUS