sfc: suppress MCDI errors from ARFS

In high connection count usage, the NIC's filter table may be filled with
 sufficiently many ARFS filters that further insertions fail.  As this
 does not represent a correctness issue, do not log the resulting MCDI
 errors.  Add a debug-level message under the (by default disabled)
 rx_status category instead; and take the opportunity to do a little extra
 expiry work.
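
For illustration, a minimal sketch (not from the patch) of how a netif_dbg() message under the rx_status category is gated; the adapter struct and function names below are hypothetical, the only assumption being a msg_enable bitmask and a net_device pointer, as sfc's struct efx_nic already has:

#include <linux/netdevice.h>

/* Hypothetical adapter for the example; sfc's struct efx_nic carries the
 * same two fields used here.
 */
struct example_adapter {
	struct net_device *net_dev;
	u32 msg_enable;		/* NETIF_MSG_* bits; rx_status is off by default */
};

static void example_report_steer_failure(struct example_adapter *adap, int rc)
{
	/* Emits only if NETIF_MSG_RX_STATUS is set in adap->msg_enable
	 * (and, with CONFIG_DYNAMIC_DEBUG, if this callsite is enabled).
	 */
	netif_dbg(adap, rx_status, adap->net_dev,
		  "failed to steer flow (rc %d)\n", rc);
}

The msg_enable bit is usually flipped at runtime via ethtool's message-level control, e.g. ethtool -s IFACE msglvl rx_status on.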

Since there are now multiple workitems able to call __efx_filter_rfs_expire
 on a given channel, it is possible for them to race and thus pass quotas
 which, combined, exceed rfs_filter_count.  Thus, don't WARN_ON if we loop
 all the way around the table with quota left over.
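
For illustration, a simplified stand-alone sketch (hypothetical names, not the driver code) of why the wrap-around check no longer warns: two racing callers can pass quotas which together exceed the number of live entries, so returning to the start index with quota remaining is expected and should simply end the scan:

#define FLOW_ID_INVALID	0xffffffffu	/* stand-in for RPS_FLOW_ID_INVALID */

/* Hypothetical stand-in for the per-channel scan: walk the flow-id table
 * from 'start', expiring up to 'quota' entries, and return the index to
 * resume from next time.
 */
static unsigned int example_expire_scan(unsigned int *flow_ids,
					unsigned int size,
					unsigned int start,
					unsigned int quota)
{
	unsigned int index = start;

	while (quota) {
		if (flow_ids[index] != FLOW_ID_INVALID) {
			flow_ids[index] = FLOW_ID_INVALID;	/* "expired" */
			quota--;
		}
		if (++index == size)
			index = 0;
		/* Racing callers can pass quotas that together exceed the
		 * number of live entries, so wrapping back to 'start' with
		 * quota left is expected: stop quietly after one full pass.
		 */
		if (index == start)
			break;
	}
	return index;
}

One full pass is the natural bound, since a concurrent caller may already have expired the entries this caller's quota was sized against.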

Signed-off-by: Edward Cree <ecree@solarflare.com>
Tested-by: David Ahern <dahern@digitalocean.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Edward Cree 2019-11-22 17:57:19 +00:00 committed by Jakub Kicinski
parent 8490e75cdb
commit 0aa6608dae
2 changed files with 30 additions and 6 deletions

--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c

@@ -4202,11 +4202,15 @@ static int efx_ef10_filter_push(struct efx_nic *efx,
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
+	size_t outlen;
 	int rc;
 
 	efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
-	rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
-			  outbuf, sizeof(outbuf), NULL);
+	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
+				outbuf, sizeof(outbuf), &outlen);
+	if (rc && spec->priority != EFX_FILTER_PRI_HINT)
+		efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf),
+				       outbuf, outlen, rc);
 	if (rc == 0)
 		*handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
 	if (rc == -ENOSPC)

--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c

@@ -1032,6 +1032,26 @@ static void efx_filter_rfs_work(struct work_struct *data)
 			   req->spec.rem_host, ntohs(req->spec.rem_port),
 			   req->spec.loc_host, ntohs(req->spec.loc_port),
 			   req->rxq_index, req->flow_id, rc, arfs_id);
+	} else {
+		if (req->spec.ether_type == htons(ETH_P_IP))
+			netif_dbg(efx, rx_status, efx->net_dev,
+				  "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
+				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+				  req->spec.rem_host, ntohs(req->spec.rem_port),
+				  req->spec.loc_host, ntohs(req->spec.loc_port),
+				  req->rxq_index, req->flow_id, rc, arfs_id);
+		else
+			netif_dbg(efx, rx_status, efx->net_dev,
+				  "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
+				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+				  req->spec.rem_host, ntohs(req->spec.rem_port),
+				  req->spec.loc_host, ntohs(req->spec.loc_port),
+				  req->rxq_index, req->flow_id, rc, arfs_id);
+		/* We're overloading the NIC's filter tables, so let's do a
+		 * chunk of extra expiry work.
+		 */
+		__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
+						     100u));
 	}
 
 	/* Release references */
@@ -1170,11 +1190,11 @@ bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
 		if (++index == size)
 			index = 0;
 		/* If we were called with a quota that exceeds the total number
-		 * of filters in the table (which should never happen), ensure
-		 * that we don't loop forever - stop when we've examined every
-		 * row of the table.
+		 * of filters in the table (which shouldn't happen, but could
+		 * if two callers race), ensure that we don't loop forever -
+		 * stop when we've examined every row of the table.
 		 */
-		if (WARN_ON(index == start && quota))
+		if (index == start)
 			break;
 	}