Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Cross-merge networking fixes after downstream PR.

Conflicts:

net/ipv4/raw.c
  3632679d9e ("ipv{4,6}/raw: fix output xfrm lookup wrt protocol")
  c85be08fc4 ("raw: Stop using RTO_ONLINK.")
https://lore.kernel.org/all/20230525110037.2b532b83@canb.auug.org.au/

Adjacent changes:

drivers/net/ethernet/freescale/fec_main.c
  9025944fdd ("net: fec: add dma_wmb to ensure correct descriptor values")
  144470c88c ("net: fec: using the standard return codes when xdp xmit errors")

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit d4031ec844
Jakub Kicinski <kuba@kernel.org>, 2023-05-25 19:56:10 -07:00
80 changed files with 1460 additions and 786 deletions


@ -127,13 +127,32 @@ the value of ``Message-ID`` to the URL above.
Updating patch status
~~~~~~~~~~~~~~~~~~~~~
It may be tempting to help the maintainers and update the state of your
own patches when you post a new version or spot a bug. Please **do not**
do that.
Interfering with the patch status on patchwork will only cause confusion. Leave
it to the maintainer to figure out what is the most recent and current
version that should be applied. If there is any doubt, the maintainer
will reply and ask what should be done.

Contributors and reviewers do not have the permissions to update patch
state directly in patchwork. Patchwork doesn't expose much information
about the history of the state of patches, therefore having multiple
people update the state leads to confusion.

Instead of delegating patchwork permissions, netdev uses a simple mail
bot which looks for special commands/lines within the emails sent to
the mailing list. For example, to mark a series as Changes Requested,
one needs to send the following line anywhere in the email thread::

  pw-bot: changes-requested

As a result the bot will set the entire series to Changes Requested.
This may be useful when the author discovers a bug in their own series
and wants to prevent it from getting applied.

The use of the bot is entirely optional; if in doubt, ignore its existence
completely. Maintainers will classify and update the state of the patches
themselves. No email should ever be sent to the list with the main purpose
of communicating with the bot; the bot commands should be seen as metadata.

The use of the bot is restricted to authors of the patches (the ``From:``
header on patch submission and command must match!), maintainers themselves
and a handful of senior reviewers. The bot records its activity here:

https://patchwork.hopto.org/pw-bot.html

Review timelines
~~~~~~~~~~~~~~~~


@ -8165,6 +8165,7 @@ F: include/linux/spi/spi-fsl-dspi.h
FREESCALE ENETC ETHERNET DRIVERS
M: Claudiu Manoil <claudiu.manoil@nxp.com>
M: Vladimir Oltean <vladimir.oltean@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/freescale/enetc/


@ -1319,17 +1319,17 @@ static void nxp_serdev_remove(struct serdev_device *serdev)
hci_free_dev(hdev);
}
static struct btnxpuart_data w8987_data = {
static struct btnxpuart_data w8987_data __maybe_unused = {
.helper_fw_name = NULL,
.fw_name = FIRMWARE_W8987,
};
static struct btnxpuart_data w8997_data = {
static struct btnxpuart_data w8997_data __maybe_unused = {
.helper_fw_name = FIRMWARE_HELPER,
.fw_name = FIRMWARE_W8997,
};
static const struct of_device_id nxpuart_of_match_table[] = {
static const struct of_device_id nxpuart_of_match_table[] __maybe_unused = {
{ .compatible = "nxp,88w8987-bt", .data = &w8987_data },
{ .compatible = "nxp,88w8997-bt", .data = &w8997_data },
{ }


@ -3950,7 +3950,11 @@ static int bond_slave_netdev_event(unsigned long event,
unblock_netpoll_tx();
break;
case NETDEV_FEAT_CHANGE:
bond_compute_features(bond);
if (!bond->notifier_ctx) {
bond->notifier_ctx = true;
bond_compute_features(bond);
bond->notifier_ctx = false;
}
break;
case NETDEV_RESEND_IGMP:
/* Propagate to master device */
@ -6345,6 +6349,8 @@ static int bond_init(struct net_device *bond_dev)
if (!bond->wq)
return -ENOMEM;
bond->notifier_ctx = false;
spin_lock_init(&bond->stats_lock);
netdev_lockdep_set_classes(bond_dev);


@ -195,6 +195,7 @@ static int tc589_probe(struct pcmcia_device *link)
{
struct el3_private *lp;
struct net_device *dev;
int ret;
dev_dbg(&link->dev, "3c589_attach()\n");
@ -218,7 +219,15 @@ static int tc589_probe(struct pcmcia_device *link)
dev->ethtool_ops = &netdev_ethtool_ops;
return tc589_config(link);
ret = tc589_config(link);
if (ret)
goto err_free_netdev;
return 0;
err_free_netdev:
free_netdev(dev);
return ret;
}
static void tc589_detach(struct pcmcia_device *link)


@ -3813,6 +3813,11 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
index = fec_enet_get_bd_index(last_bdp, &txq->bd);
txq->tx_skbuff[index] = NULL;
/* Make sure the updates to rest of the descriptor are performed before
* transferring ownership.
*/
dma_wmb();
/* Send it on its way. Tell FEC it's ready, interrupt when done,
* it's the last BD of the frame, and to put the CRC on the end.
*/
@ -3822,8 +3827,14 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
/* If this was the last BD in the ring, start at the beginning again. */
bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
/* Make sure the update to bdp are performed before txq->bd.cur. */
dma_wmb();
txq->bd.cur = bdp;
/* Trigger transmission start */
writel(0, txq->bd.reg_desc_active);
return 0;
}
@ -3852,12 +3863,6 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
sent_frames++;
}
/* Make sure the update to bdp and tx_skbuff are performed. */
wmb();
/* Trigger transmission start */
writel(0, txq->bd.reg_desc_active);
__netif_tx_unlock(nq);
return sent_frames;


@ -656,9 +656,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
htons(ext->lso_sb - skb_network_offset(skb));
} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
ext->lso_format = pfvf->hw.lso_tsov6_idx;
ipv6_hdr(skb)->payload_len =
htons(ext->lso_sb - skb_network_offset(skb));
ipv6_hdr(skb)->payload_len = htons(tcp_hdrlen(skb));
} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
__be16 l3_proto = vlan_get_protocol(skb);
struct udphdr *udph = udp_hdr(skb);


@ -3269,18 +3269,14 @@ static int mtk_open(struct net_device *dev)
eth->dsa_meta[i] = md_dst;
}
} else {
/* Hardware special tag parsing needs to be disabled if at least
* one MAC does not use DSA.
/* Hardware DSA untagging and VLAN RX offloading need to be
* disabled if at least one MAC does not use DSA.
*/
u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
val &= ~MTK_CDMP_STAG_EN;
mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
val &= ~MTK_CDMQ_STAG_EN;
mtk_w32(eth, val, MTK_CDMQ_IG_CTRL);
mtk_w32(eth, 0, MTK_CDMP_EG_CTRL);
}


@ -1920,9 +1920,10 @@ static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
u32 syndrome, int err)
{
const char *namep = mlx5_command_str(opcode);
struct mlx5_cmd_stats *stats;
if (!err)
if (!err || !(strcmp(namep, "unknown command opcode")))
return;
stats = &dev->cmd.stats[opcode];


@ -175,6 +175,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
/* ensure cq space is freed before enabling more cqes */
wmb();
mlx5e_txqsq_wake(&ptpsq->txqsq);
return work_done == budget;
}


@ -1369,11 +1369,13 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow;
list_for_each_entry(flow, encap_flows, tmp_list) {
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_flow_attr *attr;
if (!mlx5e_is_offloaded_flow(flow))
continue;
attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
if (flow_flag_test(flow, SLOW))


@ -193,6 +193,8 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
return pi;
}
void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);


@ -1665,11 +1665,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
struct mlx5e_priv *out_priv, *route_priv;
struct mlx5_devcom *devcom = NULL;
struct mlx5_core_dev *route_mdev;
struct mlx5_eswitch *esw;
u16 vhca_id;
int err;
out_priv = netdev_priv(out_dev);
esw = out_priv->mdev->priv.eswitch;
@ -1678,6 +1676,9 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
if (mlx5_lag_is_active(out_priv->mdev)) {
struct mlx5_devcom *devcom;
int err;
/* In lag case we may get devices from different eswitch instances.
* If we failed to get vport num, it means, mostly, that we on the wrong
* eswitch.
@ -1686,16 +1687,16 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
if (err != -ENOENT)
return err;
rcu_read_lock();
devcom = out_priv->mdev->priv.devcom;
esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
if (!esw)
return -ENODEV;
esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
rcu_read_unlock();
return err;
}
err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
if (devcom)
mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
return err;
return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
}
static int
@ -5301,6 +5302,8 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
goto err_action_counter;
}
mlx5_esw_offloads_devcom_init(esw);
return 0;
err_action_counter:
@ -5329,7 +5332,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
priv = netdev_priv(rpriv->netdev);
esw = priv->mdev->priv.eswitch;
mlx5e_tc_clean_fdb_peer_flows(esw);
mlx5_esw_offloads_devcom_cleanup(esw);
mlx5e_tc_tun_cleanup(uplink_priv->encap);
@ -5643,22 +5646,43 @@ bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
0, NULL);
}
static struct mapping_ctx *
mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
{
struct mlx5e_tc_table *tc;
struct mlx5_eswitch *esw;
struct mapping_ctx *ctx;
if (is_mdev_switchdev_mode(priv->mdev)) {
esw = priv->mdev->priv.eswitch;
ctx = esw->offloads.reg_c0_obj_pool;
} else {
tc = mlx5e_fs_get_tc(priv->fs);
ctx = tc->mapping;
}
return ctx;
}
int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u64 act_miss_cookie, u32 *act_miss_mapping)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_mapped_obj mapped_obj = {};
struct mlx5_eswitch *esw;
struct mapping_ctx *ctx;
int err;
ctx = esw->offloads.reg_c0_obj_pool;
ctx = mlx5e_get_priv_obj_mapping(priv);
mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
mapped_obj.act_miss_cookie = act_miss_cookie;
err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
if (err)
return err;
if (!is_mdev_switchdev_mode(priv->mdev))
return 0;
esw = priv->mdev->priv.eswitch;
attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
if (IS_ERR(attr->act_id_restore_rule))
goto err_rule;
@ -5673,10 +5697,9 @@ err_rule:
void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u32 act_miss_mapping)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mapping_ctx *ctx;
struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);
ctx = esw->offloads.reg_c0_obj_pool;
mlx5_del_flow_rules(attr->act_id_restore_rule);
if (is_mdev_switchdev_mode(priv->mdev))
mlx5_del_flow_rules(attr->act_id_restore_rule);
mapping_remove(ctx, act_miss_mapping);
}


@ -762,6 +762,17 @@ static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_t
}
}
void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq)
{
if (netif_tx_queue_stopped(sq->txq) &&
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
mlx5e_ptpsq_fifo_has_room(sq) &&
!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netif_tx_wake_queue(sq->txq);
sq->stats->wake++;
}
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
@ -861,13 +872,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
if (netif_tx_queue_stopped(sq->txq) &&
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
mlx5e_ptpsq_fifo_has_room(sq) &&
!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netif_tx_wake_queue(sq->txq);
stats->wake++;
}
mlx5e_txqsq_wake(sq);
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}


@ -161,20 +161,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
}
}
/* budget=0 means we may be in IRQ context, do as little as possible */
if (unlikely(!budget))
goto out;
busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp)
busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
if (likely(budget)) { /* budget=0 means: don't poll rx rings */
if (xsk_open)
work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
if (xsk_open)
work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
if (likely(budget - work_done))
work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
if (likely(budget - work_done))
work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
busy |= work_done == budget;
}
busy |= work_done == budget;
mlx5e_poll_ico_cq(&c->icosq.cq);
if (mlx5e_poll_ico_cq(&c->async_icosq.cq))


@ -1104,7 +1104,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
mlx5_irq_table_destroy(dev);
mlx5_irq_table_free_irqs(dev);
mutex_unlock(&table->lock);
}


@ -342,6 +342,7 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
bool paired[MLX5_MAX_PORTS];
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@ -369,6 +370,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw);
void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u16 vport, const u8 *mac);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@ -775,6 +778,8 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {}
static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {}
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }


@ -2776,6 +2776,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
if (esw->paired[mlx5_get_dev_index(peer_esw->dev)])
break;
err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
if (err)
goto err_out;
@ -2787,14 +2790,18 @@ static int mlx5_esw_offloads_devcom_event(int event,
if (err)
goto err_pair;
esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
break;
case ESW_OFFLOADS_DEVCOM_UNPAIR:
if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
break;
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
mlx5_esw_offloads_unpair(peer_esw);
mlx5_esw_offloads_unpair(esw);
mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
@ -2813,7 +2820,7 @@ err_out:
return err;
}
static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
@ -2836,7 +2843,7 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
ESW_OFFLOADS_DEVCOM_PAIR, esw);
}
static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
@ -3281,8 +3288,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_vports;
esw_offloads_devcom_init(esw);
return 0;
err_vports:
@ -3323,7 +3328,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_disable(struct mlx5_eswitch *esw)
{
esw_offloads_devcom_cleanup(esw);
mlx5_eswitch_disable_pf_vf_vports(esw);
esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
esw_set_passing_vport_metadata(esw, false);


@ -3,6 +3,7 @@
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
static LIST_HEAD(devcom_list);
@ -13,7 +14,7 @@ static LIST_HEAD(devcom_list);
struct mlx5_devcom_component {
struct {
void *data;
void __rcu *data;
} device[MLX5_DEVCOM_PORTS_SUPPORTED];
mlx5_devcom_event_handler_t handler;
@ -77,6 +78,7 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
return NULL;
mlx5_dev_list_lock();
sguid0 = mlx5_query_nic_system_image_guid(dev);
list_for_each_entry(iter, &devcom_list, list) {
struct mlx5_core_dev *tmp_dev = NULL;
@ -102,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (!priv) {
priv = mlx5_devcom_list_alloc();
if (!priv)
return ERR_PTR(-ENOMEM);
if (!priv) {
devcom = ERR_PTR(-ENOMEM);
goto out;
}
idx = 0;
new_priv = true;
@ -112,13 +116,16 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
priv->devs[idx] = dev;
devcom = mlx5_devcom_alloc(priv, idx);
if (!devcom) {
kfree(priv);
return ERR_PTR(-ENOMEM);
if (new_priv)
kfree(priv);
devcom = ERR_PTR(-ENOMEM);
goto out;
}
if (new_priv)
list_add(&priv->list, &devcom_list);
out:
mlx5_dev_list_unlock();
return devcom;
}
@ -131,6 +138,7 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
if (IS_ERR_OR_NULL(devcom))
return;
mlx5_dev_list_lock();
priv = devcom->priv;
priv->devs[devcom->idx] = NULL;
@ -141,10 +149,12 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
break;
if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
return;
goto out;
list_del(&priv->list);
kfree(priv);
out:
mlx5_dev_list_unlock();
}
void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
@ -162,7 +172,7 @@ void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
comp->handler = handler;
comp->device[devcom->idx].data = data;
rcu_assign_pointer(comp->device[devcom->idx].data, data);
up_write(&comp->sem);
}
@ -176,8 +186,9 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
comp->device[devcom->idx].data = NULL;
RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL);
up_write(&comp->sem);
synchronize_rcu();
}
int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
@ -193,12 +204,15 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx && comp->device[i].data) {
err = comp->handler(event, comp->device[i].data,
event_data);
for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
void *data = rcu_dereference_protected(comp->device[i].data,
lockdep_is_held(&comp->sem));
if (i != devcom->idx && data) {
err = comp->handler(event, data, event_data);
break;
}
}
up_write(&comp->sem);
return err;
@ -213,7 +227,7 @@ void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
WARN_ON(!rwsem_is_locked(&comp->sem));
comp->paired = paired;
WRITE_ONCE(comp->paired, paired);
}
bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
@ -222,7 +236,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
if (IS_ERR_OR_NULL(devcom))
return false;
return devcom->priv->components[id].paired;
return READ_ONCE(devcom->priv->components[id].paired);
}
void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
@ -236,7 +250,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_read(&comp->sem);
if (!comp->paired) {
if (!READ_ONCE(comp->paired)) {
up_read(&comp->sem);
return NULL;
}
@ -245,7 +259,29 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
if (i != devcom->idx)
break;
return comp->device[i].data;
return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
}
void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
{
struct mlx5_devcom_component *comp;
int i;
if (IS_ERR_OR_NULL(devcom))
return NULL;
for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
if (i != devcom->idx)
break;
comp = &devcom->priv->components[id];
/* This can change concurrently, however 'data' pointer will remain
* valid for the duration of RCU read section.
*/
if (!READ_ONCE(comp->paired))
return NULL;
return rcu_dereference(comp->device[i].data);
}
void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,


@ -41,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
enum mlx5_devcom_components id);
void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
enum mlx5_devcom_components id);


@ -1049,7 +1049,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
dev->dm = mlx5_dm_create(dev);
if (IS_ERR(dev->dm))
mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);


@ -15,6 +15,7 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
int mlx5_irq_table_create(struct mlx5_core_dev *dev);
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);


@ -32,6 +32,7 @@ struct mlx5_irq {
struct mlx5_irq_pool *pool;
int refcount;
struct msi_map map;
u32 pool_index;
};
struct mlx5_irq_table {
@ -132,7 +133,7 @@ static void irq_release(struct mlx5_irq *irq)
struct cpu_rmap *rmap;
#endif
xa_erase(&pool->irqs, irq->map.index);
xa_erase(&pool->irqs, irq->pool_index);
/* free_irq requires that affinity_hint and rmap will be cleared before
* calling it. To satisfy this requirement, we call
* irq_cpu_rmap_remove() to remove the notifier
@ -276,11 +277,11 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
}
irq->pool = pool;
irq->refcount = 1;
irq->map.index = i;
err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
irq->pool_index = i;
err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
if (err) {
mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
irq->map.index, err);
irq->pool_index, err);
goto err_xa;
}
return irq;
@ -567,7 +568,7 @@ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
struct mlx5_irq *irq;
int i;
af_desc.is_managed = 1;
af_desc.is_managed = false;
for (i = 0; i < nirqs; i++) {
cpumask_set_cpu(cpus[i], &af_desc.mask);
irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
@ -691,6 +692,24 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
irq_pool_free(table->pcif_pool);
}
static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
struct mlx5_irq *irq;
unsigned long index;
xa_for_each(&pool->irqs, index, irq)
free_irq(irq->map.virq, &irq->nh);
}
static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
if (table->sf_ctrl_pool) {
mlx5_irq_pool_free_irqs(table->sf_comp_pool);
mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
}
mlx5_irq_pool_free_irqs(table->pcif_pool);
}
/* irq_table API */
int mlx5_irq_table_init(struct mlx5_core_dev *dev)
@ -774,6 +793,17 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
pci_free_irq_vectors(dev->pdev);
}
void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_irq_table *table = dev->priv.irq_table;
if (mlx5_core_is_sf(dev))
return;
mlx5_irq_pools_free_irqs(table);
pci_free_irq_vectors(dev->pdev);
}
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
if (table->sf_comp_pool)


@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id);
caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols);
caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version);
caps->roce_caps.fl_rc_qp_when_roce_disabled =
MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled);
if (MLX5_CAP_GEN(mdev, roce)) {
err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
return err;
caps->roce_caps.roce_en = roce_en;
caps->roce_caps.fl_rc_qp_when_roce_disabled =
caps->roce_caps.fl_rc_qp_when_roce_disabled |=
MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
caps->roce_caps.fl_rc_qp_when_roce_enabled =
MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);


@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
{
u32 crc = crc32(0, input_data, length);
return (__force u32)htonl(crc);
return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
}
bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)


@ -1039,6 +1039,16 @@ static int lan966x_reset_switch(struct lan966x *lan966x)
reset_control_reset(switch_reset);
/* Don't reinitialize the switch core, if it is already initialized. In
* case it is initialized twice, some pointers inside the queue system
* in HW will get corrupted and then after a while the queue system gets
* full and no traffic is passing through the switch. The issue is seen
* when loading and unloading the driver and sending traffic through the
* switch.
*/
if (lan_rd(lan966x, SYS_RESET_CFG) & SYS_RESET_CFG_CORE_ENA)
return 0;
lan_wr(SYS_RESET_CFG_CORE_ENA_SET(0), lan966x, SYS_RESET_CFG);
lan_wr(SYS_RAM_INIT_RAM_INIT_SET(1), lan966x, SYS_RAM_INIT);
ret = readx_poll_timeout(lan966x_ram_init, lan966x,


@ -6138,6 +6138,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
return 0;
out_error:
nv_mgmt_release_sema(dev);
if (phystate_orig)
writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
out_freering:


@ -616,10 +616,10 @@ struct rtl8169_private {
struct work_struct work;
} wk;
spinlock_t config25_lock;
spinlock_t mac_ocp_lock;
raw_spinlock_t config25_lock;
raw_spinlock_t mac_ocp_lock;
spinlock_t cfg9346_usage_lock;
raw_spinlock_t cfg9346_usage_lock;
int cfg9346_usage_count;
unsigned supports_gmii:1;
@ -671,20 +671,20 @@ static void rtl_lock_config_regs(struct rtl8169_private *tp)
{
unsigned long flags;
spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
if (!--tp->cfg9346_usage_count)
RTL_W8(tp, Cfg9346, Cfg9346_Lock);
spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_unlock_config_regs(struct rtl8169_private *tp)
{
unsigned long flags;
spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
if (!tp->cfg9346_usage_count++)
RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_pci_commit(struct rtl8169_private *tp)
@ -698,10 +698,10 @@ static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set)
unsigned long flags;
u8 val;
spin_lock_irqsave(&tp->config25_lock, flags);
raw_spin_lock_irqsave(&tp->config25_lock, flags);
val = RTL_R8(tp, Config2);
RTL_W8(tp, Config2, (val & ~clear) | set);
spin_unlock_irqrestore(&tp->config25_lock, flags);
raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
}
static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
@ -709,10 +709,10 @@ static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
unsigned long flags;
u8 val;
spin_lock_irqsave(&tp->config25_lock, flags);
raw_spin_lock_irqsave(&tp->config25_lock, flags);
val = RTL_R8(tp, Config5);
RTL_W8(tp, Config5, (val & ~clear) | set);
spin_unlock_irqrestore(&tp->config25_lock, flags);
raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
}
static bool rtl_is_8125(struct rtl8169_private *tp)
@ -899,9 +899,9 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
unsigned long flags;
spin_lock_irqsave(&tp->mac_ocp_lock, flags);
raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
__r8168_mac_ocp_write(tp, reg, data);
spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
}
static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
@ -919,9 +919,9 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
unsigned long flags;
u16 val;
spin_lock_irqsave(&tp->mac_ocp_lock, flags);
raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
val = __r8168_mac_ocp_read(tp, reg);
spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
return val;
}
@ -932,10 +932,10 @@ static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask,
unsigned long flags;
u16 data;
spin_lock_irqsave(&tp->mac_ocp_lock, flags);
raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
data = __r8168_mac_ocp_read(tp, reg);
__r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
}
/* Work around a hw issue with RTL8168g PHY, the quirk disables
@ -1420,14 +1420,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0);
}
spin_lock_irqsave(&tp->config25_lock, flags);
raw_spin_lock_irqsave(&tp->config25_lock, flags);
for (i = 0; i < tmp; i++) {
options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
if (wolopts & cfg[i].opt)
options |= cfg[i].mask;
RTL_W8(tp, cfg[i].reg, options);
}
spin_unlock_irqrestore(&tp->config25_lock, flags);
raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
@ -5179,9 +5179,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->eee_adv = -1;
tp->ocp_base = OCP_STD_PHY_BASE;
spin_lock_init(&tp->cfg9346_usage_lock);
spin_lock_init(&tp->config25_lock);
spin_lock_init(&tp->mac_ocp_lock);
raw_spin_lock_init(&tp->cfg9346_usage_lock);
raw_spin_lock_init(&tp->config25_lock);
raw_spin_lock_init(&tp->mac_ocp_lock);
dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
struct pcpu_sw_netstats);


@ -171,9 +171,14 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
rc = efx_mcdi_nvram_metadata(efx, partition_type, NULL, version, NULL,
0);
/* If the partition does not exist, that is not an error. */
if (rc == -ENOENT)
return 0;
if (rc) {
netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed\n",
version_name);
netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed (rc=%d)\n",
version_name, rc);
return rc;
}
@ -187,36 +192,33 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
static int efx_devlink_info_stored_versions(struct efx_nic *efx,
struct devlink_info_req *req)
{
int rc;
int err;
rc = efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_BUNDLE,
DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
if (rc)
return rc;
/* We do not care here about the specific error but just if an error
* happened. The specific error will be reported inside the call
* through system messages, and if any error happened in any call
* below, we report it through extack.
*/
err = efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_BUNDLE,
DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
rc = efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_MC_FIRMWARE,
DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
if (rc)
return rc;
err |= efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_MC_FIRMWARE,
DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
rc = efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
if (rc)
return rc;
err |= efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
rc = efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_EXPANSION_ROM,
EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
if (rc)
return rc;
err |= efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_EXPANSION_ROM,
EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
rc = efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
EFX_DEVLINK_INFO_VERSION_FW_UEFI);
return rc;
err |= efx_devlink_info_nvram_partition(efx, req,
NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
EFX_DEVLINK_INFO_VERSION_FW_UEFI);
return err;
}
#define EFX_VER_FLAG(_f) \
@ -587,27 +589,20 @@ static int efx_devlink_info_get(struct devlink *devlink,
{
struct efx_devlink *devlink_private = devlink_priv(devlink);
struct efx_nic *efx = devlink_private->efx;
int rc;
int err;
/* Several different MCDI commands are used. We report first error
* through extack returning at that point. Specific error
* information via system messages.
/* Several different MCDI commands are used. We report if errors
* happened through extack. Specific error information via system
* messages inside the calls.
*/
rc = efx_devlink_info_board_cfg(efx, req);
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Getting board info failed");
return rc;
}
rc = efx_devlink_info_stored_versions(efx, req);
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Getting stored versions failed");
return rc;
}
rc = efx_devlink_info_running_versions(efx, req);
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Getting running versions failed");
return rc;
}
err = efx_devlink_info_board_cfg(efx, req);
err |= efx_devlink_info_stored_versions(efx, req);
err |= efx_devlink_info_running_versions(efx, req);
if (err)
NL_SET_ERR_MSG_MOD(extack, "Errors when getting device info. Check system messages");
return 0;
}


@ -179,6 +179,7 @@ enum rgmii_clock_delay {
#define VSC8502_RGMII_CNTL 20
#define VSC8502_RGMII_RX_DELAY_MASK 0x0070
#define VSC8502_RGMII_TX_DELAY_MASK 0x0007
#define VSC8502_RGMII_RX_CLK_DISABLE 0x0800
#define MSCC_PHY_WOL_LOWER_MAC_ADDR 21
#define MSCC_PHY_WOL_MID_MAC_ADDR 22
@ -276,6 +277,7 @@ enum rgmii_clock_delay {
/* Microsemi PHY ID's
* Code assumes lowest nibble is 0
*/
#define PHY_ID_VSC8501 0x00070530
#define PHY_ID_VSC8502 0x00070630
#define PHY_ID_VSC8504 0x000704c0
#define PHY_ID_VSC8514 0x00070670


@ -519,16 +519,27 @@ out_unlock:
* * 2.0 ns (which causes the data to be sampled at exactly half way between
* clock transitions at 1000 Mbps) if delays should be enabled
*/
static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
u16 rgmii_rx_delay_mask,
u16 rgmii_tx_delay_mask)
static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
u16 rgmii_rx_delay_mask,
u16 rgmii_tx_delay_mask)
{
u16 rgmii_rx_delay_pos = ffs(rgmii_rx_delay_mask) - 1;
u16 rgmii_tx_delay_pos = ffs(rgmii_tx_delay_mask) - 1;
u16 reg_val = 0;
int rc;
u16 mask = 0;
int rc = 0;
mutex_lock(&phydev->lock);
/* For traffic to pass, the VSC8502 family needs the RX_CLK disable bit
* to be unset for all PHY modes, so do that as part of the paged
* register modification.
* For some family members (like VSC8530/31/40/41) this bit is reserved
* and read-only, and the RX clock is enabled by default.
*/
if (rgmii_cntl == VSC8502_RGMII_CNTL)
mask |= VSC8502_RGMII_RX_CLK_DISABLE;
if (phy_interface_is_rgmii(phydev))
mask |= rgmii_rx_delay_mask | rgmii_tx_delay_mask;
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID ||
phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
@ -537,31 +548,20 @@ static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_tx_delay_pos;
rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
rgmii_cntl,
rgmii_rx_delay_mask | rgmii_tx_delay_mask,
reg_val);
mutex_unlock(&phydev->lock);
if (mask)
rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
rgmii_cntl, mask, reg_val);
return rc;
}
static int vsc85xx_default_config(struct phy_device *phydev)
{
int rc;
phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
if (phy_interface_mode_is_rgmii(phydev->interface)) {
rc = vsc85xx_rgmii_set_skews(phydev, VSC8502_RGMII_CNTL,
VSC8502_RGMII_RX_DELAY_MASK,
VSC8502_RGMII_TX_DELAY_MASK);
if (rc)
return rc;
}
return 0;
return vsc85xx_update_rgmii_cntl(phydev, VSC8502_RGMII_CNTL,
VSC8502_RGMII_RX_DELAY_MASK,
VSC8502_RGMII_TX_DELAY_MASK);
}
static int vsc85xx_get_tunable(struct phy_device *phydev,
@ -1758,13 +1758,11 @@ static int vsc8584_config_init(struct phy_device *phydev)
if (ret)
return ret;
if (phy_interface_is_rgmii(phydev)) {
ret = vsc85xx_rgmii_set_skews(phydev, VSC8572_RGMII_CNTL,
VSC8572_RGMII_RX_DELAY_MASK,
VSC8572_RGMII_TX_DELAY_MASK);
if (ret)
return ret;
}
ret = vsc85xx_update_rgmii_cntl(phydev, VSC8572_RGMII_CNTL,
VSC8572_RGMII_RX_DELAY_MASK,
VSC8572_RGMII_TX_DELAY_MASK);
if (ret)
return ret;
ret = genphy_soft_reset(phydev);
if (ret)
@ -2316,6 +2314,30 @@ static int vsc85xx_probe(struct phy_device *phydev)
/* Microsemi VSC85xx PHYs */
static struct phy_driver vsc85xx_driver[] = {
{
.phy_id = PHY_ID_VSC8501,
.name = "Microsemi GE VSC8501 SyncE",
.phy_id_mask = 0xfffffff0,
/* PHY_BASIC_FEATURES */
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
.read_status = &vsc85xx_read_status,
.handle_interrupt = vsc85xx_handle_interrupt,
.config_intr = &vsc85xx_config_intr,
.suspend = &genphy_suspend,
.resume = &genphy_resume,
.probe = &vsc85xx_probe,
.set_wol = &vsc85xx_wol_set,
.get_wol = &vsc85xx_wol_get,
.get_tunable = &vsc85xx_get_tunable,
.set_tunable = &vsc85xx_set_tunable,
.read_page = &vsc85xx_phy_read_page,
.write_page = &vsc85xx_phy_write_page,
.get_sset_count = &vsc85xx_get_sset_count,
.get_strings = &vsc85xx_get_strings,
.get_stats = &vsc85xx_get_stats,
},
{
.phy_id = PHY_ID_VSC8502,
.name = "Microsemi GE VSC8502 SyncE",
@ -2656,6 +2678,8 @@ static struct phy_driver vsc85xx_driver[] = {
module_phy_driver(vsc85xx_driver);
static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
{ PHY_ID_VSC8501, 0xfffffff0, },
{ PHY_ID_VSC8502, 0xfffffff0, },
{ PHY_ID_VSC8504, 0xfffffff0, },
{ PHY_ID_VSC8514, 0xfffffff0, },
{ PHY_ID_VSC8530, 0xfffffff0, },


@ -1629,6 +1629,7 @@ static int team_init(struct net_device *dev)
team->dev = dev;
team_set_no_mode(team);
team->notifier_ctx = false;
team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats);
if (!team->pcpu_stats)
@ -3022,7 +3023,11 @@ static int team_device_event(struct notifier_block *unused,
team_del_slave(port->team->dev, dev);
break;
case NETDEV_FEAT_CHANGE:
team_compute_features(port->team);
if (!port->team->notifier_ctx) {
port->team->notifier_ctx = true;
team_compute_features(port->team);
port->team->notifier_ctx = false;
}
break;
case NETDEV_PRECHANGEMTU:
/* Forbid to change mtu of underlaying device */


@ -181,9 +181,12 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
else
min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32);
max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
if (max == 0)
if (le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) == 0)
max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */
else
max = clamp_t(u32, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize),
USB_CDC_NCM_NTB_MIN_OUT_SIZE,
CDC_NCM_NTB_MAX_SIZE_TX);
/* some devices set dwNtbOutMaxSize too low for the above default */
min = min(min, max);
@ -1244,6 +1247,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
* further.
*/
if (skb_out == NULL) {
/* If even the smallest allocation fails, abort. */
if (ctx->tx_curr_size == USB_CDC_NCM_NTB_MIN_OUT_SIZE)
goto alloc_failed;
ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
(unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
@ -1262,13 +1268,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
/* No allocation possible so we will abort */
if (skb_out == NULL) {
if (skb != NULL) {
dev_kfree_skb_any(skb);
dev->net->stats.tx_dropped++;
}
goto exit_no_skb;
}
if (!skb_out)
goto alloc_failed;
ctx->tx_low_mem_val--;
}
if (ctx->is_ndp16) {
@ -1461,6 +1462,11 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
return skb_out;
alloc_failed:
if (skb) {
dev_kfree_skb_any(skb);
dev->net->stats.tx_dropped++;
}
exit_no_skb:
/* Start timer, if there is a remaining non-empty skb */
if (ctx->tx_curr_skb != NULL && n > 0)


@ -208,6 +208,7 @@ struct team {
bool queue_override_enabled;
struct list_head *qom_lists; /* array of queue override mapping lists */
bool port_mtu_change_allowed;
bool notifier_ctx;
struct {
unsigned int count;
unsigned int interval; /* in ms */


@ -1705,7 +1705,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 rc[0x1];
u8 uar_4k[0x1];
u8 reserved_at_241[0x9];
u8 reserved_at_241[0x7];
u8 fl_rc_qp_when_roce_disabled[0x1];
u8 regexp_params[0x1];
u8 uar_sz[0x6];
u8 port_selection_cap[0x1];
u8 reserved_at_248[0x1];


@ -1587,6 +1587,16 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
to->l4_hash = from->l4_hash;
};
static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
const struct sk_buff *skb2)
{
#ifdef CONFIG_TLS_DEVICE
return skb2->decrypted - skb1->decrypted;
#else
return 0;
#endif
}
static inline void skb_copy_decrypted(struct sk_buff *to,
const struct sk_buff *from)
{


@ -71,7 +71,6 @@ struct sk_psock_link {
};
struct sk_psock_work_state {
struct sk_buff *skb;
u32 len;
u32 off;
};
@ -105,7 +104,7 @@ struct sk_psock {
struct proto *sk_proto;
struct mutex work_mutex;
struct sk_psock_work_state work_state;
struct work_struct work;
struct delayed_work work;
struct rcu_work rwork;
};


@ -1327,7 +1327,7 @@ int hci_le_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role);
int hci_conn_del(struct hci_conn *conn);
void hci_conn_del(struct hci_conn *conn);
void hci_conn_hash_flush(struct hci_dev *hdev);
void hci_conn_check_pending(struct hci_dev *hdev);


@ -219,6 +219,7 @@ struct bonding {
struct bond_up_slave __rcu *usable_slaves;
struct bond_up_slave __rcu *all_slaves;
bool force_primary;
bool notifier_ctx;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
struct slave *);


@ -76,6 +76,7 @@ struct ipcm_cookie {
__be32 addr;
int oif;
struct ip_options_rcu *opt;
__u8 protocol;
__u8 ttl;
__s16 tos;
char priority;
@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
ipcm->protocol = inet->inet_num;
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))


@ -399,22 +399,4 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
static inline void page_pool_ring_lock(struct page_pool *pool)
__acquires(&pool->ring.producer_lock)
{
if (in_softirq())
spin_lock(&pool->ring.producer_lock);
else
spin_lock_bh(&pool->ring.producer_lock);
}
static inline void page_pool_ring_unlock(struct page_pool *pool)
__releases(&pool->ring.producer_lock)
{
if (in_softirq())
spin_unlock(&pool->ring.producer_lock);
else
spin_unlock_bh(&pool->ring.producer_lock);
}
#endif /* _NET_PAGE_POOL_H */


@ -1468,6 +1468,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
}
void tcp_cleanup_rbuf(struct sock *sk, int copied);
void __tcp_cleanup_rbuf(struct sock *sk, int copied);
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
@ -2324,6 +2326,14 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
#ifdef CONFIG_INET
void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
#else
static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
{
}
#endif
int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
struct sk_msg *msg, u32 bytes, int flags);
#endif /* CONFIG_NET_SOCK_MSG */


@ -126,6 +126,7 @@ struct tls_strparser {
u32 mark : 8;
u32 stopped : 1;
u32 copy_mode : 1;
u32 mixed_decrypted : 1;
u32 msg_ready : 1;
struct strp_msg stm;


@ -163,6 +163,7 @@ struct in_addr {
#define IP_MULTICAST_ALL 49
#define IP_UNICAST_IF 50
#define IP_LOCAL_PORT_RANGE 51
#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0
#define MCAST_INCLUDE 1


@ -1215,7 +1215,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
return ret;
goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@ -1236,6 +1236,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
err:
htab_unlock_bucket(htab, b, hash, flags);
err_lock_bucket:
if (ret)
htab_lru_push_free(htab, l_new);
else if (l_old)
@ -1338,7 +1339,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
return ret;
goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@ -1361,6 +1362,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = 0;
err:
htab_unlock_bucket(htab, b, hash, flags);
err_lock_bucket:
if (l_new)
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
return ret;


@ -859,4 +859,4 @@ static int __init bpf_offload_init(void)
return rhashtable_init(&offdevs, &offdevs_params);
}
late_initcall(bpf_offload_init);
core_initcall(bpf_offload_init);


@ -17432,7 +17432,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
insn->dst_reg,
shift);
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
(1ULL << size * 8) - 1);
}
}


@ -1083,8 +1083,28 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!conn->parent) {
struct hci_link *link, *t;
list_for_each_entry_safe(link, t, &conn->link_list, list)
hci_conn_unlink(link->conn);
list_for_each_entry_safe(link, t, &conn->link_list, list) {
struct hci_conn *child = link->conn;
hci_conn_unlink(child);
/* If hdev is down it means
* hci_dev_close_sync/hci_conn_hash_flush is in progress
* and links don't need to be cleanup as all connections
* would be cleanup.
*/
if (!test_bit(HCI_UP, &hdev->flags))
continue;
/* Due to race, SCO connection might be not established
* yet at this point. Delete it now, otherwise it is
* possible for it to be stuck and can't be deleted.
*/
if ((child->type == SCO_LINK ||
child->type == ESCO_LINK) &&
child->handle == HCI_CONN_HANDLE_UNSET)
hci_conn_del(child);
}
return;
}
@ -1092,35 +1112,30 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!conn->link)
return;
hci_conn_put(conn->parent);
conn->parent = NULL;
list_del_rcu(&conn->link->list);
synchronize_rcu();
hci_conn_drop(conn->parent);
hci_conn_put(conn->parent);
conn->parent = NULL;
kfree(conn->link);
conn->link = NULL;
/* Due to race, SCO connection might be not established
* yet at this point. Delete it now, otherwise it is
* possible for it to be stuck and can't be deleted.
*/
if (conn->handle == HCI_CONN_HANDLE_UNSET)
hci_conn_del(conn);
}
int hci_conn_del(struct hci_conn *conn)
void hci_conn_del(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
BT_DBG("%s hcon %p handle %d", hdev->name, conn, conn->handle);
hci_conn_unlink(conn);
cancel_delayed_work_sync(&conn->disc_work);
cancel_delayed_work_sync(&conn->auto_accept_work);
cancel_delayed_work_sync(&conn->idle_work);
if (conn->type == ACL_LINK) {
hci_conn_unlink(conn);
/* Unacked frames */
hdev->acl_cnt += conn->sent;
} else if (conn->type == LE_LINK) {
@ -1131,13 +1146,6 @@ int hci_conn_del(struct hci_conn *conn)
else
hdev->acl_cnt += conn->sent;
} else {
struct hci_conn *acl = conn->parent;
if (acl) {
hci_conn_unlink(conn);
hci_conn_drop(acl);
}
/* Unacked ISO frames */
if (conn->type == ISO_LINK) {
if (hdev->iso_pkts)
@ -1160,8 +1168,6 @@ int hci_conn_del(struct hci_conn *conn)
* rest of hci_conn_del.
*/
hci_conn_cleanup(conn);
return 0;
}
struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
@ -2462,22 +2468,21 @@ timer:
/* Drop all connection on the device */
void hci_conn_hash_flush(struct hci_dev *hdev)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c, *n;
struct list_head *head = &hdev->conn_hash.list;
struct hci_conn *conn;
BT_DBG("hdev %s", hdev->name);
list_for_each_entry_safe(c, n, &h->list, list) {
c->state = BT_CLOSED;
hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
/* Unlink before deleting otherwise it is possible that
* hci_conn_del removes the link which may cause the list to
* contain items already freed.
*/
hci_conn_unlink(c);
hci_conn_del(c);
/* We should not traverse the list here, because hci_conn_del
* can remove extra links, which may cause the list traversal
* to hit items that have already been released.
*/
while ((conn = list_first_entry_or_null(head,
struct hci_conn,
list)) != NULL) {
conn->state = BT_CLOSED;
hci_disconn_cfm(conn, HCI_ERROR_LOCAL_HOST_TERM);
hci_conn_del(conn);
}
}


@ -134,6 +134,29 @@ EXPORT_SYMBOL(page_pool_ethtool_stats_get);
#define recycle_stat_add(pool, __stat, val)
#endif
static bool page_pool_producer_lock(struct page_pool *pool)
__acquires(&pool->ring.producer_lock)
{
bool in_softirq = in_softirq();
if (in_softirq)
spin_lock(&pool->ring.producer_lock);
else
spin_lock_bh(&pool->ring.producer_lock);
return in_softirq;
}
static void page_pool_producer_unlock(struct page_pool *pool,
bool in_softirq)
__releases(&pool->ring.producer_lock)
{
if (in_softirq)
spin_unlock(&pool->ring.producer_lock);
else
spin_unlock_bh(&pool->ring.producer_lock);
}
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@ -617,6 +640,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
int i, bulk_len = 0;
bool in_softirq;
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
@ -635,7 +659,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
return;
/* Bulk producer into ptr_ring page_pool cache */
page_pool_ring_lock(pool);
in_softirq = page_pool_producer_lock(pool);
for (i = 0; i < bulk_len; i++) {
if (__ptr_ring_produce(&pool->ring, data[i])) {
/* ring full */
@ -644,7 +668,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
}
recycle_stat_add(pool, ring, i);
page_pool_ring_unlock(pool);
page_pool_producer_unlock(pool, in_softirq);
/* Hopefully all pages was return into ptr_ring */
if (likely(i == bulk_len))


@ -5206,8 +5206,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
} else {
skb = skb_clone(orig_skb, GFP_ATOMIC);
if (skb_orphan_frags_rx(skb, GFP_ATOMIC))
if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
kfree_skb(skb);
return;
}
}
if (!skb)
return;


@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
msg_rx = sk_psock_peek_msg(psock);
}
out:
if (psock->work_state.skb && copied > 0)
schedule_work(&psock->work);
return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@ -624,42 +622,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
static void sk_psock_skb_state(struct sk_psock *psock,
struct sk_psock_work_state *state,
struct sk_buff *skb,
int len, int off)
{
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
state->skb = skb;
state->len = len;
state->off = off;
} else {
sock_drop(psock->sk, skb);
}
spin_unlock_bh(&psock->ingress_lock);
}
static void sk_psock_backlog(struct work_struct *work)
{
struct sk_psock *psock = container_of(work, struct sk_psock, work);
struct delayed_work *dwork = to_delayed_work(work);
struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
struct sk_buff *skb = NULL;
u32 len = 0, off = 0;
bool ingress;
u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
if (unlikely(state->skb)) {
spin_lock_bh(&psock->ingress_lock);
skb = state->skb;
if (unlikely(state->len)) {
len = state->len;
off = state->off;
state->skb = NULL;
spin_unlock_bh(&psock->ingress_lock);
}
if (skb)
goto start;
while ((skb = skb_dequeue(&psock->ingress_skb))) {
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
if (skb_bpf_strparser(skb)) {
@ -668,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
start:
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@ -678,22 +666,28 @@ start:
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
sk_psock_skb_state(psock, state, skb,
len, off);
sk_psock_skb_state(psock, state, len, off);
/* Delay slightly to prioritize any
* other work that might be here.
*/
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_delayed_work(&psock->work, 1);
goto end;
}
/* Hard errors break pipe and stop xmit. */
sk_psock_report_error(psock, ret ? -ret : EPIPE);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sock_drop(psock->sk, skb);
goto end;
}
off += ret;
len -= ret;
} while (len);
if (!ingress)
skb = skb_dequeue(&psock->ingress_skb);
if (!ingress) {
kfree_skb(skb);
}
}
end:
mutex_unlock(&psock->work_mutex);
@ -734,7 +728,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
INIT_WORK(&psock->work, sk_psock_backlog);
INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
mutex_init(&psock->work_mutex);
INIT_LIST_HEAD(&psock->ingress_msg);
spin_lock_init(&psock->ingress_lock);
@ -786,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
skb_bpf_redirect_clear(skb);
sock_drop(psock->sk, skb);
}
kfree_skb(psock->work_state.skb);
/* We null the skb here to ensure that calls to sk_psock_backlog
* do not pick up the free'd skb.
*/
psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
@ -809,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock)
spin_lock_bh(&psock->ingress_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sk_psock_cork_free(psock);
__sk_psock_zap_ingress(psock);
spin_unlock_bh(&psock->ingress_lock);
}
@ -823,7 +811,8 @@ static void sk_psock_destroy(struct work_struct *work)
sk_psock_done_strp(psock);
cancel_work_sync(&psock->work);
cancel_delayed_work_sync(&psock->work);
__sk_psock_zap_ingress(psock);
mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
@ -938,7 +927,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
}
skb_queue_tail(&psock_other->ingress_skb, skb);
schedule_work(&psock_other->work);
schedule_delayed_work(&psock_other->work, 0);
spin_unlock_bh(&psock_other->ingress_lock);
return 0;
}
@ -990,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
err = -EIO;
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_bpf_redirect_clear(skb);
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
goto out_free;
}
skb_bpf_set_ingress(skb);
@ -1018,22 +1005,23 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
schedule_work(&psock->work);
schedule_delayed_work(&psock->work, 0);
err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
if (err < 0) {
skb_bpf_redirect_clear(skb);
if (err < 0)
goto out_free;
}
}
break;
case __SK_REDIRECT:
tcp_eat_skb(psock->sk, skb);
err = sk_psock_skb_redirect(psock, skb);
break;
case __SK_DROP:
default:
out_free:
skb_bpf_redirect_clear(skb);
tcp_eat_skb(psock->sk, skb);
sock_drop(psock->sk, skb);
}
@ -1049,7 +1037,7 @@ static void sk_psock_write_space(struct sock *sk)
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
schedule_work(&psock->work);
schedule_delayed_work(&psock->work, 0);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
@ -1078,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
if (ret == SK_PASS)
skb_bpf_set_strparser(skb);
skb_bpf_set_strparser(skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
@ -1183,12 +1170,11 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
int ret = __SK_DROP;
int len = skb->len;
skb_get(skb);
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
len = 0;
tcp_eat_skb(sk, skb);
sock_drop(sk, skb);
goto out;
}
@ -1212,12 +1198,21 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
int copied;
trace_sk_data_ready(sk);
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
sock->ops->read_skb(sk, sk_psock_verdict_recv);
copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
struct sk_psock *psock;
rcu_read_lock();
psock = sk_psock(sk);
psock->saved_data_ready(sk);
rcu_read_unlock();
}
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)

View File

@ -1644,9 +1644,10 @@ void sock_map_close(struct sock *sk, long timeout)
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
cancel_work_sync(&psock->work);
cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
}
/* Make sure we do not recurse. This is a bug.
* Leak the socket instead of crashing on a stack overflow.
*/

View File

@ -102,7 +102,7 @@ struct handshake_req_alloc_test_param handshake_req_alloc_params[] = {
{
.desc = "handshake_req_alloc excessive privsize",
.proto = &handshake_req_alloc_proto_6,
.gfp = GFP_KERNEL,
.gfp = GFP_KERNEL | __GFP_NOWARN,
.expect_success = false,
},
{
@ -209,6 +209,7 @@ static void handshake_req_submit_test4(struct kunit *test)
{
struct handshake_req *req, *result;
struct socket *sock;
struct file *filp;
int err;
/* Arrange */
@ -218,9 +219,10 @@ static void handshake_req_submit_test4(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@ -241,6 +243,7 @@ static void handshake_req_submit_test5(struct kunit *test)
struct handshake_req *req;
struct handshake_net *hn;
struct socket *sock;
struct file *filp;
struct net *net;
int saved, err;
@ -251,9 +254,10 @@ static void handshake_req_submit_test5(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
sock->file = filp;
net = sock_net(sock->sk);
hn = handshake_pernet(net);
@ -276,6 +280,7 @@ static void handshake_req_submit_test6(struct kunit *test)
{
struct handshake_req *req1, *req2;
struct socket *sock;
struct file *filp;
int err;
/* Arrange */
@ -287,9 +292,10 @@ static void handshake_req_submit_test6(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
sock->file = filp;
/* Act */
err = handshake_req_submit(sock, req1, GFP_KERNEL);
@ -307,6 +313,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
{
struct handshake_req *req;
struct socket *sock;
struct file *filp;
bool result;
int err;
@ -318,8 +325,9 @@ static void handshake_req_cancel_test1(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@ -340,6 +348,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
struct handshake_req *req, *next;
struct handshake_net *hn;
struct socket *sock;
struct file *filp;
struct net *net;
bool result;
int err;
@ -352,8 +361,9 @@ static void handshake_req_cancel_test2(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@ -380,6 +390,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
struct handshake_req *req, *next;
struct handshake_net *hn;
struct socket *sock;
struct file *filp;
struct net *net;
bool result;
int err;
@ -392,8 +403,9 @@ static void handshake_req_cancel_test3(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@ -436,6 +448,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
{
struct handshake_req *req;
struct socket *sock;
struct file *filp;
int err;
/* Arrange */
@ -448,8 +461,9 @@ static void handshake_req_destroy_test1(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);

View File

@ -317,7 +317,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->tos = val;
ipc->priority = rt_tos2priority(ipc->tos);
break;
case IP_PROTOCOL:
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
return -EINVAL;
val = *(int *)CMSG_DATA(cmsg);
if (val < 1 || val > 255)
return -EINVAL;
ipc->protocol = val;
break;
default:
return -EINVAL;
}
@ -1761,6 +1768,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
case IP_PROTOCOL:
val = inet_sk(sk)->inet_num;
break;
default:
sockopt_release_sock(sk);
return -ENOPROTOOPT;

View File

@ -532,6 +532,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
/* Keep backward compat */
if (hdrincl)
ipc.protocol = IPPROTO_RAW;
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@ -597,7 +600,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
hdrincl ? IPPROTO_RAW : sk->sk_protocol,
hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);

View File

@ -1451,7 +1451,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
void __tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
@ -1653,7 +1653,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
used = recv_actor(sk, skb);
consume_skb(skb);
if (used < 0) {
if (!copied)
copied = used;
@ -1667,14 +1666,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
break;
}
}
WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
return copied;
}
EXPORT_SYMBOL(tcp_read_skb);

View File

@ -11,6 +11,24 @@
#include <net/inet_common.h>
#include <net/tls.h>
void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tcp;
int copied;
if (!skb || !skb->len || !sk_is_tcp(sk))
return;
if (skb_bpf_strparser(skb))
return;
tcp = tcp_sk(sk);
copied = tcp->copied_seq + skb->len;
WRITE_ONCE(tcp->copied_seq, copied);
tcp_rcv_space_adjust(sk);
__tcp_cleanup_rbuf(sk, skb->len);
}
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, u32 apply_bytes, int flags)
{
@ -178,14 +196,34 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
return ret;
}
static bool is_next_msg_fin(struct sk_psock *psock)
{
struct scatterlist *sge;
struct sk_msg *msg_rx;
int i;
msg_rx = sk_psock_peek_msg(psock);
i = msg_rx->sg.start;
sge = sk_msg_elem(msg_rx, i);
if (!sge->length) {
struct sk_buff *skb = msg_rx->skb;
if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
return true;
}
return false;
}
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
int flags,
int *addr_len)
{
struct tcp_sock *tcp = tcp_sk(sk);
u32 seq = tcp->copied_seq;
struct sk_psock *psock;
int copied;
int copied = 0;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@ -198,8 +236,43 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
/* We may have received data on the sk_receive_queue pre-accept, and
* then we cannot use read_skb in this context because we haven't
* assigned a sk_socket yet, so we have no link to the ops. The work-around
* is to check the sk_receive_queue and, in these cases, read the skbs off
* the queue again. The read_skb hook is not running at this point because
* of lock_sock, so we avoid having multiple runners in read_skb.
*/
if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
tcp_data_ready(sk);
/* This handles the ENOMEM errors if we both receive data
* pre-accept and are already under memory pressure. At least
* let the user know to retry.
*/
if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
copied = -EAGAIN;
goto out;
}
}
msg_bytes_ready:
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
/* The typical case for EFAULT is the socket was gracefully
* shut down with a FIN pkt, so check for that here. The other case is
* some error on copy_page_to_iter, which would be unexpected.
* On FIN, correct the return code to zero.
*/
if (copied == -EFAULT) {
bool is_fin = is_next_msg_fin(psock);
if (is_fin) {
copied = 0;
seq++;
goto out;
}
}
seq += copied;
if (!copied) {
long timeo;
int data;
@ -237,6 +310,10 @@ msg_bytes_ready:
copied = -EAGAIN;
}
out:
WRITE_ONCE(tcp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
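
The FIN handling above is what a sockmap user sees as an ordinary end-of-stream. A minimal userspace sketch of the intended behaviour, assuming c is a TCP socket already installed in a sockmap with a verdict program and p is its connected peer (setup elided; this mirrors the skb_verdict shutdown selftest further down in this merge):

#include <sys/socket.h>

/* Hypothetical check: once the peer half-closes, recv() on the
 * sockmap-attached socket should report EOF (0) rather than fail
 * with EFAULT.
 */
static int expect_eof_after_fin(int c, int p)
{
	char b;

	shutdown(p, SHUT_WR);			/* peer sends FIN */
	return recv(c, &b, 1, 0) == 0 ? 0 : -1;
}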

View File

@ -1774,7 +1774,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;
int err, copied;
int err;
try_again:
skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
@ -1793,10 +1793,7 @@ try_again:
}
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
copied = recv_actor(sk, skb);
kfree_skb(skb);
return copied;
return recv_actor(sk, skb);
}
EXPORT_SYMBOL(udp_read_skb);

View File

@ -64,6 +64,8 @@ struct proto udplite_prot = {
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udplite_table,
};

View File

@ -143,6 +143,8 @@ int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type)
optlen = 1;
break;
default:
if (len < 2)
goto bad;
optlen = nh[offset + 1] + 2;
if (optlen > len)
goto bad;

View File

@ -793,7 +793,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!proto)
proto = inet->inet_num;
else if (proto != inet->inet_num)
else if (proto != inet->inet_num &&
inet->inet_num != IPPROTO_RAW)
return -EINVAL;
if (proto > 255)

View File

@ -60,6 +60,8 @@ struct proto udplitev6_prot = {
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,
};

View File

@ -324,9 +324,12 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
t->pl.probe_size += SCTP_PL_BIG_STEP;
} else if (t->pl.state == SCTP_PL_SEARCH) {
if (!t->pl.probe_high) {
t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
SCTP_MAX_PLPMTU);
return false;
if (t->pl.probe_size < SCTP_MAX_PLPMTU) {
t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
SCTP_MAX_PLPMTU);
return false;
}
t->pl.probe_high = SCTP_MAX_PLPMTU;
}
t->pl.probe_size += SCTP_PL_MIN_STEP;
if (t->pl.probe_size >= t->pl.probe_high) {
@ -341,7 +344,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
} else if (t->pl.state == SCTP_PL_COMPLETE) {
/* Raise probe_size again after 30 * interval in Search Complete */
t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
t->pl.probe_size += SCTP_PL_MIN_STEP;
t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU);
}
return t->pl.state == SCTP_PL_COMPLETE;

View File

@ -2000,8 +2000,10 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
return rc;
/* create send buffer and rmb */
if (smc_buf_create(new_smc, false))
if (smc_buf_create(new_smc, false)) {
smc_conn_abort(new_smc, ini->first_contact_local);
return SMC_CLC_DECL_MEM;
}
return 0;
}
@ -2217,8 +2219,11 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
smcr_version = ini->smcr_version;
ini->smcr_version = SMC_V2;
rc = smc_listen_rdma_init(new_smc, ini);
if (!rc)
if (!rc) {
rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
if (rc)
smc_conn_abort(new_smc, ini->first_contact_local);
}
if (!rc)
return;
ini->smcr_version = smcr_version;

View File

@ -127,6 +127,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
int i, j;
/* do link balancing */
conn->lnk = NULL; /* reset conn->lnk first */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &conn->lgr->lnk[i];

View File

@ -167,6 +167,11 @@ static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
return ctx->strp.msg_ready;
}
static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
{
return ctx->strp.mixed_decrypted;
}
#ifdef CONFIG_TLS_DEVICE
int tls_device_init(void);
void tls_device_cleanup(void);

View File

@ -1005,20 +1005,14 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_buff *skb = tls_strp_msg(sw_ctx);
struct strp_msg *rxm = strp_msg(skb);
int is_decrypted = skb->decrypted;
int is_encrypted = !is_decrypted;
struct sk_buff *skb_iter;
int left;
int is_decrypted, is_encrypted;
left = rxm->full_len - skb->len;
/* Check if all the data is decrypted already */
skb_iter = skb_shinfo(skb)->frag_list;
while (skb_iter && left > 0) {
is_decrypted &= skb_iter->decrypted;
is_encrypted &= !skb_iter->decrypted;
left -= skb_iter->len;
skb_iter = skb_iter->next;
if (!tls_strp_msg_mixed_decrypted(sw_ctx)) {
is_decrypted = skb->decrypted;
is_encrypted = !is_decrypted;
} else {
is_decrypted = 0;
is_encrypted = 0;
}
trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,

View File

@ -29,34 +29,50 @@ static void tls_strp_anchor_free(struct tls_strparser *strp)
struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
shinfo->frag_list = NULL;
if (!strp->copy_mode)
shinfo->frag_list = NULL;
consume_skb(strp->anchor);
strp->anchor = NULL;
}
static struct sk_buff *
tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb,
int offset, int len)
{
struct sk_buff *skb;
int i, err;
skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER,
&err, strp->sk->sk_allocation);
if (!skb)
return NULL;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
skb_frag_address(frag),
skb_frag_size(frag)));
offset += skb_frag_size(frag);
}
skb->len = len;
skb->data_len = len;
skb_copy_header(skb, in_skb);
return skb;
}
/* Create a new skb with the contents of input copied to its page frags */
static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
{
struct strp_msg *rxm;
struct sk_buff *skb;
int i, err, offset;
skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER,
&err, strp->sk->sk_allocation);
skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset,
strp->stm.full_len);
if (!skb)
return NULL;
offset = strp->stm.offset;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
skb_frag_address(frag),
skb_frag_size(frag)));
offset += skb_frag_size(frag);
}
skb_copy_header(skb, strp->anchor);
rxm = strp_msg(skb);
rxm->offset = 0;
return skb;
@ -180,22 +196,22 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i], false);
shinfo->nr_frags = 0;
if (strp->copy_mode) {
kfree_skb_list(shinfo->frag_list);
shinfo->frag_list = NULL;
}
strp->copy_mode = 0;
strp->mixed_decrypted = 0;
}
static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
unsigned int offset, size_t in_len)
static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
struct sk_buff *in_skb, unsigned int offset,
size_t in_len)
{
struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
struct sk_buff *skb;
skb_frag_t *frag;
size_t len, chunk;
skb_frag_t *frag;
int sz;
if (strp->msg_ready)
return 0;
skb = strp->anchor;
frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
len = in_len;
@ -208,19 +224,26 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
skb_frag_size(frag),
chunk));
sz = tls_rx_msg_size(strp, strp->anchor);
if (sz < 0) {
desc->error = sz;
return 0;
}
/* We may have over-read, sz == 0 is guaranteed under-read */
if (sz > 0)
chunk = min_t(size_t, chunk, sz - skb->len);
skb->len += chunk;
skb->data_len += chunk;
skb_frag_size_add(frag, chunk);
sz = tls_rx_msg_size(strp, skb);
if (sz < 0)
return sz;
/* We may have over-read, sz == 0 is guaranteed under-read */
if (unlikely(sz && sz < skb->len)) {
int over = skb->len - sz;
WARN_ON_ONCE(over > chunk);
skb->len -= over;
skb->data_len -= over;
skb_frag_size_add(frag, -over);
chunk -= over;
}
frag++;
len -= chunk;
offset += chunk;
@ -247,15 +270,99 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
offset += chunk;
}
if (strp->stm.full_len == skb->len) {
read_done:
return in_len - len;
}
static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb,
struct sk_buff *in_skb, unsigned int offset,
size_t in_len)
{
struct sk_buff *nskb, *first, *last;
struct skb_shared_info *shinfo;
size_t chunk;
int sz;
if (strp->stm.full_len)
chunk = strp->stm.full_len - skb->len;
else
chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
chunk = min(chunk, in_len);
nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk);
if (!nskb)
return -ENOMEM;
shinfo = skb_shinfo(skb);
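/* frag_list is kept as a singly linked list whose head's ->prev
* points at the current tail, so new chunks are appended in O(1)
* without walking the list.
*/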
if (!shinfo->frag_list) {
shinfo->frag_list = nskb;
nskb->prev = nskb;
} else {
first = shinfo->frag_list;
last = first->prev;
last->next = nskb;
first->prev = nskb;
}
skb->len += chunk;
skb->data_len += chunk;
if (!strp->stm.full_len) {
sz = tls_rx_msg_size(strp, skb);
if (sz < 0)
return sz;
/* We may have over-read, sz == 0 is guaranteed under-read */
if (unlikely(sz && sz < skb->len)) {
int over = skb->len - sz;
WARN_ON_ONCE(over > chunk);
skb->len -= over;
skb->data_len -= over;
__pskb_trim(nskb, nskb->len - over);
chunk -= over;
}
strp->stm.full_len = sz;
}
return chunk;
}
static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
unsigned int offset, size_t in_len)
{
struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
struct sk_buff *skb;
int ret;
if (strp->msg_ready)
return 0;
skb = strp->anchor;
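/* Note whether this record mixes device-decrypted and still-encrypted
* data; if it does, fall back to copy mode and stage chunks on the
* anchor's frag_list instead of its frag array.
*/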
if (!skb->len)
skb_copy_decrypted(skb, in_skb);
else
strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb);
if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted)
ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len);
else
ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len);
if (ret < 0) {
desc->error = ret;
ret = 0;
}
if (strp->stm.full_len && strp->stm.full_len == skb->len) {
desc->count = 0;
strp->msg_ready = 1;
tls_rx_msg_ready(strp);
}
read_done:
return in_len - len;
return ret;
}
static int tls_strp_read_copyin(struct tls_strparser *strp)
@ -315,15 +422,19 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
return 0;
}
static bool tls_strp_check_no_dup(struct tls_strparser *strp)
static bool tls_strp_check_queue_ok(struct tls_strparser *strp)
{
unsigned int len = strp->stm.offset + strp->stm.full_len;
struct sk_buff *skb;
struct sk_buff *first, *skb;
u32 seq;
skb = skb_shinfo(strp->anchor)->frag_list;
seq = TCP_SKB_CB(skb)->seq;
first = skb_shinfo(strp->anchor)->frag_list;
skb = first;
seq = TCP_SKB_CB(first)->seq;
/* Make sure there's no duplicate data in the queue,
* and the decrypted status matches.
*/
while (skb->len < len) {
seq += skb->len;
len -= skb->len;
@ -331,6 +442,8 @@ static bool tls_strp_check_no_dup(struct tls_strparser *strp)
if (TCP_SKB_CB(skb)->seq != seq)
return false;
if (skb_cmp_decrypted(first, skb))
return false;
}
return true;
@ -411,7 +524,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
return tls_strp_read_copy(strp, true);
}
if (!tls_strp_check_no_dup(strp))
if (!tls_strp_check_queue_ok(strp))
return tls_strp_read_copy(strp, false);
strp->msg_ready = 1;

View File

@ -2304,10 +2304,14 @@ static void tls_data_ready(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
gfp_t alloc_save;
trace_sk_data_ready(sk);
alloc_save = sk->sk_allocation;
sk->sk_allocation = GFP_ATOMIC;
tls_strp_data_ready(&ctx->strp);
sk->sk_allocation = alloc_save;
psock = sk_psock_get(sk);
if (psock) {

View File

@ -2450,7 +2450,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int err, copied;
int err;
mutex_lock(&u->iolock);
skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
@ -2458,10 +2458,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
if (!skb)
return err;
copied = recv_actor(sk, skb);
kfree_skb(skb);
return copied;
return recv_actor(sk, skb);
}
/*

View File

@ -1441,7 +1441,6 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
struct sock *sk = sk_vsock(vsk);
struct sk_buff *skb;
int off = 0;
int copied;
int err;
spin_lock_bh(&vvs->rx_lock);
@ -1454,9 +1453,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
if (!skb)
return err;
copied = recv_actor(sk, skb);
kfree_skb(skb);
return copied;
return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

View File

@ -498,7 +498,6 @@ int main(int argc, char **argv)
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
__fallthrough;
default:
Usage();
return 0;

View File

@ -197,7 +197,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$(call msg,SIGN-FILE,,$@)
$(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \
$(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
$< -o $@ \
$(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)

View File

@ -2,6 +2,7 @@
// Copyright (c) 2020 Cloudflare
#include <error.h>
#include <netinet/tcp.h>
#include <sys/epoll.h>
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
@ -9,8 +10,12 @@
#include "test_sockmap_invalid_update.skel.h"
#include "test_sockmap_skb_verdict_attach.skel.h"
#include "test_sockmap_progs_query.skel.h"
#include "test_sockmap_pass_prog.skel.h"
#include "test_sockmap_drop_prog.skel.h"
#include "bpf_iter_sockmap.skel.h"
#include "sockmap_helpers.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
#define TCP_REPAIR_ON 1
@ -350,6 +355,126 @@ out:
test_sockmap_progs_query__destroy(skel);
}
#define MAX_EVENTS 10
static void test_sockmap_skb_verdict_shutdown(void)
{
struct epoll_event ev, events[MAX_EVENTS];
int n, err, map, verdict, s, c1, p1;
struct test_sockmap_pass_prog *skel;
int epollfd;
int zero = 0;
char b;
skel = test_sockmap_pass_prog__open_and_load();
if (!ASSERT_OK_PTR(skel, "open_and_load"))
return;
verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
map = bpf_map__fd(skel->maps.sock_map_rx);
err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
s = socket_loopback(AF_INET, SOCK_STREAM);
if (s < 0)
goto out;
err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
if (err < 0)
goto out;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
if (err < 0)
goto out_close;
shutdown(p1, SHUT_WR);
ev.events = EPOLLIN;
ev.data.fd = c1;
epollfd = epoll_create1(0);
if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
goto out_close;
err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
goto out_close;
err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
goto out_close;
n = recv(c1, &b, 1, SOCK_NONBLOCK);
ASSERT_EQ(n, 0, "recv_timeout(fin)");
out_close:
close(c1);
close(p1);
out:
test_sockmap_pass_prog__destroy(skel);
}
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
int expected, zero = 0, sent, recvd, avail;
int err, map, verdict, s, c0, c1, p0, p1;
struct test_sockmap_pass_prog *pass;
struct test_sockmap_drop_prog *drop;
char buf[256] = "0123456789";
if (pass_prog) {
pass = test_sockmap_pass_prog__open_and_load();
if (!ASSERT_OK_PTR(pass, "open_and_load"))
return;
verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
map = bpf_map__fd(pass->maps.sock_map_rx);
expected = sizeof(buf);
} else {
drop = test_sockmap_drop_prog__open_and_load();
if (!ASSERT_OK_PTR(drop, "open_and_load"))
return;
verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
map = bpf_map__fd(drop->maps.sock_map_rx);
/* On drop, data is consumed immediately and copied_seq is inc'd */
expected = 0;
}
err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
s = socket_loopback(AF_INET, SOCK_STREAM);
if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
goto out;
err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
if (!ASSERT_OK(err, "create_socket_pairs(s)"))
goto out;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
goto out_close;
sent = xsend(p1, &buf, sizeof(buf), 0);
ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
err = ioctl(c1, FIONREAD, &avail);
ASSERT_OK(err, "ioctl(FIONREAD) error");
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
/* On DROP test there will be no data to read */
if (pass_prog) {
recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
}
out_close:
close(c0);
close(p0);
close(c1);
close(p1);
out:
if (pass_prog)
test_sockmap_pass_prog__destroy(pass);
else
test_sockmap_drop_prog__destroy(drop);
}
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@ -384,4 +509,10 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
if (test__start_subtest("sockmap skb_verdict progs query"))
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
if (test__start_subtest("sockmap skb_verdict shutdown"))
test_sockmap_skb_verdict_shutdown();
if (test__start_subtest("sockmap skb_verdict fionread"))
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
test_sockmap_skb_verdict_fionread(false);
}

View File

@ -0,0 +1,390 @@
#ifndef __SOCKMAP_HELPERS__
#define __SOCKMAP_HELPERS__
#include <linux/vm_sockets.h>
#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
/* workaround for older vm_sockets.h */
#ifndef VMADDR_CID_LOCAL
#define VMADDR_CID_LOCAL 1
#endif
#define __always_unused __attribute__((__unused__))
#define _FAIL(errnum, fmt...) \
({ \
error_at_line(0, (errnum), __func__, __LINE__, fmt); \
CHECK_FAIL(true); \
})
#define FAIL(fmt...) _FAIL(0, fmt)
#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
#define FAIL_LIBBPF(err, msg) \
({ \
char __buf[MAX_STRERR_LEN]; \
libbpf_strerror((err), __buf, sizeof(__buf)); \
FAIL("%s: %s", (msg), __buf); \
})
/* Wrappers that fail the test on error and report it. */
#define xaccept_nonblock(fd, addr, len) \
({ \
int __ret = \
accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("accept"); \
__ret; \
})
#define xbind(fd, addr, len) \
({ \
int __ret = bind((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("bind"); \
__ret; \
})
#define xclose(fd) \
({ \
int __ret = close((fd)); \
if (__ret == -1) \
FAIL_ERRNO("close"); \
__ret; \
})
#define xconnect(fd, addr, len) \
({ \
int __ret = connect((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("connect"); \
__ret; \
})
#define xgetsockname(fd, addr, len) \
({ \
int __ret = getsockname((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockname"); \
__ret; \
})
#define xgetsockopt(fd, level, name, val, len) \
({ \
int __ret = getsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockopt(" #name ")"); \
__ret; \
})
#define xlisten(fd, backlog) \
({ \
int __ret = listen((fd), (backlog)); \
if (__ret == -1) \
FAIL_ERRNO("listen"); \
__ret; \
})
#define xsetsockopt(fd, level, name, val, len) \
({ \
int __ret = setsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("setsockopt(" #name ")"); \
__ret; \
})
#define xsend(fd, buf, len, flags) \
({ \
ssize_t __ret = send((fd), (buf), (len), (flags)); \
if (__ret == -1) \
FAIL_ERRNO("send"); \
__ret; \
})
#define xrecv_nonblock(fd, buf, len, flags) \
({ \
ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("recv"); \
__ret; \
})
#define xsocket(family, sotype, flags) \
({ \
int __ret = socket(family, sotype, flags); \
if (__ret == -1) \
FAIL_ERRNO("socket"); \
__ret; \
})
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
#define xbpf_prog_attach(prog, target, type, flags) \
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
#define xpthread_create(thread, attr, func, arg) \
({ \
int __ret = pthread_create((thread), (attr), (func), (arg)); \
errno = __ret; \
if (__ret) \
FAIL_ERRNO("pthread_create"); \
__ret; \
})
#define xpthread_join(thread, retval) \
({ \
int __ret = pthread_join((thread), (retval)); \
errno = __ret; \
if (__ret) \
FAIL_ERRNO("pthread_join"); \
__ret; \
})
static inline int poll_read(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
fd_set rfds;
int r;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
r = select(fd + 1, &rfds, NULL, NULL, &timeout);
if (r == 0)
errno = ETIME;
return r == 1 ? 0 : -1;
}
static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return accept(fd, addr, len);
}
static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return recv(fd, buf, len, flags);
}
static inline void init_addr_loopback4(struct sockaddr_storage *ss,
socklen_t *len)
{
struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
addr4->sin_family = AF_INET;
addr4->sin_port = 0;
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
*len = sizeof(*addr4);
}
static inline void init_addr_loopback6(struct sockaddr_storage *ss,
socklen_t *len)
{
struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
addr6->sin6_family = AF_INET6;
addr6->sin6_port = 0;
addr6->sin6_addr = in6addr_loopback;
*len = sizeof(*addr6);
}
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
socklen_t *len)
{
struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
addr->svm_family = AF_VSOCK;
addr->svm_port = VMADDR_PORT_ANY;
addr->svm_cid = VMADDR_CID_LOCAL;
*len = sizeof(*addr);
}
static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
socklen_t *len)
{
switch (family) {
case AF_INET:
init_addr_loopback4(ss, len);
return;
case AF_INET6:
init_addr_loopback6(ss, len);
return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
default:
FAIL("unsupported address family %d", family);
}
}
static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
{
return (struct sockaddr *)ss;
}
static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
{
u64 value;
u32 key;
int err;
key = 0;
value = fd1;
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
if (err)
return err;
key = 1;
value = fd2;
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
}
static inline int create_pair(int s, int family, int sotype, int *c, int *p)
{
struct sockaddr_storage addr;
socklen_t len;
int err = 0;
len = sizeof(addr);
err = xgetsockname(s, sockaddr(&addr), &len);
if (err)
return err;
*c = xsocket(family, sotype, 0);
if (*c < 0)
return errno;
err = xconnect(*c, sockaddr(&addr), len);
if (err) {
err = errno;
goto close_cli0;
}
*p = xaccept_nonblock(s, NULL, NULL);
if (*p < 0) {
err = errno;
goto close_cli0;
}
return err;
close_cli0:
close(*c);
return err;
}
static inline int create_socket_pairs(int s, int family, int sotype,
int *c0, int *c1, int *p0, int *p1)
{
int err;
err = create_pair(s, family, sotype, c0, p0);
if (err)
return err;
err = create_pair(s, family, sotype, c1, p1);
if (err) {
close(*c0);
close(*p0);
}
return err;
}
static inline int enable_reuseport(int s, int progfd)
{
int err, one = 1;
err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
if (err)
return -1;
err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
sizeof(progfd));
if (err)
return -1;
return 0;
}
static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
{
struct sockaddr_storage addr;
socklen_t len;
int err, s;
init_addr_loopback(family, &addr, &len);
s = xsocket(family, sotype, 0);
if (s == -1)
return -1;
if (progfd >= 0)
enable_reuseport(s, progfd);
err = xbind(s, sockaddr(&addr), len);
if (err)
goto close;
if (sotype & SOCK_DGRAM)
return s;
err = xlisten(s, SOMAXCONN);
if (err)
goto close;
return s;
close:
xclose(s);
return -1;
}
static inline int socket_loopback(int family, int sotype)
{
return socket_loopback_reuseport(family, sotype, -1);
}
#endif // __SOCKMAP_HELPERS__
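
For orientation, a rough sketch of how these shared helpers are meant to compose in a test; only names defined in the header above are used, and error handling is simplified relative to the real selftests:

#include "sockmap_helpers.h"

/* Sketch: loopback listener, two connected client/peer pairs, and the
 * peer sockets installed into the sockmap at keys 0 and 1.
 */
static inline int setup_redir_pairs(int sock_mapfd, int *c0, int *c1,
				    int *p0, int *p1)
{
	int s, err;

	s = socket_loopback(AF_INET, SOCK_STREAM);
	if (s < 0)
		return -1;

	err = create_socket_pairs(s, AF_INET, SOCK_STREAM, c0, c1, p0, p1);
	if (!err)
		err = add_to_sockmap(sock_mapfd, *p0, *p1);

	xclose(s);
	return err;
}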

View File

@ -20,11 +20,6 @@
#include <unistd.h>
#include <linux/vm_sockets.h>
/* workaround for older vm_sockets.h */
#ifndef VMADDR_CID_LOCAL
#define VMADDR_CID_LOCAL 1
#endif
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@ -32,315 +27,7 @@
#include "test_progs.h"
#include "test_sockmap_listen.skel.h"
#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
#define __always_unused __attribute__((__unused__))
#define _FAIL(errnum, fmt...) \
({ \
error_at_line(0, (errnum), __func__, __LINE__, fmt); \
CHECK_FAIL(true); \
})
#define FAIL(fmt...) _FAIL(0, fmt)
#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
#define FAIL_LIBBPF(err, msg) \
({ \
char __buf[MAX_STRERR_LEN]; \
libbpf_strerror((err), __buf, sizeof(__buf)); \
FAIL("%s: %s", (msg), __buf); \
})
/* Wrappers that fail the test on error and report it. */
#define xaccept_nonblock(fd, addr, len) \
({ \
int __ret = \
accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("accept"); \
__ret; \
})
#define xbind(fd, addr, len) \
({ \
int __ret = bind((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("bind"); \
__ret; \
})
#define xclose(fd) \
({ \
int __ret = close((fd)); \
if (__ret == -1) \
FAIL_ERRNO("close"); \
__ret; \
})
#define xconnect(fd, addr, len) \
({ \
int __ret = connect((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("connect"); \
__ret; \
})
#define xgetsockname(fd, addr, len) \
({ \
int __ret = getsockname((fd), (addr), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockname"); \
__ret; \
})
#define xgetsockopt(fd, level, name, val, len) \
({ \
int __ret = getsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("getsockopt(" #name ")"); \
__ret; \
})
#define xlisten(fd, backlog) \
({ \
int __ret = listen((fd), (backlog)); \
if (__ret == -1) \
FAIL_ERRNO("listen"); \
__ret; \
})
#define xsetsockopt(fd, level, name, val, len) \
({ \
int __ret = setsockopt((fd), (level), (name), (val), (len)); \
if (__ret == -1) \
FAIL_ERRNO("setsockopt(" #name ")"); \
__ret; \
})
#define xsend(fd, buf, len, flags) \
({ \
ssize_t __ret = send((fd), (buf), (len), (flags)); \
if (__ret == -1) \
FAIL_ERRNO("send"); \
__ret; \
})
#define xrecv_nonblock(fd, buf, len, flags) \
({ \
ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
IO_TIMEOUT_SEC); \
if (__ret == -1) \
FAIL_ERRNO("recv"); \
__ret; \
})
#define xsocket(family, sotype, flags) \
({ \
int __ret = socket(family, sotype, flags); \
if (__ret == -1) \
FAIL_ERRNO("socket"); \
__ret; \
})
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
#define xbpf_prog_attach(prog, target, type, flags) \
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
#define xpthread_create(thread, attr, func, arg) \
({ \
int __ret = pthread_create((thread), (attr), (func), (arg)); \
errno = __ret; \
if (__ret) \
FAIL_ERRNO("pthread_create"); \
__ret; \
})
#define xpthread_join(thread, retval) \
({ \
int __ret = pthread_join((thread), (retval)); \
errno = __ret; \
if (__ret) \
FAIL_ERRNO("pthread_join"); \
__ret; \
})
static int poll_read(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
fd_set rfds;
int r;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
r = select(fd + 1, &rfds, NULL, NULL, &timeout);
if (r == 0)
errno = ETIME;
return r == 1 ? 0 : -1;
}
static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return accept(fd, addr, len);
}
static int recv_timeout(int fd, void *buf, size_t len, int flags,
unsigned int timeout_sec)
{
if (poll_read(fd, timeout_sec))
return -1;
return recv(fd, buf, len, flags);
}
static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
{
struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
addr4->sin_family = AF_INET;
addr4->sin_port = 0;
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
*len = sizeof(*addr4);
}
static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
{
struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
addr6->sin6_family = AF_INET6;
addr6->sin6_port = 0;
addr6->sin6_addr = in6addr_loopback;
*len = sizeof(*addr6);
}
static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len)
{
struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
addr->svm_family = AF_VSOCK;
addr->svm_port = VMADDR_PORT_ANY;
addr->svm_cid = VMADDR_CID_LOCAL;
*len = sizeof(*addr);
}
static void init_addr_loopback(int family, struct sockaddr_storage *ss,
socklen_t *len)
{
switch (family) {
case AF_INET:
init_addr_loopback4(ss, len);
return;
case AF_INET6:
init_addr_loopback6(ss, len);
return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
default:
FAIL("unsupported address family %d", family);
}
}
static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
{
return (struct sockaddr *)ss;
}
static int enable_reuseport(int s, int progfd)
{
int err, one = 1;
err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
if (err)
return -1;
err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
sizeof(progfd));
if (err)
return -1;
return 0;
}
static int socket_loopback_reuseport(int family, int sotype, int progfd)
{
struct sockaddr_storage addr;
socklen_t len;
int err, s;
init_addr_loopback(family, &addr, &len);
s = xsocket(family, sotype, 0);
if (s == -1)
return -1;
if (progfd >= 0)
enable_reuseport(s, progfd);
err = xbind(s, sockaddr(&addr), len);
if (err)
goto close;
if (sotype & SOCK_DGRAM)
return s;
err = xlisten(s, SOMAXCONN);
if (err)
goto close;
return s;
close:
xclose(s);
return -1;
}
static int socket_loopback(int family, int sotype)
{
return socket_loopback_reuseport(family, sotype, -1);
}
#include "sockmap_helpers.h"
static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
int family, int sotype, int mapfd)
@ -984,31 +671,12 @@ static const char *redir_mode_str(enum redir_mode mode)
}
}
static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
{
u64 value;
u32 key;
int err;
key = 0;
value = fd1;
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
if (err)
return err;
key = 1;
value = fd2;
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
}
static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
struct sockaddr_storage addr;
int s, c0, c1, p0, p1;
unsigned int pass;
socklen_t len;
int err, n;
u32 key;
char b;
@ -1019,36 +687,13 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (s < 0)
return;
len = sizeof(addr);
err = xgetsockname(s, sockaddr(&addr), &len);
err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
if (err)
goto close_srv;
c0 = xsocket(family, sotype, 0);
if (c0 < 0)
goto close_srv;
err = xconnect(c0, sockaddr(&addr), len);
if (err)
goto close_cli0;
p0 = xaccept_nonblock(s, NULL, NULL);
if (p0 < 0)
goto close_cli0;
c1 = xsocket(family, sotype, 0);
if (c1 < 0)
goto close_peer0;
err = xconnect(c1, sockaddr(&addr), len);
if (err)
goto close_cli1;
p1 = xaccept_nonblock(s, NULL, NULL);
if (p1 < 0)
goto close_cli1;
err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_peer1;
goto close;
n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
if (n < 0)
@ -1056,12 +701,12 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete write", log_prefix);
if (n < 1)
goto close_peer1;
goto close;
key = SK_PASS;
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
goto close_peer1;
goto close;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
@ -1070,13 +715,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete recv", log_prefix);
close_peer1:
close:
xclose(p1);
close_cli1:
xclose(c1);
close_peer0:
xclose(p0);
close_cli0:
xclose(c0);
close_srv:
xclose(s);

View File

@ -0,0 +1,32 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_rx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_tx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_msg SEC(".maps");
SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_DROP;
}
char _license[] SEC("license") = "GPL";

View File

@ -191,7 +191,7 @@ SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
int op, err, ret;
int op, ret;
op = (int) skops->op;
@ -203,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (lport == 10000) {
ret = 1;
#ifdef SOCKMAP
err = bpf_sock_map_update(skops, &sock_map, &ret,
bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
err = bpf_sock_hash_update(skops, &sock_map, &ret,
bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@ -218,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (bpf_ntohl(rport) == 10001) {
ret = 10;
#ifdef SOCKMAP
err = bpf_sock_map_update(skops, &sock_map, &ret,
bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
err = bpf_sock_hash_update(skops, &sock_map, &ret,
bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@ -230,8 +230,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
break;
}
__sink(err);
return 0;
}

View File

@ -0,0 +1,32 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_rx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_tx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
__type(key, int);
__type(value, int);
} sock_map_msg SEC(".maps");
SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -68,7 +68,7 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
ip netns del ns1
ip netns del ns1 &> /dev/null
ip netns del ns2 &> /dev/null
}