diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 8820360d00da..35d83e24dbdb 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -131,7 +131,7 @@ The following sections detail encoding of each kind. ``btf_type`` is followed by a ``u32`` with the following bits arrangement:: #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) - #define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) + #define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) #define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) The ``BTF_INT_ENCODING`` has the following attributes:: diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 03c065855eaf..d65ad5746f94 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -437,20 +437,6 @@ more details, with real examples. The second argument is optional, and if supplied will be used if first argument is not supported. - cc-ldoption - cc-ldoption is used to check if $(CC) when used to link object files - supports the given option. An optional second option may be - specified if first option are not supported. - - Example: - #arch/x86/kernel/Makefile - vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) - - In the above example, vsyscall-flags will be assigned the option - -Wl$(comma)--hash-style=sysv if it is supported by $(CC). - The second argument is optional, and if supplied will be used - if first argument is not supported. - as-instr as-instr checks if the assembler reports a specific instruction and then outputs either option1 or option2 diff --git a/MAINTAINERS b/MAINTAINERS index 5cfbea4ce575..0c55b0fedbe2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11068,10 +11068,8 @@ S: Supported F: drivers/net/ethernet/qlogic/netxen/ NFC SUBSYSTEM -M: Samuel Ortiz -L: linux-wireless@vger.kernel.org -L: linux-nfc@lists.01.org (subscribers-only) -S: Supported +L: netdev@vger.kernel.org +S: Orphan F: net/nfc/ F: include/net/nfc/ F: include/uapi/linux/nfc.h diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 5278c57dce73..302cf0ba1600 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -2767,12 +2767,6 @@ static int ia_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) case MEMDUMP: { switch (ia_cmds.sub_cmd) { - case MEMDUMP_DEV: - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (copy_to_user(ia_cmds.buf, iadev, sizeof(IADEV))) - return -EFAULT; - ia_cmds.status = 0; - break; case MEMDUMP_SEGREG: if (!capable(CAP_NET_ADMIN)) return -EPERM; tmps = (u16 __user *)ia_cmds.buf; diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index cbcc40d776b9..269b24a3baa1 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -109,15 +109,15 @@ u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) } struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { - return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB); + return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_IB); } struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { - return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH); + return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH); } struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) @@ -125,9 +125,10 @@ struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) return mlx5_eswitch_uplink_get_proto_dev(esw, 
REP_IB); } -struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport) +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, + u16 vport_num) { - return mlx5_eswitch_vport_rep(esw, vport); + return mlx5_eswitch_vport_rep(esw, vport_num); } struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 1d9778da8a50..8336e0517a5c 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -14,17 +14,17 @@ extern const struct mlx5_ib_profile uplink_rep_profile; u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - int vport_index); + u16 vport_num); struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - int vport_index); + u16 vport_num); void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u16 port); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, - int vport_index); + u16 vport_num); #else /* CONFIG_MLX5_ESWITCH */ static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) { @@ -33,7 +33,7 @@ static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) static inline struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { return NULL; } @@ -46,7 +46,7 @@ struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) static inline struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { return NULL; } @@ -63,7 +63,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, static inline struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, - int vport_index) + u16 vport_num) { return NULL; } diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 21cde7e78621..0d3ba056cda3 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_ARCNET) += arcnet/ obj-$(CONFIG_DEV_APPLETALK) += appletalk/ obj-$(CONFIG_CAIF) += caif/ obj-$(CONFIG_CAN) += can/ -obj-$(CONFIG_NET_DSA) += dsa/ +obj-y += dsa/ obj-$(CONFIG_ETHERNET) += ethernet/ obj-$(CONFIG_FDDI) += fddi/ obj-$(CONFIG_HIPPI) += hippi/ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c049410bc888..bebd9b1aeb64 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3343,7 +3343,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); return err; } @@ -3352,7 +3352,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); return err; } @@ -3370,31 +3370,31 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } err = 
clk_prepare_enable(*hclk); if (err) { - dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable hclk (%d)\n", err); goto err_disable_pclk; } err = clk_prepare_enable(*tx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); goto err_disable_hclk; } err = clk_prepare_enable(*rx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); goto err_disable_txclk; } err = clk_prepare_enable(*tsu_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tsu_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tsu_clk (%d)\n", err); goto err_disable_rxclk; } @@ -3868,7 +3868,7 @@ static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 5bb9eb35d76d..491475d87736 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -313,7 +313,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { bool is_eof = !!tx_swbd->skb; - enetc_unmap_tx_buff(tx_ring, tx_swbd); + if (likely(tx_swbd->dma)) + enetc_unmap_tx_buff(tx_ring, tx_swbd); + if (is_eof) { napi_consume_skb(tx_swbd->skb, napi_budget); tx_swbd->skb = NULL; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 1ecad9ffabae..b9519b6ad727 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -570,6 +570,7 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_ringparam = enetc_get_ringparam, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_link = ethtool_op_get_link, }; static const struct ethtool_ops enetc_vf_ethtool_ops = { @@ -584,6 +585,7 @@ static const struct ethtool_ops enetc_vf_ethtool_ops = { .get_rxfh = enetc_get_rxfh, .set_rxfh = enetc_set_rxfh, .get_ringparam = enetc_get_ringparam, + .get_link = ethtool_op_get_link, }; void enetc_set_ethtool_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 15876a6e7598..78287c517095 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -721,7 +721,7 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->watchdog_timeo = 5 * HZ; ndev->max_mtu = ENETC_MAX_MTU; - ndev->hw_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_LOOPBACK; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 64bebee9f52a..72c3ea887bcf 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -130,7 +130,7 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device 
*ndev, ndev->watchdog_timeo = 5 * HZ; ndev->max_mtu = ENETC_MAX_MTU; - ndev->hw_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index ffed2d4c9403..9c481823b3e8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1492,7 +1492,7 @@ int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, rule.port = port; rule.qpn = qpn; INIT_LIST_HEAD(&rule.list); - mlx4_err(dev, "going promisc on %x\n", port); + mlx4_info(dev, "going promisc on %x\n", port); return mlx4_flow_attach(dev, &rule, regid_p); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 9aca8086ee01..88ccfcfcd128 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -8,6 +8,7 @@ config MLX5_CORE select NET_DEVLINK imply PTP_1588_CLOCK imply VXLAN + imply MLXFW default n ---help--- Core driver for low level functionality of the ConnectX-4 and diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 937ba4bcb056..d2ab8cd8ad9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1604,7 +1604,27 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) static int status_to_err(u8 status) { - return status ? -1 : 0; /* TBD more meaningful codes */ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + case MLX5_DRIVER_STATUS_ABORTED: + return 0; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return -EBADR; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return -EFAULT; /* Bad address */ + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return -ENOMSG; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return -EIO; + default: + return -EINVAL; + } } static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 4746f2d28fb6..0ccd6d40baf7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -26,7 +26,7 @@ static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, 0); - MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7efaa58ae034..dd764e0471f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1901,6 +1901,22 @@ static int mlx5e_flash_device(struct net_device *dev, return mlx5e_ethtool_flash_device(priv, flash); } +#ifndef CONFIG_MLX5_EN_RXNFC +/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS + * 
otherwise this function will be defined from en_fs_ethtool.c + */ +static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (info->cmd != ETHTOOL_GRXRINGS) + return -EOPNOTSUPP; + /* ring_count is needed by ethtool -x */ + info->data = priv->channels.params.num_channels; + return 0; +} +#endif + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1919,8 +1935,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, -#ifdef CONFIG_MLX5_EN_RXNFC .get_rxnfc = mlx5e_get_rxnfc, +#ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, #endif .flash_device = mlx5e_flash_device, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 91e24f1cead8..5283e16c69e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -65,9 +65,26 @@ static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", + fw_rev_maj(mdev), fw_rev_min(mdev), + fw_rev_sub(mdev), mdev->board_id); +} + +static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_rep_get_drvinfo(dev, drvinfo); + strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev), + sizeof(drvinfo->bus_info)); } static const struct counter_desc sw_rep_stats_desc[] = { @@ -363,7 +380,7 @@ static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { }; static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { - .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, .get_sset_count = mlx5e_rep_get_sset_count, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 122f457091a2..31cd02f11499 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1595,7 +1595,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { struct flow_match_vlan match; - flow_rule_match_vlan(rule, &match); + flow_rule_match_cvlan(rule, &match); if (match.mask->vlan_id || match.mask->vlan_priority || match.mask->vlan_tpid) { @@ -1916,6 +1916,19 @@ struct mlx5_fields { offsetof(struct pedit_headers, field) + (off), \ MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} +/* masked values are the same and there are no rewrites that do not have a + * match. 
+ */ +#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ + type matchmaskx = *(type *)(matchmaskp); \ + type matchvalx = *(type *)(matchvalp); \ + type maskx = *(type *)(maskp); \ + type valx = *(type *)(valp); \ + \ + (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ + matchmaskx)); \ +}) + static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, void *matchmaskp, int size) { @@ -1923,16 +1936,13 @@ static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, switch (size) { case sizeof(u8): - same = ((*(u8 *)valp) & (*(u8 *)maskp)) == - ((*(u8 *)matchvalp) & (*(u8 *)matchmaskp)); + same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); break; case sizeof(u16): - same = ((*(u16 *)valp) & (*(u16 *)maskp)) == - ((*(u16 *)matchvalp) & (*(u16 *)matchmaskp)); + same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); break; case sizeof(u32): - same = ((*(u32 *)valp) & (*(u32 *)maskp)) == - ((*(u32 *)matchvalp) & (*(u32 *)matchmaskp)); + same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); break; } @@ -2557,8 +2567,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, /* in case all pedit actions are skipped, remove the MOD_HDR * flag. */ - if (parse_attr->num_mod_hdr_actions == 0) + if (parse_attr->num_mod_hdr_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + kfree(parse_attr->mod_hdr_actions); + } } attr->action = action; @@ -2995,6 +3007,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, */ if (parse_attr->num_mod_hdr_actions == 0) { action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + kfree(parse_attr->mod_hdr_actions); if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) attr->split_count = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 7b61126fcec9..195a7d903cec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -361,7 +361,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } stats->bytes += num_bytes; - stats->xmit_more += netdev_xmit_more(); + stats->xmit_more += xmit_more; headlen = skb->len - ihs - skb->data_len; ds_cnt += !!headlen; @@ -624,7 +624,8 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, } netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5_av *av, u32 dqpn, u32 dqkey) + struct mlx5_av *av, u32 dqpn, u32 dqkey, + bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5i_tx_wqe *wqe; @@ -660,7 +661,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } stats->bytes += num_bytes; - stats->xmit_more += netdev_xmit_more(); + stats->xmit_more += xmit_more; headlen = skb->len - ihs - skb->data_len; ds_cnt += !!headlen; @@ -705,7 +706,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, goto err_drop; mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes, - num_dma, wi, cseg, false); + num_dma, wi, cseg, xmit_more); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9ea0ccfe5ef5..6a921e24cd5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1526,7 +1526,7 @@ static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) static void esw_apply_vport_conf(struct mlx5_eswitch *esw, struct 
mlx5_vport *vport) { - int vport_num = vport->vport; + u16 vport_num = vport->vport; if (esw->manager_vport == vport_num) return; @@ -1915,7 +1915,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) /* Vport Administration */ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, - int vport, u8 mac[ETH_ALEN]) + u16 vport, u8 mac[ETH_ALEN]) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); u64 node_guid; @@ -1959,7 +1959,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, } int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, - int vport, int link_state) + u16 vport, int link_state) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); int err = 0; @@ -1989,7 +1989,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, - int vport, struct ifla_vf_info *ivi) + u16 vport, struct ifla_vf_info *ivi) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); @@ -2014,7 +2014,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, } int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos, u8 set_flags) + u16 vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); int err = 0; @@ -2047,7 +2047,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, } int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos) + u16 vport, u16 vlan, u8 qos) { u8 set_flags = 0; @@ -2058,7 +2058,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, } int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, - int vport, bool spoofchk) + u16 vport, bool spoofchk) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); bool pschk; @@ -2208,7 +2208,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) } int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, - int vport, bool setting) + u16 vport, bool setting) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); @@ -2278,7 +2278,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) return 0; } -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); @@ -2368,7 +2368,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, - int vport_num, + u16 vport_num, struct ifla_vf_stats *vf_stats) { struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ed3fad689ec9..d043d6f9797d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -246,23 +246,23 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, - int vport, u8 mac[ETH_ALEN]); + u16 vport, u8 mac[ETH_ALEN]); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, - int vport, int link_state); + u16 vport, int link_state); int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos); + u16 vport, u16 vlan, u8 qos); 
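The eswitch changes in this area consistently narrow vport arguments from int to u16, matching the header updates that follow. The point of the conversion, as the new prototypes suggest, is that a vport is a 16-bit identifier at the device interface, so carrying it as a signed int lets out-of-range values truncate silently inside the driver. A minimal standalone sketch of that failure mode, using hypothetical names rather than actual mlx5 code:

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical firmware command layout with a 16-bit vport field. */
	struct fw_cmd {
		uint16_t vport;
	};

	static void set_vport(struct fw_cmd *cmd, int vport)
	{
		cmd->vport = (uint16_t)vport;	/* silent truncation happens here */
	}

	int main(void)
	{
		struct fw_cmd cmd;

		set_vport(&cmd, 0x1ffff);	/* out-of-range caller value */
		printf("stored vport: 0x%x\n", cmd.vport);	/* prints 0xffff */
		return 0;
	}

Declaring the parameter u16 moves any narrowing to the call site, where the compiler can warn and reviewers can see it, instead of hiding it behind the prototype.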
int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, - int vport, bool spoofchk); + u16 vport, bool spoofchk); int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, - int vport_num, bool setting); -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u16 vport_num, bool setting); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate); int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, - int vport, struct ifla_vf_info *ivi); + u16 vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, - int vport, + u16 vport, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); @@ -296,7 +296,7 @@ u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest); enum { @@ -366,7 +366,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr); int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos, u8 set_flags); + u16 vport, u16 vlan, u8 qos, u8 set_flags); static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) @@ -430,7 +430,7 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, return vport_num; } -static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, +static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, int index) { if (index == mlx5_eswitch_ecpf_idx(esw) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e09ae27485ee..47b446d30f71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -57,7 +57,7 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); WARN_ON(idx > esw->total_vports - 1); return &esw->offloads.vport_reps[idx]; @@ -515,7 +515,8 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, } struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, + u32 sqn) { struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest = {}; @@ -1181,7 +1182,7 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) } struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest) { struct mlx5_flow_act flow_act = {0}; @@ -1731,13 +1732,14 @@ static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw) struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) { + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, 
vport); } } -static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports, + int nvports) { int err; @@ -1745,7 +1747,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { - err = esw_prio_tag_acls_config(esw, nvports); + err = esw_prio_tag_acls_config(esw, vf_nvports); if (err) return err; } @@ -1838,7 +1840,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, { int err; - err = esw_offloads_steering_init(esw, total_nvports); + err = esw_offloads_steering_init(esw, vf_nvports, total_nvports); if (err) return err; @@ -2243,7 +2245,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, - int vport, + u16 vport, u8 rep_type) { struct mlx5_eswitch_rep *rep; @@ -2264,7 +2266,7 @@ void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, - int vport) + u16 vport) { return mlx5_eswitch_get_rep(esw, vport); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fb5b61727ee7..d7ca7e82a832 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1380,6 +1380,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && d1->vport.num == d2->vport.num && d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? + (d1->vport.vhca_id == d2->vport.vhca_id) : true) && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? 
(d1->vport.reformat_id == d2->vport.reformat_id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index ada1b7c0e0b8..9ca492b430d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -619,7 +619,7 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, struct mlx5_ib_ah *mah = to_mah(address); struct mlx5i_priv *ipriv = epriv->ppriv; - return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey); + return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more()); } static void mlx5i_set_pkey_index(struct net_device *netdev, int id) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 9165ca567047..e19ba3fcd1b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -119,7 +119,8 @@ static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, } netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5_av *av, u32 dqpn, u32 dqkey); + struct mlx5_av *av, u32 dqpn, u32 dqkey, + bool xmit_more); void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index bcbe07ec22be..6ee6de7f0160 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -122,6 +122,12 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_driver_priv); +bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver->res_query_enabled; +} +EXPORT_SYMBOL(mlxsw_core_res_query_enabled); + struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 917be621c904..e3832cb5bdda 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -28,6 +28,8 @@ unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); +bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core); + int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index c1c1965d7acc..72539a9a3847 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -3,6 +3,7 @@ #include #include +#include #include "core.h" #include "core_env.h" @@ -162,7 +163,7 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, { u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE; - u8 module_rev_id, module_id; + u8 module_rev_id, module_id, diag_mon; unsigned int read_size; int err; @@ -195,8 +196,21 @@ int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, } break; case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + /* Verify if transceiver provides diagnostic monitoring 
page */ + err = mlxsw_env_query_module_eeprom(mlxsw_core, module, + SFP_DIAGMON, 1, &diag_mon, + &read_size); + if (err) + return err; + + if (read_size < 1) + return -EIO; + modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + if (diag_mon) + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 6956bbebe2f1..496dc904c5ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -518,6 +518,9 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) u8 width; int err; + if (!mlxsw_core_res_query_enabled(mlxsw_hwmon->core)) + return 0; + /* Add extra attributes for module temperature. Sensor index is * assigned to sensor_count value, while all indexed before * sensor_count are already utilized by the sensors connected through diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 472f63f9fac5..d3e851e7ca72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -740,6 +740,9 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal_module *module_tz; int i, err; + if (!mlxsw_core_res_query_enabled(core)) + return 0; + thermal->tz_module_arr = kcalloc(module_count, sizeof(*thermal->tz_module_arr), GFP_KERNEL); @@ -776,6 +779,9 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) unsigned int module_count = mlxsw_core_max_ports(thermal->core); int i; + if (!mlxsw_core_res_query_enabled(thermal->core)) + return; + for (i = module_count - 1; i >= 0; i--) mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); kfree(thermal->tz_module_arr); diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index faa06edf95ac..8c67505865a4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -168,6 +168,7 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) return; } + rcu_read_lock(); for (i = 0; i < count; i++) { ipv4_addr = payload->tun_info[i].ipv4; port = be32_to_cpu(payload->tun_info[i].egress_port); @@ -183,6 +184,7 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) neigh_event_send(n, NULL); neigh_release(n); } + rcu_read_unlock(); } static int @@ -367,9 +369,10 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) payload = nfp_flower_cmsg_get_data(skb); + rcu_read_lock(); netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); if (!netdev) - goto route_fail_warning; + goto fail_rcu_unlock; flow.daddr = payload->ipv4_addr; flow.flowi4_proto = IPPROTO_UDP; @@ -379,21 +382,23 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) rt = ip_route_output_key(dev_net(netdev), &flow); err = PTR_ERR_OR_ZERO(rt); if (err) - goto route_fail_warning; + goto fail_rcu_unlock; #else - goto route_fail_warning; + goto fail_rcu_unlock; #endif /* Get the neighbour entry for the lookup */ n = dst_neigh_lookup(&rt->dst, &flow.daddr); ip_rt_put(rt); if (!n) - goto route_fail_warning; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_KERNEL); + goto fail_rcu_unlock; + 
nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); neigh_release(n); + rcu_read_unlock(); return; -route_fail_warning: +fail_rcu_unlock: + rcu_read_unlock(); nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index eed4fe3d871f..0fedd28fdb6e 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -487,6 +487,7 @@ static int aqr107_config_init(struct phy_device *phydev) /* Check that the PHY interface type is compatible */ if (phydev->interface != PHY_INTERFACE_MODE_SGMII && phydev->interface != PHY_INTERFACE_MODE_2500BASEX && + phydev->interface != PHY_INTERFACE_MODE_XGMII && phydev->interface != PHY_INTERFACE_MODE_10GKR) return -ENODEV; diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c index b5edc7f96a39..685e875f5164 100644 --- a/drivers/net/ppp/ppp_deflate.c +++ b/drivers/net/ppp/ppp_deflate.c @@ -610,12 +610,20 @@ static struct compressor ppp_deflate_draft = { static int __init deflate_init(void) { - int answer = ppp_register_compressor(&ppp_deflate); - if (answer == 0) - printk(KERN_INFO - "PPP Deflate Compression module registered\n"); - ppp_register_compressor(&ppp_deflate_draft); - return answer; + int rc; + + rc = ppp_register_compressor(&ppp_deflate); + if (rc) + return rc; + + rc = ppp_register_compressor(&ppp_deflate_draft); + if (rc) { + ppp_unregister_compressor(&ppp_deflate); + return rc; + } + + pr_info("PPP Deflate Compression module registered\n"); + return 0; } static void __exit deflate_cleanup(void) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index b86c5ce9a92a..7e44110746dd 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -320,7 +320,6 @@ static int aqc111_get_link_ksettings(struct net_device *net, static void aqc111_set_phy_speed(struct usbnet *dev, u8 autoneg, u16 speed) { struct aqc111_data *aqc111_data = dev->driver_priv; - u32 phy_on_the_wire; aqc111_data->phy_cfg &= ~AQ_ADV_MASK; aqc111_data->phy_cfg |= AQ_PAUSE; @@ -362,8 +361,7 @@ static void aqc111_set_phy_speed(struct usbnet *dev, u8 autoneg, u16 speed) } } - phy_on_the_wire = aqc111_data->phy_cfg; - aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, &phy_on_the_wire); + aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, &aqc111_data->phy_cfg); } static int aqc111_set_link_ksettings(struct net_device *net, @@ -439,7 +437,7 @@ static int aqc111_change_mtu(struct net_device *net, int new_mtu) aqc111_write16_cmd(dev, AQ_ACCESS_MAC, SFR_MEDIUM_STATUS_MODE, 2, ®16); - if (dev->net->mtu > 12500 && dev->net->mtu <= 16334) { + if (dev->net->mtu > 12500) { memcpy(buf, &AQC111_BULKIN_SIZE[2], 5); /* RX bulk configuration */ aqc111_write_cmd(dev, AQ_ACCESS_MAC, SFR_RX_BULKIN_QCTRL, @@ -453,10 +451,8 @@ static int aqc111_change_mtu(struct net_device *net, int new_mtu) reg16 = 0x1020; else if (dev->net->mtu <= 12500) reg16 = 0x1420; - else if (dev->net->mtu <= 16334) - reg16 = 0x1A20; else - return 0; + reg16 = 0x1A20; aqc111_write16_cmd(dev, AQ_ACCESS_MAC, SFR_PAUSE_WATERLVL_LOW, 2, ®16); @@ -757,7 +753,6 @@ static void aqc111_unbind(struct usbnet *dev, struct usb_interface *intf) { struct aqc111_data *aqc111_data = dev->driver_priv; u16 reg16; - u32 phy_on_the_wire; /* Force bz */ reg16 = SFR_PHYPWR_RSTCTL_BZ; @@ -771,9 +766,8 @@ static void aqc111_unbind(struct usbnet *dev, struct usb_interface *intf) aqc111_data->phy_cfg &= ~AQ_ADV_MASK; aqc111_data->phy_cfg |= AQ_LOW_POWER; aqc111_data->phy_cfg &= ~AQ_PHY_POWER_EN; - phy_on_the_wire = 
aqc111_data->phy_cfg; aqc111_write32_cmd_nopm(dev, AQ_PHY_OPS, 0, 0, - &phy_on_the_wire); + &aqc111_data->phy_cfg); kfree(aqc111_data); } @@ -996,7 +990,6 @@ static int aqc111_reset(struct usbnet *dev) { struct aqc111_data *aqc111_data = dev->driver_priv; u8 reg8 = 0; - u32 phy_on_the_wire; dev->rx_urb_size = URB_SIZE; @@ -1009,9 +1002,8 @@ static int aqc111_reset(struct usbnet *dev) /* Power up ethernet PHY */ aqc111_data->phy_cfg = AQ_PHY_POWER_EN; - phy_on_the_wire = aqc111_data->phy_cfg; aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &phy_on_the_wire); + &aqc111_data->phy_cfg); /* Set the MAC address */ aqc111_write_cmd(dev, AQ_ACCESS_MAC, SFR_NODE_ID, ETH_ALEN, @@ -1042,7 +1034,6 @@ static int aqc111_stop(struct usbnet *dev) { struct aqc111_data *aqc111_data = dev->driver_priv; u16 reg16 = 0; - u32 phy_on_the_wire; aqc111_read16_cmd(dev, AQ_ACCESS_MAC, SFR_MEDIUM_STATUS_MODE, 2, ®16); @@ -1054,9 +1045,8 @@ static int aqc111_stop(struct usbnet *dev) /* Put PHY to low power*/ aqc111_data->phy_cfg |= AQ_LOW_POWER; - phy_on_the_wire = aqc111_data->phy_cfg; aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &phy_on_the_wire); + &aqc111_data->phy_cfg); netif_carrier_off(dev->net); @@ -1332,7 +1322,6 @@ static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) u16 temp_rx_ctrl = 0x00; u16 reg16; u8 reg8; - u32 phy_on_the_wire; usbnet_suspend(intf, message); @@ -1404,14 +1393,12 @@ static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) aqc111_write_cmd(dev, AQ_WOL_CFG, 0, 0, WOL_CFG_SIZE, &wol_cfg); - phy_on_the_wire = aqc111_data->phy_cfg; aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &phy_on_the_wire); + &aqc111_data->phy_cfg); } else { aqc111_data->phy_cfg |= AQ_LOW_POWER; - phy_on_the_wire = aqc111_data->phy_cfg; aqc111_write32_cmd(dev, AQ_PHY_OPS, 0, 0, - &phy_on_the_wire); + &aqc111_data->phy_cfg); /* Disable RX path */ aqc111_read16_cmd_nopm(dev, AQ_ACCESS_MAC, @@ -1428,7 +1415,7 @@ static int aqc111_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); struct aqc111_data *aqc111_data = dev->driver_priv; - u16 reg16, oldreg16; + u16 reg16; u8 reg8; netif_carrier_off(dev->net); @@ -1444,11 +1431,9 @@ static int aqc111_resume(struct usb_interface *intf) /* Configure RX control register => start operation */ reg16 = aqc111_data->rxctl; reg16 &= ~SFR_RX_CTL_START; - /* needs to be saved in case endianness is swapped */ - oldreg16 = reg16; aqc111_write16_cmd_nopm(dev, AQ_ACCESS_MAC, SFR_RX_CTL, 2, ®16); - reg16 = oldreg16 | SFR_RX_CTL_START; + reg16 |= SFR_RX_CTL_START; aqc111_write16_cmd_nopm(dev, AQ_ACCESS_MAC, SFR_RX_CTL, 2, ®16); aqc111_set_phy_speed(dev, aqc111_data->autoneg, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5c3ac97519b7..d9a6699abe59 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1259,6 +1259,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index daf7908d1e35..84dd4a8980c5 100644 --- 
a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1008,6 +1008,7 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, struct list_head *workspace; int ret; + level = btrfs_compress_op[type]->set_level(level); workspace = get_workspace(type, level); ret = btrfs_compress_op[type]->compress_pages(workspace, mapping, start, pages, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f79e477a378e..1aee51a9f3bf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -757,12 +757,14 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, } static void add_pinned_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_ref *ref) + struct btrfs_ref *ref, int sign) { struct btrfs_space_info *space_info; - s64 num_bytes = -ref->len; + s64 num_bytes; u64 flags; + ASSERT(sign == 1 || sign == -1); + num_bytes = sign * ref->len; if (ref->type == BTRFS_REF_METADATA) { if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID) flags = BTRFS_BLOCK_GROUP_SYSTEM; @@ -2063,7 +2065,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_ref_tree_mod(fs_info, generic_ref); if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) - add_pinned_bytes(fs_info, generic_ref); + add_pinned_bytes(fs_info, generic_ref, -1); return ret; } @@ -3882,8 +3884,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags) info->space_info_kobj, "%s", alloc_name(space_info->flags)); if (ret) { - percpu_counter_destroy(&space_info->total_bytes_pinned); - kfree(space_info); + kobject_put(&space_info->kobj); return ret; } @@ -7190,7 +7191,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, } out: if (pin) - add_pinned_bytes(fs_info, &generic_ref); + add_pinned_bytes(fs_info, &generic_ref, 1); if (last_ref) { /* @@ -7238,7 +7239,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) btrfs_ref_tree_mod(fs_info, ref); if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) - add_pinned_bytes(fs_info, ref); + add_pinned_bytes(fs_info, ref, 1); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7e85dca0e6f2..89f5be2bfb43 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2067,6 +2067,18 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; + /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges, and assertion failures during + * hole detection. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + /* * The range length can be represented by u64, we have to do the typecasts * to avoid signed overflow if it's [0, LLONG_MAX] eg. 
from fsync() @@ -2554,10 +2566,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, &cached_state); - if (ret) { - inode_unlock(inode); + if (ret) goto out_only_mutex; - } path = btrfs_alloc_path(); if (!path) { diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index ca2716917e37..a9e2e66152ee 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -332,6 +332,7 @@ static int inherit_props(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; int ret; int i; + bool need_reserve = false; if (!test_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(parent)->runtime_flags)) @@ -357,11 +358,20 @@ static int inherit_props(struct btrfs_trans_handle *trans, if (ret) continue; - num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - ret = btrfs_block_rsv_add(root, trans->block_rsv, - num_bytes, BTRFS_RESERVE_NO_FLUSH); - if (ret) - return ret; + /* + * Currently callers should be reserving 1 item for properties, + * since we only have 1 property that we currently support. If + * we add more in the future we need to try and reserve more + * space for them. But we should also revisit how we do space + * reservations if we do add more properties in the future. + */ + if (need_reserve) { + num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); + ret = btrfs_block_rsv_add(root, trans->block_rsv, + num_bytes, BTRFS_RESERVE_NO_FLUSH); + if (ret) + return ret; + } ret = btrfs_setxattr(trans, inode, h->xattr_name, value, strlen(value), 0); @@ -375,9 +385,13 @@ static int inherit_props(struct btrfs_trans_handle *trans, &BTRFS_I(inode)->runtime_flags); } - btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes); - if (ret) - return ret; + if (need_reserve) { + btrfs_block_rsv_release(fs_info, trans->block_rsv, + num_bytes); + if (ret) + return ret; + } + need_reserve = true; } return 0; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 1b9a5d0de139..22124122728c 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -132,10 +132,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root return -ENOMEM; ret = btrfs_search_slot(trans, root, key, path, 0, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, ret); + if (ret < 0) goto out; - } if (ret > 0) { btrfs_crit(fs_info, diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 5a5930e3d32b..2f078b77fe14 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -825,7 +825,12 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, fs_devs->fsid_kobj.kset = btrfs_kset; error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, parent, "%pU", fs_devs->fsid); - return error; + if (error) { + kobject_put(&fs_devs->fsid_kobj); + return error; + } + + return 0; } int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 748cd1598255..96fce4bef4e7 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -107,8 +107,26 @@ static void file_extent_err(const struct extent_buffer *eb, int slot, (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ }) +static u64 file_extent_end(struct extent_buffer *leaf, + struct btrfs_key *key, + struct btrfs_file_extent_item *extent) +{ + u64 end; + u64 len; + + if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_ram_bytes(leaf, extent); + end = ALIGN(key->offset + len, leaf->fs_info->sectorsize); + } else { + len = 
btrfs_file_extent_num_bytes(leaf, extent); + end = key->offset + len; + } + return end; +} + static int check_extent_data_item(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) + struct btrfs_key *key, int slot, + struct btrfs_key *prev_key) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_file_extent_item *fi; @@ -188,6 +206,28 @@ static int check_extent_data_item(struct extent_buffer *leaf, CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) || CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)) return -EUCLEAN; + + /* + * Check that no two consecutive file extent items, in the same leaf, + * present ranges that overlap each other. + */ + if (slot > 0 && + prev_key->objectid == key->objectid && + prev_key->type == BTRFS_EXTENT_DATA_KEY) { + struct btrfs_file_extent_item *prev_fi; + u64 prev_end; + + prev_fi = btrfs_item_ptr(leaf, slot - 1, + struct btrfs_file_extent_item); + prev_end = file_extent_end(leaf, prev_key, prev_fi); + if (prev_end > key->offset) { + file_extent_err(leaf, slot - 1, +"file extent end range (%llu) goes beyond start offset (%llu) of the next file extent", + prev_end, key->offset); + return -EUCLEAN; + } + } + return 0; } @@ -774,14 +814,15 @@ static int check_inode_item(struct extent_buffer *leaf, * Common point to switch the item-specific validation. */ static int check_leaf_item(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) + struct btrfs_key *key, int slot, + struct btrfs_key *prev_key) { int ret = 0; struct btrfs_chunk *chunk; switch (key->type) { case BTRFS_EXTENT_DATA_KEY: - ret = check_extent_data_item(leaf, key, slot); + ret = check_extent_data_item(leaf, key, slot, prev_key); break; case BTRFS_EXTENT_CSUM_KEY: ret = check_csum_item(leaf, key, slot); @@ -928,7 +969,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) * Check if the item size and content meet other * criteria */ - ret = check_leaf_item(leaf, &key, slot); + ret = check_leaf_item(leaf, &key, slot, &prev_key); if (ret < 0) return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6adcd8a2c5c7..6c47f6ed3e94 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4182,6 +4182,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, *last_extent, 0, 0, len, 0, len, 0, 0, 0); + *last_extent += len; } } } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 59631dd0777c..4fb3aa2dc975 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,6 +36,7 @@ struct bpf_map_ops { void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); void (*map_release_uref)(struct bpf_map *map); + void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 0ca77dd1429c..cf226c190329 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -51,13 +51,13 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, - int vport, + u16 vport_num, u8 rep_type); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, - int vport); + u16 vport_num); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type); u8 mlx5_eswitch_mode(struct 
mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, - int vport, u32 sqn); + u16 vport_num, u32 sqn); #endif diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 9cd72aab76fe..0f0346e6829c 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -22,7 +22,7 @@ static inline int of_get_phy_mode(struct device_node *np) static inline const void *of_get_mac_address(struct device_node *np) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct net_device *of_find_net_device_by_node(struct device_node *np) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f7714d3b46bd..9f8bc06d4136 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -84,7 +84,7 @@ struct bucket_table { struct lockdep_map dep_map; - struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp; }; /* @@ -261,13 +261,13 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash); +struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -284,21 +284,21 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_lock_head __rcu *const *rht_bucket( +static inline struct rhash_lock_head *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_lock_head __rcu **rht_bucket_var( +static inline struct rhash_lock_head **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_lock_head __rcu **rht_bucket_insert( +static inline struct rhash_lock_head **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : @@ -349,6 +349,12 @@ static inline void rht_unlock(struct bucket_table *tbl, local_bh_enable(); } +static inline struct rhash_head __rcu *__rht_ptr( + struct rhash_lock_head *const *bkt) +{ + return (struct rhash_head __rcu *)((unsigned long)*bkt & ~BIT(0)); +} + /* * Where 'bkt' is a bucket and might be locked: * rht_ptr() dereferences that pointer and clears the lock bit. @@ -356,30 +362,30 @@ static inline void rht_unlock(struct bucket_table *tbl, * access is guaranteed, such as when destroying the table. 
*/ static inline struct rhash_head *rht_ptr( - struct rhash_lock_head __rcu * const *bkt, + struct rhash_lock_head *const *bkt, struct bucket_table *tbl, unsigned int hash) { - const struct rhash_lock_head *p = - rht_dereference_bucket_rcu(*bkt, tbl, hash); - - if ((((unsigned long)p) & ~BIT(0)) == 0) - return RHT_NULLS_MARKER(bkt); - return (void *)(((unsigned long)p) & ~BIT(0)); -} - -static inline struct rhash_head *rht_ptr_exclusive( - struct rhash_lock_head __rcu * const *bkt) -{ - const struct rhash_lock_head *p = - rcu_dereference_protected(*bkt, 1); + struct rhash_head __rcu *p = __rht_ptr(bkt); if (!p) return RHT_NULLS_MARKER(bkt); - return (void *)(((unsigned long)p) & ~BIT(0)); + + return rht_dereference_bucket_rcu(p, tbl, hash); } -static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, +static inline struct rhash_head *rht_ptr_exclusive( + struct rhash_lock_head *const *bkt) +{ + struct rhash_head __rcu *p = __rht_ptr(bkt); + + if (!p) + return RHT_NULLS_MARKER(bkt); + + return rcu_dereference_protected(p, 1); +} + +static inline void rht_assign_locked(struct rhash_lock_head **bkt, struct rhash_head *obj) { struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; @@ -390,7 +396,7 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, } static inline void rht_assign_unlock(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bkt, + struct rhash_lock_head **bkt, struct rhash_head *obj) { struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; @@ -587,7 +593,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - struct rhash_lock_head __rcu * const *bkt; + struct rhash_lock_head *const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -703,7 +709,7 @@ static inline void *__rhashtable_insert_fast( .ht = ht, .key = key, }; - struct rhash_lock_head __rcu **bkt; + struct rhash_lock_head **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; @@ -989,7 +995,7 @@ static inline int __rhashtable_remove_fast_one( struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { - struct rhash_lock_head __rcu **bkt; + struct rhash_lock_head **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; @@ -1141,7 +1147,7 @@ static inline int __rhashtable_replace_fast( struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { - struct rhash_lock_head __rcu **bkt; + struct rhash_lock_head **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6d58fa8a65fd..2ee5e63195c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1434,10 +1434,12 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { - if (uarg->callback == sock_zerocopy_callback) { + if (skb_zcopy_is_nouarg(skb)) { + /* no notification callback */ + } else if (uarg->callback == sock_zerocopy_callback) { uarg->zerocopy = uarg->zerocopy && zerocopy; sock_zerocopy_put(uarg); - } else if (!skb_zcopy_is_nouarg(skb)) { + } else { uarg->callback(uarg, zerocopy); } @@ -2691,7 +2693,8 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { if (likely(!skb_zcopy(skb))) return 0; - if (skb_uarg(skb)->callback == sock_zerocopy_callback) + if (!skb_zcopy_is_nouarg(skb) && + skb_uarg(skb)->callback == sock_zerocopy_callback) return 
0; return skb_copy_ubufs(skb, gfp_mask); } diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 6200900434e1..a2df99f9b196 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -71,6 +71,8 @@ void flow_rule_match_eth_addrs(const struct flow_rule *rule, struct flow_match_eth_addrs *out); void flow_rule_match_vlan(const struct flow_rule *rule, struct flow_match_vlan *out); +void flow_rule_match_cvlan(const struct flow_rule *rule, + struct flow_match_vlan *out); void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, struct flow_match_ipv4_addrs *out); void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 40105738e2f6..525f701653ca 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -167,7 +167,8 @@ struct fib6_info { dst_nocount:1, dst_nopolicy:1, dst_host:1, - unused:3; + fib6_destroying:1, + unused:2; struct fib6_nh fib6_nh; struct rcu_head rcu; diff --git a/include/net/sock.h b/include/net/sock.h index 4d208c0f9c14..0680fa988497 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1473,7 +1473,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); - if (!sk->sk_tx_skb_cache) { + if (!sk->sk_tx_skb_cache && !skb_cloned(skb)) { skb_zcopy_clear(skb, true); sk->sk_tx_skb_cache = skb; return; diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 9310652ca4f9..63ae4a39e58b 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -83,7 +83,7 @@ struct btf_type { * is the 32 bits arrangement: */ #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) -#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) +#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) #define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) /* Attributes stored in the BTF_INT_ENCODING */ diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 191b79948424..1e525d70f833 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -164,6 +164,9 @@ static void dev_map_free(struct bpf_map *map) bpf_clear_redirect_map(map); synchronize_rcu(); + /* Make sure prior __dev_map_entry_free() calls have completed. */ + rcu_barrier(); + /* To ensure all pending flush operations have completed wait for flush * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
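The devmap hunk above pairs synchronize_rcu() with a new rcu_barrier() because the two wait for different things: a grace period only guarantees that readers are gone, while rcu_barrier() additionally waits until every callback already handed to call_rcu() (here, the deferred __dev_map_entry_free() work) has actually run. A toy pthread model of the distinction, built on the same sentinel-callback idea rcu_barrier() itself uses; all names are invented and nothing here is the kernel implementation:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct cb {
        void (*func)(void *);
        void *arg;
        struct cb *next;
    };

    static struct cb *head;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    /* call_rcu() analogue: queue a callback, FIFO */
    static void defer(void (*func)(void *), void *arg)
    {
        struct cb *cb = malloc(sizeof(*cb)), **p;

        cb->func = func; cb->arg = arg; cb->next = NULL;
        pthread_mutex_lock(&lock);
        for (p = &head; *p; p = &(*p)->next)
            ;
        *p = cb;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
    }

    static void *worker(void *unused)
    {
        (void)unused;
        for (;;) {
            pthread_mutex_lock(&lock);
            while (!head)
                pthread_cond_wait(&cond, &lock);
            struct cb *cb = head;
            head = cb->next;
            pthread_mutex_unlock(&lock);
            cb->func(cb->arg);
            free(cb);
        }
        return NULL;
    }

    static pthread_mutex_t block = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t bcond = PTHREAD_COND_INITIALIZER;
    static int bdone;

    static void barrier_cb(void *unused)
    {
        (void)unused;
        pthread_mutex_lock(&block);
        bdone = 1;
        pthread_cond_signal(&bcond);
        pthread_mutex_unlock(&block);
    }

    /* rcu_barrier() analogue: queue a callback behind everything already
     * queued and wait for it, so all prior deferred frees are done. */
    static void barrier(void)
    {
        defer(barrier_cb, NULL);
        pthread_mutex_lock(&block);
        while (!bdone)
            pthread_cond_wait(&bcond, &block);
        bdone = 0;
        pthread_mutex_unlock(&block);
    }

    static void free_entry(void *p) { printf("freed %s\n", (char *)p); }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        defer(free_entry, "entry A");
        defer(free_entry, "entry B");
        barrier(); /* both entries are freed by now */
        printf("safe to tear down the map\n");
        return 0;
    }
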
* Because the above synchronize_rcu() ensures the map is disconnected diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 192d32e77db3..0f2708fde5f7 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -527,18 +527,30 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) return insn - insn_buf; } -static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) +static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, + void *key, const bool mark) { struct htab_elem *l = __htab_map_lookup_elem(map, key); if (l) { - bpf_lru_node_set_ref(&l->lru_node); + if (mark) + bpf_lru_node_set_ref(&l->lru_node); return l->key + round_up(map->key_size, 8); } return NULL; } +static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) +{ + return __htab_lru_map_lookup_elem(map, key, true); +} + +static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) +{ + return __htab_lru_map_lookup_elem(map, key, false); +} + static u32 htab_lru_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { @@ -1250,6 +1262,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_lru_map_lookup_elem, + .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, .map_update_elem = htab_lru_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, .map_gen_lookup = htab_lru_map_gen_lookup, @@ -1281,7 +1294,6 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l; void __percpu *pptr; int ret = -ENOENT; @@ -1297,8 +1309,9 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) l = __htab_map_lookup_elem(map, key); if (!l) goto out; - if (htab_is_lru(htab)) - bpf_lru_node_set_ref(&l->lru_node); + /* We do not mark LRU map element here in order to not mess up + * eviction heuristics when user space does a map walk. 
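The hashtab change above splits the LRU lookup in two: the datapath variant still calls bpf_lru_node_set_ref() so hot elements survive eviction, while the new *_sys variant, used on behalf of user space, leaves the reference bit alone so that dumping a map does not make every entry look recently used. A sketch of the idea, with a toy table and invented names:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    struct elem {
        char key[16];
        bool referenced; /* stand-in for bpf_lru_node_set_ref() */
    };

    static struct elem table[] = { { "flow-1", false }, { "flow-2", false } };

    static struct elem *lookup(const char *key, bool mark)
    {
        for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            if (strcmp(table[i].key, key) == 0) {
                if (mark)
                    table[i].referenced = true;
                return &table[i];
            }
        }
        return NULL;
    }

    int main(void)
    {
        lookup("flow-1", true);  /* program lookup: promote in LRU */
        lookup("flow-2", false); /* syscall walk: leave LRU state alone */
        printf("flow-1 referenced: %d\n", table[0].referenced);
        printf("flow-2 referenced: %d\n", table[1].referenced);
        return 0;
    }
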
+ */ pptr = htab_elem_get_ptr(l, map->key_size); for_each_possible_cpu(cpu) { bpf_long_memcpy(value + off, diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index bc53e5b20ddc..84a80b02db99 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -518,7 +518,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) { struct bpf_prog *prog; - int ret = inode_permission(inode, MAY_READ | MAY_WRITE); + int ret = inode_permission(inode, MAY_READ); if (ret) return ERR_PTR(ret); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ad3ccf82f31d..cb5440b02e82 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -808,7 +808,10 @@ static int map_lookup_elem(union bpf_attr *attr) err = map->ops->map_peek_elem(map, value); } else { rcu_read_lock(); - ptr = map->ops->map_lookup_elem(map, key); + if (map->ops->map_lookup_elem_sys_only) + ptr = map->ops->map_lookup_elem_sys_only(map, key); + else + ptr = map->ops->map_lookup_elem(map, key); if (IS_ERR(ptr)) { err = PTR_ERR(ptr); } else if (!ptr) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b496ffdf5f36..f92d6ad5e080 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1297,7 +1297,8 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, } #ifdef CONFIG_MODULES -int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module) +static int bpf_event_notify(struct notifier_block *nb, unsigned long op, + void *module) { struct bpf_trace_module *btm, *tmp; struct module *mod = module; @@ -1336,7 +1337,7 @@ static struct notifier_block bpf_module_nb = { .notifier_call = bpf_event_notify, }; -int __init bpf_event_init(void) +static int __init bpf_event_init(void) { register_module_notifier(&bpf_module_nb); return 0; diff --git a/lib/random32.c b/lib/random32.c index 4aaa76404d56..763b920a6206 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -171,9 +171,9 @@ static void prandom_seed_early(struct rnd_state *state, u32 seed, /** * prandom_seed - add entropy to pseudo random number generator - * @seed: seed value + * @entropy: entropy value * - * Add some additional seeding to the prandom pool. + * Add some additional entropy to the prandom pool. */ void prandom_seed(u32 entropy) { diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6529fe1b45c1..935ec80f213f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -34,7 +34,7 @@ union nested_table { union nested_table __rcu *table; - struct rhash_lock_head __rcu *bucket; + struct rhash_lock_head *bucket; }; static u32 head_hashfn(struct rhashtable *ht, @@ -131,7 +131,7 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } - if (cmpxchg(prev, NULL, ntbl) == NULL) + if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL) return ntbl; /* Raced with another thread. 
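The syscall.c hunk above is the other half of that change: the user-space lookup path prefers map_lookup_elem_sys_only() when a map type provides it and falls back to the mandatory map_lookup_elem() otherwise. That optional-method ops-table pattern in miniature (names invented):

    #include <stdio.h>

    struct map;

    struct map_ops {
        const char *(*lookup)(struct map *m, int key);          /* required */
        const char *(*lookup_sys_only)(struct map *m, int key); /* optional */
    };

    struct map { const struct map_ops *ops; };

    static const char *plain_lookup(struct map *m, int key)
    { (void)m; (void)key; return "lookup (marks LRU)"; }

    static const char *sys_lookup(struct map *m, int key)
    { (void)m; (void)key; return "sys-only lookup (no LRU mark)"; }

    /* dispatch shape used by map_lookup_elem() above */
    static const char *syscall_lookup(struct map *m, int key)
    {
        if (m->ops->lookup_sys_only)
            return m->ops->lookup_sys_only(m, key);
        return m->ops->lookup(m, key);
    }

    int main(void)
    {
        const struct map_ops lru_ops   = { plain_lookup, sys_lookup };
        const struct map_ops array_ops = { plain_lookup, NULL };
        struct map lru = { &lru_ops }, arr = { &array_ops };

        printf("lru map:   %s\n", syscall_lookup(&lru, 0));
        printf("array map: %s\n", syscall_lookup(&arr, 0));
        return 0;
    }
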
*/ kfree(ntbl); @@ -216,7 +216,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, } static int rhashtable_rehash_one(struct rhashtable *ht, - struct rhash_lock_head __rcu **bkt, + struct rhash_lock_head **bkt, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); @@ -269,7 +269,7 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); + struct rhash_lock_head **bkt = rht_bucket_var(old_tbl, old_hash); int err; if (!bkt) @@ -296,7 +296,8 @@ static int rhashtable_rehash_attach(struct rhashtable *ht, * rcu_assign_pointer(). */ - if (cmpxchg(&old_tbl->future_tbl, NULL, new_tbl) != NULL) + if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL, + new_tbl) != NULL) return -EEXIST; return 0; @@ -478,7 +479,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, } static void *rhashtable_lookup_one(struct rhashtable *ht, - struct rhash_lock_head __rcu **bkt, + struct rhash_lock_head **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { @@ -529,7 +530,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, } static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, - struct rhash_lock_head __rcu **bkt, + struct rhash_lock_head **bkt, struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, @@ -584,7 +585,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, { struct bucket_table *new_tbl; struct bucket_table *tbl; - struct rhash_lock_head __rcu **bkt; + struct rhash_lock_head **bkt; unsigned int hash; void *data; @@ -1166,8 +1167,8 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); @@ -1195,10 +1196,10 @@ struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tb } EXPORT_SYMBOL_GPL(__rht_bucket_nested); -struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { - static struct rhash_lock_head __rcu *rhnull; + static struct rhash_lock_head *rhnull; if (!rhnull) INIT_RHT_NULLS_HEAD(rhnull); @@ -1206,9 +1207,9 @@ struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(rht_bucket_nested); -struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 7aae0b56829e..cce839bf49f7 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -26,7 +26,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) cfsrvl_init(dbg, channel_id, dev_info, false); dbg->layer.receive = cfdbgl_receive; dbg->layer.transmit = cfdbgl_transmit; - 
snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id); + snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ, "dbg%d", channel_id); return &dbg->layer; } diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 3bdddb32d55a..58fdb99a390f 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -33,8 +33,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) cfsrvl_init(dgm, channel_id, dev_info, true); dgm->layer.receive = cfdgml_receive; dgm->layer.transmit = cfdgml_transmit; - snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id); - dgm->layer.name[CAIF_LAYER_NAME_SZ - 1] = '\0'; + snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ, "dgm%d", channel_id); return &dgm->layer; } diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 1728fa4471cf..be7c43a92ead 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -33,7 +33,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) cfsrvl_init(util, channel_id, dev_info, true); util->layer.receive = cfutill_receive; util->layer.transmit = cfutill_transmit; - snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1"); + snprintf(util->layer.name, CAIF_LAYER_NAME_SZ, "util1"); return &util->layer; } diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 262224581efa..35dd3a600dd1 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -32,7 +32,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) cfsrvl_init(vei, channel_id, dev_info, true); vei->layer.receive = cfvei_receive; vei->layer.transmit = cfvei_transmit; - snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id); + snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ, "vei%d", channel_id); return &vei->layer; } diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index b3b110e8a350..73615e3b3b58 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -29,7 +29,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) cfsrvl_init(vid, channel_id, dev_info, false); vid->layer.receive = cfvidl_receive; vid->layer.transmit = cfvidl_transmit; - snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1"); + snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ, "vid1"); return &vid->layer; } diff --git a/net/core/dev.c b/net/core/dev.c index 108ac8137b9b..b6b8505cfb3e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8927,7 +8927,7 @@ static void netdev_wait_allrefs(struct net_device *dev) refcnt = netdev_refcnt_read(dev); - if (time_after(jiffies, warning_time + 10 * HZ)) { + if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. 
Usage count = %d\n", dev->name, refcnt); warning_time = jiffies; diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index c3a00eac4804..5ce7d47a960e 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -54,6 +54,13 @@ void flow_rule_match_vlan(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_vlan); +void flow_rule_match_cvlan(const struct flow_rule *rule, + struct flow_match_vlan *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CVLAN, out); +} +EXPORT_SYMBOL(flow_rule_match_cvlan); + void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, struct flow_match_ipv4_addrs *out) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2bd12afb9297..adcc045952c2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1496,14 +1496,15 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) return ret; } -static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) +static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev, + bool force) { int ifindex = dev_get_iflink(dev); - if (dev->ifindex == ifindex) - return 0; + if (force || dev->ifindex != ifindex) + return nla_put_u32(skb, IFLA_LINK, ifindex); - return nla_put_u32(skb, IFLA_LINK, ifindex); + return 0; } static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, @@ -1520,6 +1521,8 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev, struct net *src_net) { + bool put_iflink = false; + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); @@ -1528,10 +1531,12 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb, if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; + + put_iflink = true; } } - return 0; + return nla_put_iflink(skb, dev, put_iflink); } static int rtnl_fill_link_af(struct sk_buff *skb, @@ -1617,7 +1622,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - nla_put_iflink(skb, dev) || put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && diff --git a/net/core/skmsg.c b/net/core/skmsg.c index cc94d921476c..93bffaad2135 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -411,6 +411,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) sk_mem_charge(sk, skb->len); copied = skb->len; msg->sg.start = 0; + msg->sg.size = copied; msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge; msg->skb = skb; @@ -554,8 +555,10 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. 
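On the CAIF hunks above: snprintf()'s size argument already accounts for the terminating NUL, so the old CAIF_LAYER_NAME_SZ - 1 silently gave up one usable byte, and the extra manual NUL store in cfdgml.c was redundant since snprintf() always terminates. A two-line demonstration:

    #include <stdio.h>

    int main(void)
    {
        char buf[6];

        /* full size: up to 5 characters plus the NUL fit */
        snprintf(buf, sizeof(buf), "%s", "dgm12");
        printf("full size: \"%s\"\n", buf); /* "dgm12" */

        /* size - 1: one character truncated for no benefit */
        snprintf(buf, sizeof(buf) - 1, "%s", "dgm12");
        printf("size - 1 : \"%s\"\n", buf); /* "dgm1" */

        return 0;
    }
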
*/ - strp_stop(&psock->parser.strp); - strp_done(&psock->parser.strp); + + /* Parser has been stopped */ + if (psock->progs.skb_parser) + strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 15427163a041..0480918bfc7c 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -30,13 +30,11 @@ static int bpfilter_mbox_request(struct sock *sk, int optname, mutex_lock(&bpfilter_ops.lock); if (!bpfilter_ops.sockopt) { mutex_unlock(&bpfilter_ops.lock); - err = request_module("bpfilter"); + request_module("bpfilter"); mutex_lock(&bpfilter_ops.lock); - if (err) - goto out; if (!bpfilter_ops.sockopt) { - err = -ECHILD; + err = -ENOPROTOOPT; goto out; } } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7ccb5f87f70b..834be7daeb32 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1113,7 +1113,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dc91c27bb788..0e482f07b37f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -1076,7 +1076,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) srcp = inet->inet_num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", i, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1fa15beb8380..53d61ca3ac4b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -855,7 +855,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, if (likely(!size)) { skb = sk->sk_tx_skb_cache; - if (skb && !skb_cloned(skb)) { + if (skb) { skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); sk->sk_tx_skb_cache = NULL; pskb_trim(skb, 0); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 1bb7321a256d..3d1e15401384 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -27,7 +27,10 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, int flags, long timeo, int *err) { DEFINE_WAIT_FUNC(wait, woken_wake_function); - int ret; + int ret = 0; + + if (!timeo) + return ret; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); @@ -528,8 +531,6 @@ static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock) { struct sk_psock_link *link; - sk_psock_cork_free(psock); - __sk_psock_purge_ingress_msg(psock); while ((link = sk_psock_link_pop(psock))) { sk_psock_unlink(sk, link); sk_psock_free_link(link); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 20f6fac5882e..c61edd023b35 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6024,6 +6024,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) { tcp_try_undo_loss(sk, false); + + /* Reset rtx states to prevent spurious retransmits_timed_out() */ + tcp_sk(sk)->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3c58ba02af7d..8fb250ed53d4 100644 --- a/net/ipv4/udp.c 
+++ b/net/ipv4/udp.c @@ -2883,7 +2883,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), udp_rqueue_get(sp), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ee4a4e54d016..f07fb24f4ba1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -1034,7 +1034,7 @@ void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, src = &sp->sk_v6_rcv_saddr; seq_printf(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", + "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", bucket, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 08e0390e001c..008421b550c6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -904,6 +904,12 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, { int cpu; + /* Make sure rt6_make_pcpu_route() won't add other percpu routes + * while we are cleaning them here. + */ + f6i->fib6_destroying = 1; + mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ + /* release the reference to this fib entry from * all of its cached pcpu routes */ @@ -927,6 +933,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, { struct fib6_table *table = rt->fib6_table; + if (rt->rt6i_pcpu) + fib6_drop_pcpu_from(rt, table); + if (refcount_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, @@ -948,9 +957,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); } - - if (rt->rt6i_pcpu) - fib6_drop_pcpu_from(rt, table); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 23a20d62daac..7a014ca877ed 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, int iif, int type, u32 portid, u32 seq, unsigned int flags); static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, - struct in6_addr *daddr, - struct in6_addr *saddr); + const struct in6_addr *daddr, + const struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO static struct fib6_info *rt6_add_route_info(struct net *net, @@ -1295,6 +1295,13 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net, prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev); + if (res->f6i->fib6_destroying) { + struct fib6_info *from; + + from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); + fib6_info_release(from); + } + return pcpu_rt; } @@ -1566,31 +1573,44 @@ void rt6_flush_exceptions(struct fib6_info *rt) * Caller has to hold rcu_read_lock() */ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, - struct in6_addr *daddr, - struct in6_addr *saddr) + const struct in6_addr *daddr, + const struct in6_addr *saddr) { + const struct in6_addr *src_key = NULL; struct rt6_exception_bucket *bucket; - struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; struct rt6_info *ret = NULL; - bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); - #ifdef CONFIG_IPV6_SUBTREES /* fib6i_src.plen != 0 indicates f6i is in subtree * and exception 
table is indexed by a hash of * both fib6_dst and fib6_src. - * Otherwise, the exception table is indexed by - * a hash of only fib6_dst. + * However, the src addr used to create the hash + * might not be exactly the passed in saddr which + * is a /128 addr from the flow. + * So we need to use f6i->fib6_src to redo lookup + * if the passed in saddr does not find anything. + * (See the logic in ip6_rt_cache_alloc() on how + * rt->rt6i_src is updated.) */ if (res->f6i->fib6_src.plen) src_key = saddr; +find_ex: #endif + bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) ret = rt6_ex->rt6i; +#ifdef CONFIG_IPV6_SUBTREES + /* Use fib6_src as src_key and redo lookup */ + if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) { + src_key = &res->f6i->fib6_src.addr; + goto find_ex; + } +#endif + return ret; } @@ -2665,12 +2685,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { - struct rt6_exception_bucket *bucket; const struct fib6_nh *nh = res->nh; struct fib6_info *f6i = res->f6i; - const struct in6_addr *src_key; - struct rt6_exception *rt6_ex; struct inet6_dev *idev; + struct rt6_info *rt; u32 mtu = 0; if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { @@ -2679,18 +2697,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, goto out; } - src_key = NULL; -#ifdef CONFIG_IPV6_SUBTREES - if (f6i->fib6_src.plen) - src_key = saddr; -#endif - - bucket = rcu_dereference(f6i->rt6i_exception_bucket); - rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); - if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) - mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU); - - if (likely(!mtu)) { + rt = rt6_find_cached_rt(res, daddr, saddr); + if (unlikely(rt)) { + mtu = dst_metric_raw(&rt->dst, RTAX_MTU); + } else { struct net_device *dev = nh->fib_nh_dev; mtu = IPV6_MIN_MTU; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 216ab915dd54..718a97d5f1fd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2642,7 +2642,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); - seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8d %-8lu\n", + seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n", s, s->sk_protocol, nlk->portid, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 30187990257f..2567af2fbd6f 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -607,7 +607,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) struct pn_sock *pn = pn_sk(sk); seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu " - "%d %pK %d", + "%d %pK %u", sk->sk_protocol, pn->sobject, pn->dobject, pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), diff --git a/net/socket.c b/net/socket.c index 472fbefa5d9b..72372dc5dd70 100644 --- a/net/socket.c +++ b/net/socket.c @@ -645,14 +645,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); -/** - * sock_sendmsg - send a message through @sock - * @sock: socket - * @msg: message to send - * - * Sends @msg through @sock, passing through LSM. - * Returns the number of bytes sent, or an error code. 
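The rt6_find_cached_rt() rework above turns the exception lookup into a two-pass search: try the flow's /128 source address first, and if nothing matches, redo the lookup once keyed by the route's own fib6_src (which is what ip6_rt_cache_alloc() stored). The retry shape reduced to its bones; the table contents and string keys below are invented for illustration:

    #include <stdio.h>
    #include <string.h>

    static const char *table_lookup(const char *src_key)
    {
        /* pretend the exception table only has an entry hashed with the
         * route's own source prefix, not the flow's /128 address */
        if (src_key && strcmp(src_key, "fib6_src") == 0)
            return "cached exception route";
        return NULL;
    }

    static const char *find_cached(const char *flow_saddr, const char *fib6_src)
    {
        const char *src_key = flow_saddr; /* most specific key first */
        const char *ret;

    find_ex:
        ret = table_lookup(src_key);
        if (!ret && src_key && src_key != fib6_src) {
            src_key = fib6_src; /* redo with the fallback key */
            goto find_ex;
        }
        return ret;
    }

    int main(void)
    {
        printf("%s\n", find_cached("2001:db8::1", "fib6_src"));
        return 0;
    }
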
- */ INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *, size_t)); static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) @@ -663,6 +655,14 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) return ret; } +/** + * sock_sendmsg - send a message through @sock + * @sock: socket + * @msg: message to send + * + * Sends @msg through @sock, passing through LSM. + * Returns the number of bytes sent, or an error code. + */ int sock_sendmsg(struct socket *sock, struct msghdr *msg) { int err = security_socket_sendmsg(sock, msg, @@ -875,15 +875,6 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); -/** - * sock_recvmsg - receive a message from @sock - * @sock: socket - * @msg: message to receive - * @flags: message flags - * - * Receives @msg from @sock, passing through LSM. Returns the total number - * of bytes received, or an error. - */ INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, size_t , int )); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, @@ -893,6 +884,15 @@ static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, msg_data_left(msg), flags); } +/** + * sock_recvmsg - receive a message from @sock + * @sock: socket + * @msg: message to receive + * @flags: message flags + * + * Receives @msg from @sock, passing through LSM. Returns the total number + * of bytes received, or an error. + */ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) { int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags); diff --git a/net/tipc/core.c b/net/tipc/core.c index 27cccd101ef6..ddd2e0f67c07 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -131,10 +131,6 @@ static int __init tipc_init(void) if (err) goto out_netlink_compat; - err = tipc_socket_init(); - if (err) - goto out_socket; - err = tipc_register_sysctl(); if (err) goto out_sysctl; @@ -143,6 +139,10 @@ static int __init tipc_init(void) if (err) goto out_pernet; + err = tipc_socket_init(); + if (err) + goto out_socket; + err = tipc_bearer_setup(); if (err) goto out_bearer; @@ -150,12 +150,12 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: + tipc_socket_stop(); +out_socket: unregister_pernet_subsys(&tipc_net_ops); out_pernet: tipc_unregister_sysctl(); out_sysctl: - tipc_socket_stop(); -out_socket: tipc_netlink_compat_stop(); out_netlink_compat: tipc_netlink_stop(); @@ -167,10 +167,10 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { tipc_bearer_cleanup(); + tipc_socket_stop(); unregister_pernet_subsys(&tipc_net_ops); tipc_netlink_stop(); tipc_netlink_compat_stop(); - tipc_socket_stop(); tipc_unregister_sysctl(); pr_info("Deactivated\n"); diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index a827547aa102..982a8dc49e03 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -35,6 +35,9 @@ /* The MTU is 16KB per the host side's design */ #define HVS_MTU_SIZE (1024 * 16) +/* How long to wait for graceful shutdown of a connection */ +#define HVS_CLOSE_TIMEOUT (8 * HZ) + struct vmpipe_proto_header { u32 pkt_type; u32 data_size; @@ -305,19 +308,32 @@ static void hvs_channel_cb(void *ctx) sk->sk_write_space(sk); } +static void hvs_do_close_lock_held(struct vsock_sock *vsk, + bool cancel_timeout) +{ + struct sock *sk = sk_vsock(vsk); + + sock_set_flag(sk, SOCK_DONE); 
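The tipc_init() reshuffle above preserves the invariant that makes goto-based error handling work: the unwind labels (and the exit path) must run in exactly the reverse of the registration order, so moving tipc_socket_init() later forces its label and its place in tipc_exit() to move too. The canonical pattern, stand-alone, with placeholder subsystem names:

    #include <stdio.h>

    static int  init_a(void) { printf("init a\n"); return 0; }
    static void exit_a(void) { printf("exit a\n"); }
    static int  init_b(void) { printf("init b\n"); return 0; }
    static void exit_b(void) { printf("exit b\n"); }
    static int  init_c(void) { printf("init c\n"); return -1; } /* fails */

    static int subsys_init(void)
    {
        int err;

        err = init_a();
        if (err)
            goto out;
        err = init_b();
        if (err)
            goto out_a;
        err = init_c();
        if (err)
            goto out_b;
        return 0;

    out_b:
        exit_b(); /* undo init_b(): strict reverse order */
    out_a:
        exit_a(); /* undo init_a() */
    out:
        return err;
    }

    int main(void)
    {
        printf("subsys_init: %d\n", subsys_init());
        return 0;
    }
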
+ vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = TCP_CLOSING; + sk->sk_state_change(sk); + if (vsk->close_work_scheduled && + (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { + vsk->close_work_scheduled = false; + vsock_remove_sock(vsk); + + /* Release the reference taken while scheduling the timeout */ + sock_put(sk); + } +} + static void hvs_close_connection(struct vmbus_channel *chan) { struct sock *sk = get_per_channel_state(chan); - struct vsock_sock *vsk = vsock_sk(sk); lock_sock(sk); - - sk->sk_state = TCP_CLOSE; - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; - - sk->sk_state_change(sk); - + hvs_do_close_lock_held(vsock_sk(sk), true); release_sock(sk); } @@ -452,50 +468,80 @@ static int hvs_connect(struct vsock_sock *vsk) return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id); } +static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) +{ + struct vmpipe_proto_header hdr; + + if (hvs->fin_sent || !hvs->chan) + return; + + /* It can't fail: see hvs_channel_writable_bytes(). */ + (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0); + hvs->fin_sent = true; +} + static int hvs_shutdown(struct vsock_sock *vsk, int mode) { struct sock *sk = sk_vsock(vsk); - struct vmpipe_proto_header hdr; - struct hvs_send_buf *send_buf; - struct hvsock *hvs; if (!(mode & SEND_SHUTDOWN)) return 0; lock_sock(sk); - - hvs = vsk->trans; - if (hvs->fin_sent) - goto out; - - send_buf = (struct hvs_send_buf *)&hdr; - - /* It can't fail: see hvs_channel_writable_bytes(). */ - (void)hvs_send_data(hvs->chan, send_buf, 0); - - hvs->fin_sent = true; -out: + hvs_shutdown_lock_held(vsk->trans, mode); release_sock(sk); return 0; } +static void hvs_close_timeout(struct work_struct *work) +{ + struct vsock_sock *vsk = + container_of(work, struct vsock_sock, close_work.work); + struct sock *sk = sk_vsock(vsk); + + sock_hold(sk); + lock_sock(sk); + if (!sock_flag(sk, SOCK_DONE)) + hvs_do_close_lock_held(vsk, false); + + vsk->close_work_scheduled = false; + release_sock(sk); + sock_put(sk); +} + +/* Returns true, if it is safe to remove socket; false otherwise */ +static bool hvs_close_lock_held(struct vsock_sock *vsk) +{ + struct sock *sk = sk_vsock(vsk); + + if (!(sk->sk_state == TCP_ESTABLISHED || + sk->sk_state == TCP_CLOSING)) + return true; + + if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) + hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK); + + if (sock_flag(sk, SOCK_DONE)) + return true; + + /* This reference will be dropped by the delayed close routine */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout); + vsk->close_work_scheduled = true; + schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT); + return false; +} + static void hvs_release(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); - struct hvsock *hvs = vsk->trans; - struct vmbus_channel *chan; + bool remove_sock; lock_sock(sk); - - sk->sk_state = TCP_CLOSING; - vsock_remove_sock(vsk); - + remove_sock = hvs_close_lock_held(vsk); release_sock(sk); - - chan = hvs->chan; - if (chan) - hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); - + if (remove_sock) + vsock_remove_sock(vsk); } static void hvs_destruct(struct vsock_sock *vsk) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 15eb5d3d4750..96ab344f17bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -702,28 +702,27 @@ static int __init 
virtio_vsock_init(void) if (!virtio_vsock_workqueue) return -ENOMEM; - ret = register_virtio_driver(&virtio_vsock_driver); + ret = vsock_core_init(&virtio_transport.transport); if (ret) goto out_wq; - ret = vsock_core_init(&virtio_transport.transport); + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - goto out_vdr; + goto out_vci; return 0; -out_vdr: - unregister_virtio_driver(&virtio_vsock_driver); +out_vci: + vsock_core_exit(); out_wq: destroy_workqueue(virtio_vsock_workqueue); return ret; - } static void __exit virtio_vsock_exit(void) { - vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); + vsock_core_exit(); destroy_workqueue(virtio_vsock_workqueue); } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 602715fc9a75..f3f3d06cb6d8 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -786,12 +786,19 @@ static bool virtio_transport_close(struct vsock_sock *vsk) void virtio_transport_release(struct vsock_sock *vsk) { + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt, *tmp; struct sock *sk = &vsk->sk; bool remove_sock = true; lock_sock(sk); if (sk->sk_type == SOCK_STREAM) remove_sock = virtio_transport_close(vsk); + + list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) { + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } release_sock(sk); if (remove_sock) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 410233c5681e..7a43ae6b2a44 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3264,7 +3264,8 @@ static void decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) { const struct iphdr *iph = ip_hdr(skb); - u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + int ihl = iph->ihl; + u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; @@ -3275,6 +3276,11 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->flowi4_mark = skb->mark; fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? 
iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: @@ -3286,7 +3292,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; @@ -3298,7 +3304,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; icmp = xprth; fl4->fl4_icmp_type = icmp[0]; @@ -3310,7 +3316,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; @@ -3321,7 +3327,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; @@ -3332,7 +3338,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); @@ -3344,7 +3350,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) __be16 *greflags; __be32 *gre_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; greflags = (__be16 *)xprth; gre_hdr = (__be32 *)xprth; @@ -3360,10 +3366,6 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) break; } } - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? 
iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; } #if IS_ENABLED(CONFIG_IPV6) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index a675ce11a573..e2de6c4dce90 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -138,11 +138,6 @@ cc-disable-warning = $(call try-run,\ # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4)) -# cc-ldoption -# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) -cc-ldoption = $(call try-run,\ - $(CC) $(1) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) - # ld-option # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh index 2f659530e1ec..39e8cb36ba19 100755 --- a/scripts/modules-check.sh +++ b/scripts/modules-check.sh @@ -6,10 +6,10 @@ set -e # Check uniqueness of module names check_same_name_modules() { - for m in $(sed 's:.*/::' modules.order modules.builtin | sort | uniq -d) + for m in $(sed 's:.*/::' modules.order | sort | uniq -d) do - echo "warning: same basename if the following are built as modules:" >&2 - sed "/\/$m/!d;s:^kernel/: :" modules.order modules.builtin >&2 + echo "warning: same module names found:" >&2 + sed -n "/\/$m/s:^kernel/: :p" modules.order >&2 done } diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 97c3478ee6e7..7b7ac0f6cec9 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -35,6 +35,7 @@ #include #include #include +#include #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE @@ -102,6 +103,9 @@ struct kvm_regs { #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ #define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */ +#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */ +#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ +#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ struct kvm_vcpu_init { __u32 target; @@ -226,6 +230,45 @@ struct kvm_vcpu_events { KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +/* SVE registers */ +#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) + +/* Z- and P-regs occupy blocks at the following offsets within this range: */ +#define KVM_REG_ARM64_SVE_ZREG_BASE 0 +#define KVM_REG_ARM64_SVE_PREG_BASE 0x400 +#define KVM_REG_ARM64_SVE_FFR_BASE 0x600 + +#define KVM_ARM64_SVE_NUM_ZREGS __SVE_NUM_ZREGS +#define KVM_ARM64_SVE_NUM_PREGS __SVE_NUM_PREGS + +#define KVM_ARM64_SVE_MAX_SLICES 32 + +#define KVM_REG_ARM64_SVE_ZREG(n, i) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_ZREG_BASE | \ + KVM_REG_SIZE_U2048 | \ + (((n) & (KVM_ARM64_SVE_NUM_ZREGS - 1)) << 5) | \ + ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) + +#define KVM_REG_ARM64_SVE_PREG(n, i) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_PREG_BASE | \ + KVM_REG_SIZE_U256 | \ + (((n) & (KVM_ARM64_SVE_NUM_PREGS - 1)) << 5) | \ + ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) + +#define KVM_REG_ARM64_SVE_FFR(i) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_FFR_BASE | \ + KVM_REG_SIZE_U256 | \ + ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) + +#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN +#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX + +/* Vector 
lengths pseudo-register: */ +#define KVM_REG_ARM64_SVE_VLS (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | \ + KVM_REG_SIZE_U512 | 0xffff) +#define KVM_ARM64_SVE_VLS_WORDS \ + ((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 26ca425f4c2c..b0f72dea8b11 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ #define KVM_REG_PPC_ICP_PPRI_MASK 0xff +#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d) + /* Device control API: PPC-specific devices */ #define KVM_DEV_MPIC_GRP_MISC 1 #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ @@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char { #define KVM_XICS_PRESENTED (1ULL << 43) #define KVM_XICS_QUEUED (1ULL << 44) +/* POWER9 XIVE Native Interrupt Controller */ +#define KVM_DEV_XIVE_GRP_CTRL 1 +#define KVM_DEV_XIVE_RESET 1 +#define KVM_DEV_XIVE_EQ_SYNC 2 +#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ +#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ +#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ +#define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */ + +/* Layout of 64-bit XIVE source attribute values */ +#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) +#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1) + +/* Layout of 64-bit XIVE source configuration attribute values */ +#define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0 +#define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7 +#define KVM_XIVE_SOURCE_SERVER_SHIFT 3 +#define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL +#define KVM_XIVE_SOURCE_MASKED_SHIFT 32 +#define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL +#define KVM_XIVE_SOURCE_EISN_SHIFT 33 +#define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL + +/* Layout of 64-bit EQ identifier */ +#define KVM_XIVE_EQ_PRIORITY_SHIFT 0 +#define KVM_XIVE_EQ_PRIORITY_MASK 0x7 +#define KVM_XIVE_EQ_SERVER_SHIFT 3 +#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL + +/* Layout of EQ configuration values (64 bytes) */ +struct kvm_ppc_xive_eq { + __u32 flags; + __u32 qshift; + __u64 qaddr; + __u32 qtoggle; + __u32 qindex; + __u8 pad[40]; +}; + +#define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001 + +#define KVM_XIVE_TIMA_PAGE_OFFSET 0 +#define KVM_XIVE_ESB_PAGE_OFFSET 4 + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 09652eabe769..47104e5b47fd 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -153,7 +153,9 @@ struct kvm_s390_vm_cpu_subfunc { __u8 ppno[16]; /* with MSA5 */ __u8 kma[16]; /* with MSA8 */ __u8 kdsa[16]; /* with MSA9 */ - __u8 reserved[1792]; + __u8 sortl[32]; /* with STFLE.150 */ + __u8 dfltcc[32]; /* with STFLE.151 */ + __u8 reserved[1728]; }; /* kvm attributes for crypto */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 981ff9479648..75f27ee2c263 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS 
(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ @@ -382,5 +383,7 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSBDS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 58a2cd002a4b..7317438ecd9e 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -208,8 +208,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id, break; } case BTF_KIND_FWD: { - const char *fwd_kind = BTF_INFO_KIND(t->info) ? "union" - : "struct"; + const char *fwd_kind = BTF_INFO_KFLAG(t->info) ? "union" + : "struct"; if (json_output) jsonw_string_field(w, "fwd_kind", fwd_kind); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index fc495b27f0fc..26336bad0442 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -879,6 +879,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } } + set_max_rlimit(); + obj = __bpf_object__open_xattr(&attr, bpf_flags); if (IS_ERR_OR_NULL(obj)) { p_err("failed to open object file"); @@ -958,8 +960,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - set_max_rlimit(); - err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index dee7292e1df6..a87904daf103 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -832,9 +832,21 @@ __SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) __SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) #define __NR_io_uring_register 427 __SYSCALL(__NR_io_uring_register, sys_io_uring_register) +#define __NR_open_tree 428 +__SYSCALL(__NR_open_tree, sys_open_tree) +#define __NR_move_mount 429 +__SYSCALL(__NR_move_mount, sys_move_mount) +#define __NR_fsopen 430 +__SYSCALL(__NR_fsopen, sys_fsopen) +#define __NR_fsconfig 431 +__SYSCALL(__NR_fsconfig, sys_fsconfig) +#define __NR_fsmount 432 +__SYSCALL(__NR_fsmount, sys_fsmount) +#define __NR_fspick 433 +__SYSCALL(__NR_fspick, sys_fspick) #undef __NR_syscalls -#define __NR_syscalls 428 +#define __NR_syscalls 434 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 300f336633f2..661d73f9a919 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -649,6 +649,7 @@ struct drm_gem_open { #define DRM_CAP_PAGE_FLIP_TARGET 0x11 #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 #define DRM_CAP_SYNCOBJ 0x13 +#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 /** DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { @@ -735,8 +736,18 @@ struct drm_syncobj_handle { __u32 pad; }; +struct drm_syncobj_transfer { + __u32 src_handle; + __u32 dst_handle; + __u64 src_point; + __u64 dst_point; 
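On the bpftool fix above: for BTF_KIND_FWD, the struct-vs-union distinction lives in the info word's kind_flag (bit 31), not in the kind field (bits 24-28), so testing BTF_INFO_KIND() was always non-zero for a forward declaration and everything printed as a union. A decode sketch; the two macros are reproduced from my reading of uapi/linux/btf.h and should be checked against the header:

    #include <stdio.h>
    #include <stdint.h>

    #define BTF_INFO_KIND(info)  (((info) >> 24) & 0x1f)
    #define BTF_INFO_KFLAG(info) ((info) >> 31)
    #define BTF_KIND_FWD 7

    int main(void)
    {
        uint32_t fwd_struct = (uint32_t)BTF_KIND_FWD << 24;
        uint32_t fwd_union  = ((uint32_t)BTF_KIND_FWD << 24) | (1u << 31);

        /* buggy test: the kind field is non-zero for both forwards */
        printf("buggy: %s / %s\n",
               BTF_INFO_KIND(fwd_struct) ? "union" : "struct",
               BTF_INFO_KIND(fwd_union)  ? "union" : "struct");

        /* fixed test: only the kind_flag distinguishes them */
        printf("fixed: %s / %s\n",
               BTF_INFO_KFLAG(fwd_struct) ? "union" : "struct",
               BTF_INFO_KFLAG(fwd_union)  ? "union" : "struct");
        return 0;
    }
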
+ __u32 flags; + __u32 pad; +}; + #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ @@ -747,12 +758,33 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_timeline_wait { + __u64 handles; + /* wait on specific timeline point for every handles*/ + __u64 points; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + + struct drm_syncobj_array { __u64 handles; __u32 count_handles; __u32 pad; }; +struct drm_syncobj_timeline_array { + __u64 handles; + __u64 points; + __u32 count_handles; + __u32 pad; +}; + + /* Query current scanout sequence number */ struct drm_crtc_get_sequence { __u32 crtc_id; /* requested crtc_id */ @@ -909,6 +941,11 @@ extern "C" { #define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) #define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) +#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) +#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) + /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 397810fa2d33..3a73f5316766 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -62,6 +62,28 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * i915_user_extension: Base class for defining a chain of extensions + * + * Many interfaces need to grow over time. In most cases we can simply + * extend the struct and have userspace pass in more data. Another option, + * as demonstrated by Vulkan's approach to providing extensions for forward + * and backward compatibility, is to use a list of optional structs to + * provide those extra details. + * + * The key advantage to using an extension chain is that it allows us to + * redefine the interface more easily than an ever growing struct of + * increasing complexity, and for large parts of that interface to be + * entirely optional. The downside is more pointer chasing; chasing across + * the __user boundary with pointers encapsulated inside u64. + */ +struct i915_user_extension { + __u64 next_extension; + __u32 name; + __u32 flags; /* All undefined bits must be zero. */ + __u32 rsvd[4]; /* Reserved for future use; must be zero. */ +}; + /* * MOCS indexes used for GPU surfaces, defining the cacheability of the * surface data and the coherency for this data wrt. CPU vs. GPU accesses. @@ -99,9 +121,23 @@ enum drm_i915_gem_engine_class { I915_ENGINE_CLASS_VIDEO = 2, I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + /* should be kept compact */ + I915_ENGINE_CLASS_INVALID = -1 }; +/* + * There may be more than one engine fulfilling any role within the system. + * Each engine of a class is given a unique instance number and therefore + * any engine can be specified by its class:instance tuplet. 
APIs that allow + * access to any engine in the system will use struct i915_engine_class_instance + * for this identification. + */ +struct i915_engine_class_instance { + __u16 engine_class; /* see enum drm_i915_gem_engine_class */ + __u16 engine_instance; +}; + /** * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 * @@ -319,6 +355,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_I915_QUERY 0x39 +/* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -367,6 +404,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) #define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext) #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) @@ -476,6 +514,7 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_ENABLED (1ul << 0) #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) +#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_PARAM_HUC_STATUS 42 @@ -559,6 +598,8 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_MMAP_GTT_COHERENT 52 +/* Must be kept compact -- no holes and well documented */ + typedef struct drm_i915_getparam { __s32 param; /* @@ -574,6 +615,7 @@ typedef struct drm_i915_getparam { #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 #define I915_SETPARAM_ALLOW_BATCHBUFFER 3 #define I915_SETPARAM_NUM_USED_FENCES 4 +/* Must be kept compact -- no holes */ typedef struct drm_i915_setparam { int param; @@ -972,7 +1014,7 @@ struct drm_i915_gem_execbuffer2 { * struct drm_i915_gem_exec_fence *fences. */ __u64 cliprects_ptr; -#define I915_EXEC_RING_MASK (7<<0) +#define I915_EXEC_RING_MASK (0x3f) #define I915_EXEC_DEFAULT (0<<0) #define I915_EXEC_RENDER (1<<0) #define I915_EXEC_BSD (2<<0) @@ -1120,32 +1162,34 @@ struct drm_i915_gem_busy { * as busy may become idle before the ioctl is completed. * * Furthermore, if the object is busy, which engine is busy is only - * provided as a guide. There are race conditions which prevent the - * report of which engines are busy from being always accurate. - * However, the converse is not true. If the object is idle, the - * result of the ioctl, that all engines are idle, is accurate. + * provided as a guide and only indirectly by reporting its class + * (there may be more than one engine in each class). There are race + * conditions which prevent the report of which engines are busy from + * being always accurate. However, the converse is not true. If the + * object is idle, the result of the ioctl, that all engines are idle, + * is accurate. 
* * The returned dword is split into two fields to indicate both - * the engines on which the object is being read, and the - * engine on which it is currently being written (if any). + * the engine classes on which the object is being read, and the + * engine class on which it is currently being written (if any). * * The low word (bits 0:15) indicate if the object is being written * to by any engine (there can only be one, as the GEM implicit * synchronisation rules force writes to be serialised). Only the - * engine for the last write is reported. + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as + * 1 not 0 etc) for the last write is reported. * - * The high word (bits 16:31) are a bitmask of which engines are - * currently reading from the object. Multiple engines may be + * The high word (bits 16:31) is a bitmask of which engine classes + * are currently reading from the object. Multiple engines may be * reading from the object simultaneously. * - * The value of each engine is the same as specified in the - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to - * the I915_EXEC_RENDER engine for execution, and so it is never - * reported as active itself. Some hardware may have parallel + * The value of each engine class is the same as specified in the + * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. + * Some hardware may have parallel * execution engines, e.g. multiple media engines, which are - * mapped to the same identifier in the EXECBUFFER2 ioctl and - * so are not separately reported for busyness. + * mapped to the same class identifier and so are not separately + * reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether @@ -1412,16 +1456,158 @@ struct drm_i915_gem_wait { }; struct drm_i915_gem_context_create { - /* output: id of new context*/ - __u32 ctx_id; + __u32 ctx_id; /* output: id of new context */ __u32 pad; }; +struct drm_i915_gem_context_create_ext { + __u32 ctx_id; /* output: id of new context */ + __u32 flags; +#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) +#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ + (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1)) + __u64 extensions; +}; + +struct drm_i915_gem_context_param { + __u32 ctx_id; + __u32 size; + __u64 param; +#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 +#define I915_CONTEXT_PARAM_GTT_SIZE 0x3 +#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 +#define I915_CONTEXT_PARAM_BANNABLE 0x5 +#define I915_CONTEXT_PARAM_PRIORITY 0x6 +#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ +#define I915_CONTEXT_DEFAULT_PRIORITY 0 +#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + /* + * When using the following param, value should be a pointer to + * drm_i915_gem_context_param_sseu. + */ +#define I915_CONTEXT_PARAM_SSEU 0x7 + +/* + * Not all clients may want to attempt automatic recovery of a context after + * a hang (for example, some clients may only submit very small incremental + * batches relying on known logical state of previous batches which will never + * recover correctly and each attempt will hang), and so would prefer that + * the context is forever banned instead. + * + * If set to false (0), after a reset, subsequent (and in flight) rendering + * from this context is discarded, and the client will need to create a new + * context to use instead.
+ * + * If set to true (1), the kernel will automatically attempt to recover the + * context by skipping the hanging batch and executing the next batch starting + * from the default context state (discarding the incomplete logical context + * state lost due to the reset). + * + * On creation, all new contexts are marked as recoverable. + */ +#define I915_CONTEXT_PARAM_RECOVERABLE 0x8 +/* Must be kept compact -- no holes and well documented */ + + __u64 value; +}; + +/** + * Context SSEU programming + * + * It may be necessary for either functional or performance reasons to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring the SSEU settings using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which + * userspace intends to use. + * + * Not all GPUs or engines support this functionality, in which case an error + * code -ENODEV will be returned. + * + * Also, the flexibility of possible SSEU configuration permutations varies + * between GPU generations and software-imposed limitations. Requesting an + * unsupported combination will return an error code of -EINVAL. + * + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in + * favour of a single global setting. + */ +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + struct i915_engine_class_instance engine; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be less than or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd; +}; + +struct drm_i915_gem_context_create_ext_setparam { +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 + struct i915_user_extension base; + struct drm_i915_gem_context_param param; +}; + struct drm_i915_gem_context_destroy { __u32 ctx_id; __u32 pad; }; +/* + * DRM_I915_GEM_VM_CREATE - + * + * Create a new virtual memory address space (ppGTT) for use within a context + * on the same file. Extensions can be provided to configure exactly how the + * address space is set up upon creation. + * + * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is + * returned in the outparam @vm_id. + * + * No flags are currently defined; all bits are reserved and must be zero. + * + * An extension chain may be provided, starting with @extensions, and terminated + * by the @next_extension being 0. Currently, no extensions are defined. + * + * DRM_I915_GEM_VM_DESTROY - + * + * Destroys a previously created VM id, specified in @vm_id. + * + * No extensions or flags are allowed currently, and so must be zero. + */ +struct drm_i915_gem_vm_control { + __u64 extensions; + __u32 flags; + __u32 vm_id; +}; + struct drm_i915_reg_read { /* * Register offset.
@@ -1434,6 +1620,7 @@ struct drm_i915_reg_read { __u64 val; /* Return value */ }; + /* Known registers: * * Render engine timestamp - 0x2358 + 64bit - gen7+ @@ -1473,86 +1660,6 @@ struct drm_i915_gem_userptr { __u32 handle; }; -struct drm_i915_gem_context_param { - __u32 ctx_id; - __u32 size; - __u64 param; -#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 -#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 -#define I915_CONTEXT_PARAM_GTT_SIZE 0x3 -#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 -#define I915_CONTEXT_PARAM_BANNABLE 0x5 -#define I915_CONTEXT_PARAM_PRIORITY 0x6 -#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ -#define I915_CONTEXT_DEFAULT_PRIORITY 0 -#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ - /* - * When using the following param, value should be a pointer to - * drm_i915_gem_context_param_sseu. - */ -#define I915_CONTEXT_PARAM_SSEU 0x7 - __u64 value; -}; - -/** - * Context SSEU programming - * - * It may be necessary for either functional or performance reason to configure - * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ - * Sub-slice/EU). - * - * This is done by configuring SSEU configuration using the below - * @struct drm_i915_gem_context_param_sseu for every supported engine which - * userspace intends to use. - * - * Not all GPUs or engines support this functionality in which case an error - * code -ENODEV will be returned. - * - * Also, flexibility of possible SSEU configuration permutations varies between - * GPU generations and software imposed limitations. Requesting such a - * combination will return an error code of -EINVAL. - * - * NOTE: When perf/OA is active the context's SSEU configuration is ignored in - * favour of a single global setting. - */ -struct drm_i915_gem_context_param_sseu { - /* - * Engine class & instance to be configured or queried. - */ - __u16 engine_class; - __u16 engine_instance; - - /* - * Unused for now. Must be cleared to zero. - */ - __u32 flags; - - /* - * Mask of slices to enable for the context. Valid values are a subset - * of the bitmask value returned for I915_PARAM_SLICE_MASK. - */ - __u64 slice_mask; - - /* - * Mask of subslices to enable for the context. Valid values are a - * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK. - */ - __u64 subslice_mask; - - /* - * Minimum/Maximum number of EUs to enable per subslice for the - * context. min_eus_per_subslice must be inferior or equal to - * max_eus_per_subslice. - */ - __u16 min_eus_per_subslice; - __u16 max_eus_per_subslice; - - /* - * Unused for now. Must be cleared to zero. 
- */ - __u32 rsvd; -}; - enum drm_i915_oa_format { I915_OA_FORMAT_A13 = 1, /* HSW only */ I915_OA_FORMAT_A29, /* HSW only */ @@ -1714,6 +1821,7 @@ struct drm_i915_perf_oa_config { struct drm_i915_query_item { __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 +/* Must be kept compact -- no holes and well documented */ /* * When set to zero by userspace, this is filled with the size of the diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 9310652ca4f9..63ae4a39e58b 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -83,7 +83,7 @@ struct btf_type { * is the 32 bits arrangement: */ #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) -#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) +#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) #define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) /* Attributes stored in the BTF_INT_ENCODING */ diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index a2f8658f1c55..1d338357df8a 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -91,5 +91,7 @@ #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 121e82ce296b..59c71fa8c553 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -320,6 +320,9 @@ struct fscrypt_key { #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 +#define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \ + SYNC_FILE_RANGE_WAIT_BEFORE | \ + SYNC_FILE_RANGE_WAIT_AFTER) /* * Flags for preadv2/pwritev2: diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6d4ea4b6c922..2fe12b40d503 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -986,8 +986,13 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #define KVM_CAP_EXCEPTION_PAYLOAD 164 #define KVM_CAP_ARM_VM_IPA_SIZE 165 -#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 +#define KVM_CAP_PPC_IRQ_XIVE 169 +#define KVM_CAP_ARM_SVE 170 +#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 +#define KVM_CAP_ARM_PTRAUTH_GENERIC 172 #ifdef KVM_CAP_IRQ_ROUTING @@ -1145,6 +1150,7 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_U256 0x0050000000000000ULL #define KVM_REG_SIZE_U512 0x0060000000000000ULL #define KVM_REG_SIZE_U1024 0x0070000000000000ULL +#define KVM_REG_SIZE_U2048 0x0080000000000000ULL struct kvm_reg_list { __u64 n; /* number of regs */ @@ -1211,6 +1217,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_ITS, #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS + KVM_DEV_TYPE_XIVE, +#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_MAX, }; @@ -1434,12 +1442,15 @@ struct kvm_enc_region { #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) -/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ +/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0,
struct kvm_clear_dirty_log) /* Available with KVM_CAP_HYPERV_CPUID */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) +/* Available with KVM_CAP_ARM_SVE */ +#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 3f9ec42510b0..96a0240f23fe 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -55,4 +55,66 @@ #define MS_MGC_VAL 0xC0ED0000 #define MS_MGC_MSK 0xffff0000 +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT__MASK 0x00000077 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. 
*/ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ + #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index 22627f80063e..ed4ee170bee2 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -10,6 +10,7 @@ #define CLONE_FS 0x00000200 /* set if fs info shared between processes */ #define CLONE_FILES 0x00000400 /* set if open files shared between processes */ #define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */ #define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ #define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ #define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 75eaf10b9e1a..03348c4d6bd4 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -11,7 +11,7 @@ #include "btf.h" #include "bpf.h" #include "libbpf.h" -#include "libbpf_util.h" +#include "libbpf_internal.h" #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7e3b79d7c25f..197b574406b3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -43,7 +43,6 @@ #include "bpf.h" #include "btf.h" #include "str_error.h" -#include "libbpf_util.h" #include "libbpf_internal.h" #ifndef EM_BPF @@ -1696,7 +1695,7 @@ bpf_object__probe_caps(struct bpf_object *obj) for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { ret = probe_fn[i](obj); if (ret < 0) - return ret; + pr_debug("Probe #%d failed with %d.\n", i, ret); } return 0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 789e435b5900..f3025b4d90e1 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -21,6 +21,19 @@ #define BTF_PARAM_ENC(name, type) (name), (type) #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) +extern void libbpf_print(enum libbpf_print_level level, + const char *format, ...) + __attribute__((format(printf, 2, 3))); + +#define __pr(level, fmt, ...) \ +do { \ + libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) + int libbpf__probe_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index da94c4cb2e4d..59c779c5790c 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -10,19 +10,6 @@ extern "C" { #endif -extern void libbpf_print(enum libbpf_print_level level, - const char *format, ...) - __attribute__((format(printf, 2, 3))); - -#define __pr(level, fmt, ...) \ -do { \ - libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) 
__pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) - /* Use these barrier functions instead of smp_[rw]mb() when they are * used in a libbpf header file. That way they can be built into the * application that uses libbpf. diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a3d1a302bc9c..38667b62f1fe 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -29,7 +29,7 @@ #include "bpf.h" #include "libbpf.h" -#include "libbpf_util.h" +#include "libbpf_internal.h" #include "xsk.h" #ifndef SOL_XDP diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index c88fd32563eb..459469b7222c 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -56,7 +56,7 @@ create_table() echo "};" } -$gcc -E -dM -x c $input \ +$gcc -E -dM -x c -I $incpath/include/uapi $input \ |sed -ne 's/^#define __NR_//p' \ |sort -t' ' -k2 -nu \ |create_table diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index 0b2054007314..a19690a17291 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -5,16 +5,19 @@ #include "util.h" #include "machine.h" #include "api/fs/fs.h" +#include "debug.h" int arch__fix_module_text_start(u64 *start, const char *name) { + u64 m_start = *start; char path[PATH_MAX]; snprintf(path, PATH_MAX, "module/%.*s/sections/.text", (int)strlen(name) - 2, name + 1); - - if (sysfs__read_ull(path, (unsigned long long *)start) < 0) - return -1; + if (sysfs__read_ull(path, (unsigned long long *)start) < 0) { + pr_debug2("Using module %s start:%#lx\n", path, m_start); + *start = m_start; + } return 0; } diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..b4e6f9e6204a 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -349,6 +349,12 @@ 425 common io_uring_setup __x64_sys_io_uring_setup 426 common io_uring_enter __x64_sys_io_uring_enter 427 common io_uring_register __x64_sys_io_uring_register +428 common open_tree __x64_sys_open_tree +429 common move_mount __x64_sys_move_mount +430 common fsopen __x64_sys_fsopen +431 common fsconfig __x64_sys_fsconfig +432 common fsmount __x64_sys_fsmount +433 common fspick __x64_sys_fspick # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 7691980b7df1..f101576d1c72 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -161,9 +161,16 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest continue; } - } else + } else if (mem_start == kallsyms.vmlinux_map->end) { + /* + * Ignore aliases to _etext, i.e. to the end of the kernel text area, + * such as __indirect_thunk_end. 
+ */ + continue; + } else { pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", mem_start, sym->name); + } err = -1; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index e0311c9750ad..9097543a818b 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -271,7 +271,7 @@ static int string_set_value(struct bt_ctf_field *field, const char *string) if (i > 0) strncpy(buffer, string, i); } - strncat(buffer + p, numstr, 4); + memcpy(buffer + p, numstr, 4); p += 3; } } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 28a9541c4835..dc7aafe45a2b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -924,7 +924,8 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; * symbol_name if it's not that important. */ static int machine__get_running_kernel_start(struct machine *machine, - const char **symbol_name, u64 *start) + const char **symbol_name, + u64 *start, u64 *end) { char filename[PATH_MAX]; int i, err = -1; @@ -949,6 +950,11 @@ static int machine__get_running_kernel_start(struct machine *machine, *symbol_name = name; *start = addr; + + err = kallsyms__get_function_start(filename, "_etext", &addr); + if (!err) + *end = addr; + return 0; } @@ -1441,7 +1447,7 @@ int machine__create_kernel_maps(struct machine *machine) struct dso *kernel = machine__get_kernel(machine); const char *name = NULL; struct map *map; - u64 addr = 0; + u64 start = 0, end = ~0ULL; int ret; if (kernel == NULL) @@ -1460,9 +1466,9 @@ int machine__create_kernel_maps(struct machine *machine) "continuing anyway...\n", machine->pid); } - if (!machine__get_running_kernel_start(machine, &name, &addr)) { + if (!machine__get_running_kernel_start(machine, &name, &start, &end)) { if (name && - map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) { + map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, start)) { machine__destroy_kernel_maps(machine); ret = -1; goto out_put; @@ -1472,16 +1478,19 @@ int machine__create_kernel_maps(struct machine *machine) * we have a real start address now, so re-order the kmaps * assume it's the last in the kmaps */ - machine__update_kernel_mmap(machine, addr, ~0ULL); + machine__update_kernel_mmap(machine, start, end); } if (machine__create_extra_kernel_maps(machine, kernel)) pr_debug("Problems creating extra kernel maps, continuing anyway...\n"); - /* update end address of the kernel map using adjacent module address */ - map = map__next(machine__kernel_map(machine)); - if (map) - machine__set_kernel_mmap(machine, addr, map->start); + if (end == ~0ULL) { + /* update end address of the kernel map using adjacent module address */ + map = map__next(machine__kernel_map(machine)); + if (map) + machine__set_kernel_mmap(machine, start, map->start); + } + out_put: dso__put(kernel); return ret; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2310a1752983..54cf163347f7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -647,6 +647,26 @@ static void perf_event__throttle_swap(union perf_event *event, swap_sample_id_all(event, &event->throttle + 1); } +static void perf_event__namespaces_swap(union perf_event *event, + bool sample_id_all) +{ + u64 i; + + event->namespaces.pid = bswap_32(event->namespaces.pid); + event->namespaces.tid = bswap_32(event->namespaces.tid); + event->namespaces.nr_namespaces = bswap_64(event->namespaces.nr_namespaces); + + for (i = 0; i < event->namespaces.nr_namespaces; i++) { + struct 
perf_ns_link_info *ns = &event->namespaces.link_info[i]; + + ns->dev = bswap_64(ns->dev); + ns->ino = bswap_64(ns->ino); + } + + if (sample_id_all) + swap_sample_id_all(event, &event->namespaces.link_info[i]); +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -887,6 +907,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, + [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 403045a2bbea..b413ba5b9835 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -133,7 +133,7 @@ void thread__put(struct thread *thread) } } -struct namespaces *thread__namespaces(const struct thread *thread) +static struct namespaces *__thread__namespaces(const struct thread *thread) { if (list_empty(&thread->namespaces_list)) return NULL; @@ -141,10 +141,21 @@ struct namespaces *thread__namespaces(const struct thread *thread) return list_first_entry(&thread->namespaces_list, struct namespaces, list); } +struct namespaces *thread__namespaces(const struct thread *thread) +{ + struct namespaces *ns; + + down_read((struct rw_semaphore *)&thread->namespaces_lock); + ns = __thread__namespaces(thread); + up_read((struct rw_semaphore *)&thread->namespaces_lock); + + return ns; +} + static int __thread__set_namespaces(struct thread *thread, u64 timestamp, struct namespaces_event *event) { - struct namespaces *new, *curr = thread__namespaces(thread); + struct namespaces *new, *curr = __thread__namespaces(thread); new = namespaces__new(event); if (!new) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index a877803e4ba8..dd5d69529382 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -31,6 +31,7 @@ test_section_names test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user +test_sysctl alu32 libbpf.pc libbpf.so.* diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 6e80b66d7fb1..5f6f9e7aba2a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -278,7 +278,7 @@ static int (*bpf_skb_change_type)(void *ctx, __u32 type) = (void *) BPF_FUNC_skb_change_type; static unsigned int (*bpf_get_hash_recalc)(void *ctx) = (void *) BPF_FUNC_get_hash_recalc; -static unsigned long long (*bpf_get_current_task)(void *ctx) = +static unsigned long long (*bpf_get_current_task)(void) = (void *) BPF_FUNC_get_current_task; static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) = (void *) BPF_FUNC_skb_change_tail; diff --git a/tools/testing/selftests/bpf/map_tests/.gitignore b/tools/testing/selftests/bpf/map_tests/.gitignore new file mode 100644 index 000000000000..45984a364647 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/.gitignore @@ -0,0 +1 @@ +tests.h diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 8b54adfd6264..fbd1d88a6095 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -242,12 +242,12 @@ void 
test_flow_dissector(void) */ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); - CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno); + CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno); tap_fd = create_tap("tap0"); - CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno); + CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno); err = ifup("tap0"); - CHECK(err, "ifup", "err %d errno %d", err, errno); + CHECK(err, "ifup", "err %d errno %d\n", err, errno); for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys = {}; @@ -255,7 +255,7 @@ void test_flow_dissector(void) __u32 key = 0; err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); - CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno); + CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); @@ -264,5 +264,6 @@ void test_flow_dissector(void) CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); } + bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 781c7de343be..1b25a7e348dc 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -18,9 +18,11 @@ #include #include +#include #include "bpf_util.h" #include "bpf_rlimit.h" +#include "../../../include/linux/filter.h" #define LOCAL_FREE_TARGET (128) #define PERCPU_FREE_TARGET (4) @@ -40,6 +42,68 @@ static int create_map(int map_type, int map_flags, unsigned int size) return map_fd; } +static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, + void *value) +{ + struct bpf_load_program_attr prog; + struct bpf_create_map_attr map; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, fd), + BPF_LD_IMM64(BPF_REG_3, key), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + __u8 data[64] = {}; + int mfd, pfd, ret, zero = 0; + __u32 retval = 0; + + memset(&map, 0, sizeof(map)); + map.map_type = BPF_MAP_TYPE_ARRAY; + map.key_size = sizeof(int); + map.value_size = sizeof(unsigned long long); + map.max_entries = 1; + + mfd = bpf_create_map_xattr(&map); + if (mfd < 0) + return -1; + + insns[0].imm = mfd; + + memset(&prog, 0, sizeof(prog)); + prog.prog_type = BPF_PROG_TYPE_SCHED_CLS; + prog.insns = insns; + prog.insns_cnt = ARRAY_SIZE(insns); + prog.license = "GPL"; + + pfd = bpf_load_program_xattr(&prog, NULL, 0); + if (pfd < 0) { + close(mfd); + return -1; + } + + ret = bpf_prog_test_run(pfd, 1, data, sizeof(data), + NULL, NULL, &retval, NULL); + if (ret < 0 || retval != 42) { + ret = -1; + } else { + assert(!bpf_map_lookup_elem(mfd, &zero, value)); + ret = 0; + } + close(pfd); + close(mfd); + return ret; +} + static int map_subset(int map0, int map1) { unsigned long long next_key = 0; @@ -87,7 +151,7 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } -/* Size of the LRU amp is 2 +/* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) * Lookup Key=1 @@ 
-157,7 +221,7 @@ static void test_lru_sanity0(int map_type, int map_flags) * stop LRU from removing key=1 */ key = 1; - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(value[0] == 1234); key = 3; @@ -167,7 +231,8 @@ static void test_lru_sanity0(int map_type, int map_flags) /* key=2 has been removed from the LRU */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); assert(map_equal(lru_map_fd, expected_map_fd)); @@ -221,7 +286,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) /* Lookup 1 to tgt_free/2 */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } @@ -322,10 +387,11 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) end_key = 1 + batch_size; value[0] = 4321; for (key = 1; key < end_key; key++) { - assert(bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(value[0] == 4321); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); @@ -404,7 +470,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) /* Lookup key 1 to tgt_free*3/2 */ end_key = tgt_free + batch_size; for (key = 1; key < end_key; key++) { - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } @@ -463,7 +529,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); for (key = 1; key <= tgt_free; key++) { - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } @@ -494,16 +560,16 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd) unsigned long long key, value[nr_cpus]; /* Ensure the last key inserted by previous CPU can be found */ - assert(!bpf_map_lookup_elem(map_fd, &last_key, value)); - + assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, last_key, value)); value[0] = 1234; key = last_key + 1; assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST)); - assert(!bpf_map_lookup_elem(map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value)); /* Cannot find the last key because it was removed by LRU */ - assert(bpf_map_lookup_elem(map_fd, &last_key, value)); + assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 && + errno == ENOENT); } /* Test map with only one element */ @@ -590,8 +656,8 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free) /* Make ref bit sticky for key: [1, tgt_free] */ for (stable_key = 1; stable_key <= tgt_free; stable_key++) { /* Mark the ref bit */ - assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key, - value)); + 
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, + stable_key, value)); } assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -612,6 +678,198 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free) printf("Pass\n"); } +/* Size of the LRU map is 2 + * Add key=1 (+1 key) + * Add key=2 (+1 key) + * Lookup Key=1 (datapath) + * Lookup Key=2 (syscall) + * Add Key=3 + * => Key=2 will be removed by LRU + * Iterate map. Only found key=1 and key=3 + */ +static void test_lru_sanity7(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + int lru_map_fd, expected_map_fd; + int next_cpu = 0; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, &next_cpu) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus); + else + lru_map_fd = create_map(map_type, map_flags, 2); + assert(lru_map_fd != -1); + + expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2); + assert(expected_map_fd != -1); + + value[0] = 1234; + + /* insert key=1 element */ + + key = 1; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* BPF_NOEXIST means: add new element if it doesn't exist */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 + /* key=1 already exists */ + && errno == EEXIST); + + /* insert key=2 element */ + + /* check that key=2 is not found */ + key = 2; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* BPF_EXIST means: update existing element */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && + /* key=2 is not there */ + errno == ENOENT); + + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + + /* insert key=3 element */ + + /* check that key=3 is not found */ + key = 3; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* check that key=1 can be found and mark the ref bit to + * stop LRU from removing key=1 + */ + key = 1; + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); + assert(value[0] == 1234); + + /* check that key=2 can be found and do _not_ mark ref bit. + * this will be evicted on next update. + */ + key = 2; + assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(value[0] == 1234); + + key = 3; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* key=2 has been removed from the LRU */ + key = 2; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + assert(map_equal(lru_map_fd, expected_map_fd)); + + close(expected_map_fd); + close(lru_map_fd); + + printf("Pass\n"); +} + +/* Size of the LRU map is 2 + * Add key=1 (+1 key) + * Add key=2 (+1 key) + * Lookup Key=1 (syscall) + * Lookup Key=2 (datapath) + * Add Key=3 + * => Key=1 will be removed by LRU + * Iterate map. 
Only found key=2 and key=3 + */ +static void test_lru_sanity8(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + int lru_map_fd, expected_map_fd; + int next_cpu = 0; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, &next_cpu) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus); + else + lru_map_fd = create_map(map_type, map_flags, 2); + assert(lru_map_fd != -1); + + expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2); + assert(expected_map_fd != -1); + + value[0] = 1234; + + /* insert key=1 element */ + + key = 1; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + + /* BPF_NOEXIST means: add new element if it doesn't exist */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 + /* key=1 already exists */ + && errno == EEXIST); + + /* insert key=2 element */ + + /* check that key=2 is not found */ + key = 2; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* BPF_EXIST means: update existing element */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && + /* key=2 is not there */ + errno == ENOENT); + + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* insert key=3 element */ + + /* check that key=3 is not found */ + key = 3; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* check that key=1 can be found and do _not_ mark ref bit. + * this will be evicted on next update. + */ + key = 1; + assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(value[0] == 1234); + + /* check that key=2 can be found and mark the ref bit to + * stop LRU from removing key=2 + */ + key = 2; + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); + assert(value[0] == 1234); + + key = 3; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* key=1 has been removed from the LRU */ + key = 1; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + assert(map_equal(lru_map_fd, expected_map_fd)); + + close(expected_map_fd); + close(lru_map_fd); + + printf("Pass\n"); +} + int main(int argc, char **argv) { int map_types[] = {BPF_MAP_TYPE_LRU_HASH, @@ -637,6 +895,8 @@ int main(int argc, char **argv) test_lru_sanity4(map_types[t], map_flags[f], tgt_free); test_lru_sanity5(map_types[t], map_flags[f]); test_lru_sanity6(map_types[t], map_flags[f], tgt_free); + test_lru_sanity7(map_types[t], map_flags[f]); + test_lru_sanity8(map_types[t], map_flags[f]); printf("\n"); } diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 524b15dabb3c..b9171a7b3aaa 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -430,15 +430,15 @@ setup_xfrm() { veth_a_addr="${2}" veth_b_addr="${3}" - run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" || return 1 - run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" - run_cmd "${ns_a} ip -${proto} xfrm policy add dir out 
mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel" - run_cmd "${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel" + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" - run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" - run_cmd "${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel" - run_cmd "${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel" + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } setup_xfrm4() { diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 21159f5f3362..14fcf3104c77 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -8,6 +8,11 @@ ksft_skip=4 ret=0 test_inet_nat=true +cleanup() +{ + for i in 0 1 2; do ip netns del ns$i;done +} + nft --version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without nft tool" @@ -21,6 +26,13 @@ if [ $? -ne 0 ];then fi ip netns add ns0 +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +trap cleanup EXIT + ip netns add ns1 ip netns add ns2 @@ -347,7 +359,7 @@ EOF test_masquerade6() { local family=$1 - local natflags=$1 + local natflags=$2 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -392,18 +404,13 @@ EOF ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? 
-ne 0 ] ; then -<<<<<<< HEAD - echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" -======= - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags" lret=1 fi # ns1 should have seen packets from ns0, due to masquerade expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then bad_counter ns1 ns0$dir "$expect" @@ -433,38 +440,27 @@ EOF fi done -<<<<<<< HEAD - ip netns exec ns0 nft flush chain $family nat postrouting -======= ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec ns0 nft flush chain ip6 nat postrouting ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + ip netns exec ns0 nft flush chain $family nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi -<<<<<<< HEAD - test $lret -eq 0 && echo "PASS: $family IPv6 masquerade for ns2" -======= - test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for ns2" return $lret } test_masquerade() { -<<<<<<< HEAD local family=$1 -======= - local natflags=$1 ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + local natflags=$2 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -509,11 +505,7 @@ EOF ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then -<<<<<<< HEAD - echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" -======= - echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags" lret=1 fi @@ -549,27 +541,19 @@ EOF fi done -<<<<<<< HEAD - ip netns exec ns0 nft flush chain $family nat postrouting -======= ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec ns0 nft flush chain ip nat postrouting ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + ip netns exec ns0 nft flush chain $family nat postrouting if [ $? 
-ne 0 ]; then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi -<<<<<<< HEAD - test $lret -eq 0 && echo "PASS: $family IP masquerade for ns2" -======= - test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for ns2" return $lret } @@ -842,21 +826,14 @@ reset_counters $test_inet_nat && test_local_dnat inet $test_inet_nat && test_local_dnat6 inet +for flags in "" "fully-random"; do reset_counters -<<<<<<< HEAD -test_masquerade ip -test_masquerade6 ip6 +test_masquerade ip $flags +test_masquerade6 ip6 $flags reset_counters -$test_inet_nat && test_masquerade inet -$test_inet_nat && test_masquerade6 inet -======= -test_masquerade "" -test_masquerade6 "" - -reset_counters -test_masquerade "fully-random" -test_masquerade6 "fully-random" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 +$test_inet_nat && test_masquerade inet $flags +$test_inet_nat && test_masquerade6 inet $flags +done reset_counters test_redirect ip @@ -865,6 +842,4 @@ reset_counters $test_inet_nat && test_redirect inet $test_inet_nat && test_redirect6 inet -for i in 0 1 2; do ip netns del ns$i;done - exit $ret
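
A few userspace sketches for the uapi additions above follow; all are illustrative rather than part of the patch, and assume headers that already carry these changes. First, the timeline syncobj ioctls from drm.h: a minimal sketch of waiting for one timeline point, assuming an open DRM fd and an existing syncobj handle (the helper name and the flag choice are illustrative). The handles and points arrays are passed as user pointers packed into __u64 fields.

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/drm.h>

    /* Wait until 'handle' reaches timeline point 'point', or the absolute
     * timeout (in ns) expires. WAIT_FOR_SUBMIT also waits for the fence to
     * be submitted; WAIT_AVAILABLE would only wait for the time point to
     * become available. */
    static int wait_timeline_point(int fd, uint32_t handle, uint64_t point,
                                   int64_t timeout_nsec)
    {
            struct drm_syncobj_timeline_wait wait = {
                    .handles = (uintptr_t)&handle,
                    .points = (uintptr_t)&point,
                    .timeout_nsec = timeout_nsec,
                    .count_handles = 1,
                    .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
            };

            return ioctl(fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait);
    }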
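The i915_user_extension chain pairs with drm_i915_gem_context_create_ext as follows: a minimal sketch, assuming an open i915 fd, of creating a context and setting its priority in a single ioctl (the function and variable names are illustrative). Leaving next_extension at zero terminates the chain.

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static int create_low_prio_context(int fd, uint32_t *ctx_id)
    {
            struct drm_i915_gem_context_create_ext_setparam setparam = {
                    .base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
                    .param = {
                            .param = I915_CONTEXT_PARAM_PRIORITY,
                            .value = I915_CONTEXT_MIN_USER_PRIORITY,
                    },
            };
            struct drm_i915_gem_context_create_ext create = {
                    .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
                    .extensions = (uintptr_t)&setparam,
            };

            if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
                    return -1;
            *ctx_id = create.ctx_id;    /* id of the new context */
            return 0;
    }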
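The reworked drm_i915_gem_busy documentation maps directly onto a small decoder; a sketch under the encoding stated in the comment (writer class in the low word, offset by 1; reader classes as a bitmask in the high word):

    #include <stdint.h>
    #include <stdio.h>

    static void decode_busy(uint32_t busy)
    {
            uint16_t writer = busy & 0xffff;   /* engine class + 1, 0 == no writer */
            uint16_t readers = busy >> 16;     /* one bit per engine class */
            int class;

            if (writer)
                    printf("written by engine class %u\n", writer - 1);
            for (class = 0; class < 16; class++)
                    if (readers & (1u << class))
                            printf("read by engine class %d\n", class);
    }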
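Because I915_CONTEXT_PARAM_SSEU passes a pointer in @value, querying it is a two-struct operation; a minimal sketch, assuming an existing context id and an engine that supports the interface (DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM is the pre-existing getparam ioctl):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static int query_render_sseu(int fd, uint32_t ctx_id,
                                 struct drm_i915_gem_context_param_sseu *sseu)
    {
            struct drm_i915_gem_context_param arg;

            memset(sseu, 0, sizeof(*sseu));    /* flags and rsvd must be zero */
            sseu->engine.engine_class = I915_ENGINE_CLASS_RENDER;
            sseu->engine.engine_instance = 0;

            memset(&arg, 0, sizeof(arg));
            arg.ctx_id = ctx_id;
            arg.param = I915_CONTEXT_PARAM_SSEU;
            arg.size = sizeof(*sseu);
            arg.value = (uintptr_t)sseu;

            /* fails with -ENODEV if the GPU/engine cannot reconfigure SSEU */
            return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
    }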
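The open_tree/move_mount/fsopen/fsconfig/fsmount/fspick entries added to the x86_64 syscall table, together with the new uapi/linux/mount.h constants, compose into the usual fsopen -> fsconfig -> fsmount -> move_mount sequence. A sketch using the raw x86_64 numbers from that table (glibc wrappers are assumed absent; error handling trimmed; the "size" option is just an example tmpfs parameter):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stddef.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/mount.h>

    static int mount_tmpfs_at(const char *where)
    {
            int fsfd, mfd;

            fsfd = syscall(430 /* fsopen */, "tmpfs", FSOPEN_CLOEXEC);
            if (fsfd < 0)
                    return -1;

            /* set a parameter, then create the superblock */
            syscall(431 /* fsconfig */, fsfd, FSCONFIG_SET_STRING, "size", "1M", 0);
            syscall(431 /* fsconfig */, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);

            mfd = syscall(432 /* fsmount */, fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NODEV);
            if (mfd < 0)
                    return -1;

            /* attach the detached mount: empty "from" path relative to mfd */
            return syscall(429 /* move_mount */, mfd, "", AT_FDCWD, where,
                           MOVE_MOUNT_F_EMPTY_PATH);
    }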
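Finally, CLONE_PIDFD from uapi/linux/sched.h asks clone() to place a pidfd for the new child in the parent. A minimal fork-style sketch for x86_64, where the raw syscall argument order is (flags, stack, parent_tid, child_tid, tls) and the pidfd is delivered through the parent_tid slot; the argument order differs on some architectures, so this is illustrative only:

    #define _GNU_SOURCE
    #include <signal.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/syscall.h>
    #include <linux/sched.h>

    static pid_t fork_with_pidfd(int *pidfd)
    {
            *pidfd = -1;
            /* returns twice like fork(); *pidfd is valid in the parent only */
            return syscall(SYS_clone, CLONE_PIDFD | SIGCHLD,
                           NULL, pidfd, NULL, NULL);
    }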