Networking fixes for 6.3-rc8, including fixes from netfilter and bpf

Current release - regressions:
 
   - sched: clear actions pointer in miss cookie init fail
 
   - mptcp: fix accept vs worker race
 
   - bpf: fix bpf_arch_text_poke() with new_addr == NULL on s390
 
   - eth: bnxt_en: fix a possible NULL pointer dereference in unload path
 
   - eth: veth: take into account peer device for NETDEV_XDP_ACT_NDO_XMIT xdp_features flag
 
 Current release - new code bugs:
 
   - eth: revert "net/mlx5: Enable management PF initialization"
 
 Previous releases - regressions:
 
   - netfilter: fix recent physdev match breakage
 
   - bpf: fix incorrect verifier pruning due to missing register precision taints
 
   - eth: virtio_net: fix overflow inside xdp_linearize_page()
 
   - eth: cxgb4: fix use after free bugs caused by circular dependency problem
 
   - eth: mlxsw: pci: fix possible crash during initialization
 
 Previous releases - always broken:
 
   - sched: sch_qfq: prevent slab-out-of-bounds in qfq_activate_agg
 
   - netfilter: validate catch-all set elements
 
   - bridge: don't notify FDB entries with "master dynamic"
 
   - eth: bonding: fix memory leak when changing bond type to ethernet
 
   - eth: i40e: fix accessing vsi->active_filters without holding lock
 
 Misc:
 
   - Mat is back as MPTCP co-maintainer
 
 Signed-off-by: Paolo Abeni <pabeni@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQJGBAABCAAwFiEEg1AjqC77wbdLX2LbKSR5jcyPE6QFAmRBF5ISHHBhYmVuaUBy
 ZWRoYXQuY29tAAoJECkkeY3MjxOkj5sP/itK7DeAzufFIe1SUY+WYdbhAj7XTJso
 q5bpF09wmLW9RLPxZ/hLMnCUniCSBBoJ/3oeBD8SgRBQJKSLjh1WTLYgFxfEZEeY
 DvydMxiurH13pxgMBpCUSTlqDbiLkZ51Sy2sSGJcoJK8XRfA265/D7ZEBFJRIJS9
 wr2prLspZmlN/5dnt8WIXubf83o5mkJ7DneSMBGuJXE2akJ7VBROz10pK1HVMALq
 c6p/Kt92iffEiZZYCnqogrQOu3hLcSCLRTM7Wb3giIX9jaE84Hr9fV+zfG/JDeCJ
 kgjEiKOExnusd8Nq91cClDt92ceRWU5s1M1UxJ5r4Mxjnq0Ug+I3ayItS9bXcEqH
 0PmDql4bKFUue7QiJZkCsusKjlf5R1XxE0Zt+lANn+FWr8THKxvnrbpCjT0ZUvQv
 7kI+Q4g7AFSNoWgM9SwtiTMQmxI8BUo7kgaBLz2IvFDzau4T+yDLKZ+3gyewwp0e
 RN4pac8YyChuuMBmVrZGxVHPA3fKu7C7jCc/xGaMHcQSgFCsQtPpKZVa1SxLR/ZZ
 efMB/J2+GIGv2i5YecH4DItNUd0QhZnXgBjLEaDmEGk4rHIlc9JDy3frD5Qrs4pW
 Dq2zvveRVT30b52sOjkYzEvTU5R/s1nio3RGklUE4hDCV1DkehThAFaX68cIcgeR
 63uRXDpogRs+
 =xUNa
 -----END PGP SIGNATURE-----

Merge tag 'net-6.3-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Paolo Abeni:
 "Including fixes from netfilter and bpf.

  There are a few fixes for new code bugs, including the Mellanox one
  noted in the last networking pull. No known regressions outstanding.

  Current release - regressions:

   - sched: clear actions pointer in miss cookie init fail

   - mptcp: fix accept vs worker race

   - bpf: fix bpf_arch_text_poke() with new_addr == NULL on s390

   - eth: bnxt_en: fix a possible NULL pointer dereference in unload
     path

   - eth: veth: take into account peer device for
     NETDEV_XDP_ACT_NDO_XMIT xdp_features flag

  Current release - new code bugs:

   - eth: revert "net/mlx5: Enable management PF initialization"

  Previous releases - regressions:

   - netfilter: fix recent physdev match breakage

   - bpf: fix incorrect verifier pruning due to missing register
     precision taints

   - eth: virtio_net: fix overflow inside xdp_linearize_page()

   - eth: cxgb4: fix use after free bugs caused by circular dependency
     problem

   - eth: mlxsw: pci: fix possible crash during initialization

  Previous releases - always broken:

   - sched: sch_qfq: prevent slab-out-of-bounds in qfq_activate_agg

   - netfilter: validate catch-all set elements

   - bridge: don't notify FDB entries with "master dynamic"

   - eth: bonding: fix memory leak when changing bond type to ethernet

   - eth: i40e: fix accessing vsi->active_filters without holding lock

  Misc:

   - Mat is back as MPTCP co-maintainer"

* tag 'net-6.3-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (33 commits)
  net: bridge: switchdev: don't notify FDB entries with "master dynamic"
  Revert "net/mlx5: Enable management PF initialization"
  MAINTAINERS: Resume MPTCP co-maintainer role
  mailmap: add entries for Mat Martineau
  e1000e: Disable TSO on i219-LM card to increase speed
  bnxt_en: fix free-running PHC mode
  net: dsa: microchip: ksz8795: Correctly handle huge frame configuration
  bpf: Fix incorrect verifier pruning due to missing register precision taints
  hamradio: drop ISA_DMA_API dependency
  mlxsw: pci: Fix possible crash during initialization
  mptcp: fix accept vs worker race
  mptcp: stops worker on unaccepted sockets at listener close
  net: rpl: fix rpl header size calculation
  net: vmxnet3: Fix NULL pointer dereference in vmxnet3_rq_rx_complete()
  bonding: Fix memory leak when changing bond type to Ethernet
  veth: take into account peer device for NETDEV_XDP_ACT_NDO_XMIT xdp_features flag
  mlxfw: fix null-ptr-deref in mlxfw_mfa2_tlv_next()
  bnxt_en: Fix a possible NULL pointer dereference in unload path
  bnxt_en: Do not initialize PTP on older P3/P4 chips
  netfilter: nf_tables: tighten netlink attribute requirements for catch-all elements
  ...
This commit is contained in:
Linus Torvalds 2023-04-20 11:03:51 -07:00
commit 23309d600d
36 changed files with 351 additions and 161 deletions

View File

@ -299,6 +299,8 @@ Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
Mathieu Othacehe <m.othacehe@gmail.com>
Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
Mat Martineau <martineau@kernel.org> <mathewm@codeaurora.org>
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
Matthew Wilcox <willy@infradead.org> <mawilcox@linuxonhyperv.com>

View File

@ -7,6 +7,21 @@ ice devlink support
This document describes the devlink features implemented by the ``ice``
device driver.

Parameters
==========

.. list-table:: Generic parameters implemented

   * - Name
     - Mode
     - Notes
   * - ``enable_roce``
     - runtime
     - mutually exclusive with ``enable_iwarp``
   * - ``enable_iwarp``
     - runtime
     - mutually exclusive with ``enable_roce``

Info versions
=============

View File

@ -14594,6 +14594,7 @@ F: net/netlabel/
NETWORKING [MPTCP]
M: Matthieu Baerts <matthieu.baerts@tessares.net>
M: Mat Martineau <martineau@kernel.org>
L: netdev@vger.kernel.org
L: mptcp@lists.linux.dev
S: Maintained

View File

@ -539,7 +539,7 @@ static void bpf_jit_plt(void *plt, void *ret, void *target)
{
memcpy(plt, bpf_plt, BPF_PLT_SIZE);
*(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
*(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target;
*(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
}
/*
@ -2010,7 +2010,9 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
} __packed insn;
char expected_plt[BPF_PLT_SIZE];
char current_plt[BPF_PLT_SIZE];
char new_plt[BPF_PLT_SIZE];
char *plt;
char *ret;
int err;
/* Verify the branch to be patched. */
@ -2032,12 +2034,15 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
if (err < 0)
return err;
bpf_jit_plt(expected_plt, (char *)ip + 6, old_addr);
ret = (char *)ip + 6;
bpf_jit_plt(expected_plt, ret, old_addr);
if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
return -EINVAL;
/* Adjust the call address. */
bpf_jit_plt(new_plt, ret, new_addr);
s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
&new_addr, sizeof(void *));
new_plt + (bpf_plt_target - bpf_plt),
sizeof(void *));
}
/* Adjust the mask of the branch. */

View File

@ -1777,14 +1777,15 @@ void bond_lower_state_changed(struct slave *slave)
/* The bonding driver uses ether_setup() to convert a master bond device
* to ARPHRD_ETHER, that resets the target netdevice's flags so we always
* have to restore the IFF_MASTER flag, and only restore IFF_SLAVE if it was set
* have to restore the IFF_MASTER flag, and only restore IFF_SLAVE and IFF_UP
* if they were set
*/
static void bond_ether_setup(struct net_device *bond_dev)
{
unsigned int slave_flag = bond_dev->flags & IFF_SLAVE;
unsigned int flags = bond_dev->flags & (IFF_SLAVE | IFF_UP);
ether_setup(bond_dev);
bond_dev->flags |= IFF_MASTER | slave_flag;
bond_dev->flags |= IFF_MASTER | flags;
bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}

View File

@ -96,7 +96,7 @@ static int ksz8795_change_mtu(struct ksz_device *dev, int frame_size)
if (frame_size > KSZ8_LEGAL_PACKET_SIZE)
ctrl2 |= SW_LEGAL_PACKET_DISABLE;
else if (frame_size > KSZ8863_NORMAL_PACKET_SIZE)
if (frame_size > KSZ8863_NORMAL_PACKET_SIZE)
ctrl1 |= SW_HUGE_PACKET;
ret = ksz_rmw8(dev, REG_SW_CTRL_1, SW_HUGE_PACKET, ctrl1);

View File

@ -2388,7 +2388,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
case ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE: {
switch (BNXT_EVENT_PHC_EVENT_TYPE(data1)) {
case ASYNC_EVENT_CMPL_PHC_UPDATE_EVENT_DATA1_FLAGS_PHC_RTC_UPDATE:
if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
if (BNXT_PTP_USE_RTC(bp)) {
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
u64 ns;
@ -7627,7 +7627,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
u8 flags;
int rc;
if (bp->hwrm_spec_code < 0x10801) {
if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_THOR(bp)) {
rc = -ENODEV;
goto no_ptp;
}

View File

@ -304,7 +304,7 @@ void bnxt_rdma_aux_device_uninit(struct bnxt *bp)
struct auxiliary_device *adev;
/* Skip if no auxiliary device init was done. */
if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
if (!bp->aux_priv)
return;
aux_priv = bp->aux_priv;
@ -324,6 +324,7 @@ static void bnxt_aux_dev_release(struct device *dev)
bp->edev = NULL;
kfree(aux_priv->edev);
kfree(aux_priv);
bp->aux_priv = NULL;
}
static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp)
@ -359,19 +360,18 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
return;
bp->aux_priv = kzalloc(sizeof(*bp->aux_priv), GFP_KERNEL);
if (!bp->aux_priv)
aux_priv = kzalloc(sizeof(*bp->aux_priv), GFP_KERNEL);
if (!aux_priv)
goto exit;
bp->aux_priv->id = ida_alloc(&bnxt_aux_dev_ids, GFP_KERNEL);
if (bp->aux_priv->id < 0) {
aux_priv->id = ida_alloc(&bnxt_aux_dev_ids, GFP_KERNEL);
if (aux_priv->id < 0) {
netdev_warn(bp->dev,
"ida alloc failed for ROCE auxiliary device\n");
kfree(bp->aux_priv);
kfree(aux_priv);
goto exit;
}
aux_priv = bp->aux_priv;
aux_dev = &aux_priv->aux_dev;
aux_dev->id = aux_priv->id;
aux_dev->name = "rdma";
@ -380,10 +380,11 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
rc = auxiliary_device_init(aux_dev);
if (rc) {
ida_free(&bnxt_aux_dev_ids, bp->aux_priv->id);
kfree(bp->aux_priv);
ida_free(&bnxt_aux_dev_ids, aux_priv->id);
kfree(aux_priv);
goto exit;
}
bp->aux_priv = aux_priv;
/* From this point, all cleanup will happen via the .release callback &
* any error unwinding will need to include a call to

View File

@ -1135,7 +1135,7 @@ void cxgb4_cleanup_tc_flower(struct adapter *adap)
return;
if (adap->flower_stats_timer.function)
del_timer_sync(&adap->flower_stats_timer);
timer_shutdown_sync(&adap->flower_stats_timer);
cancel_work_sync(&adap->flower_stats_work);
rhashtable_destroy(&adap->flower_tbl);
adap->tc_flower_initialized = false;

View File

@ -5288,31 +5288,6 @@ static void e1000_watchdog_task(struct work_struct *work)
ew32(TARC(0), tarc0);
}
/* disable TSO for pcie and 10/100 speeds, to avoid
* some hardware issues
*/
if (!(adapter->flags & FLAG_TSO_FORCE)) {
switch (adapter->link_speed) {
case SPEED_10:
case SPEED_100:
e_info("10/100 speed: disabling TSO\n");
netdev->features &= ~NETIF_F_TSO;
netdev->features &= ~NETIF_F_TSO6;
break;
case SPEED_1000:
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
break;
default:
/* oops */
break;
}
if (hw->mac.type == e1000_pch_spt) {
netdev->features &= ~NETIF_F_TSO;
netdev->features &= ~NETIF_F_TSO6;
}
}
/* enable transmits in the hardware, need to do this
* after setting TARC(0)
*/
@ -7526,6 +7501,32 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
NETIF_F_RXCSUM |
NETIF_F_HW_CSUM);
/* disable TSO for pcie and 10/100 speeds to avoid
* some hardware issues and for i219 to fix transfer
* speed being capped at 60%
*/
if (!(adapter->flags & FLAG_TSO_FORCE)) {
switch (adapter->link_speed) {
case SPEED_10:
case SPEED_100:
e_info("10/100 speed: disabling TSO\n");
netdev->features &= ~NETIF_F_TSO;
netdev->features &= ~NETIF_F_TSO6;
break;
case SPEED_1000:
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
break;
default:
/* oops */
break;
}
if (hw->mac.type == e1000_pch_spt) {
netdev->features &= ~NETIF_F_TSO;
netdev->features &= ~NETIF_F_TSO6;
}
}
/* Set user-changeable features (subset of all device features) */
netdev->hw_features = netdev->features;
netdev->hw_features |= NETIF_F_RXFCS;

View File

@ -11059,8 +11059,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
pf->hw.aq.asq_last_status));
}
/* reinit the misc interrupt */
if (pf->flags & I40E_FLAG_MSIX_ENABLED)
if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
ret = i40e_setup_misc_vector(pf);
if (ret)
goto end_unlock;
}
/* Add a filter to drop all Flow control frames from any VSI from being
* transmitted. By doing so we stop a malicious VF from sending out
@ -14133,15 +14136,15 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
vsi->id = ctxt.vsi_number;
}
vsi->active_filters = 0;
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
spin_lock_bh(&vsi->mac_filter_hash_lock);
vsi->active_filters = 0;
/* If macvlan filters already exist, force them to get loaded */
hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
f->state = I40E_FILTER_NEW;
f_count++;
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
if (f_count) {
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;

View File

@ -59,9 +59,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev)
if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
return false;
if (mlx5_core_is_management_pf(dev))
return false;
if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
return false;
@ -201,9 +198,6 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev)
if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
return false;
if (mlx5_core_is_management_pf(dev))
return false;
if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV)
return false;

View File

@ -75,10 +75,6 @@ int mlx5_ec_init(struct mlx5_core_dev *dev)
if (!mlx5_core_is_ecpf(dev))
return 0;
/* Management PF don't have a peer PF */
if (mlx5_core_is_management_pf(dev))
return 0;
return mlx5_host_pf_init(dev);
}
@ -89,10 +85,6 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
if (!mlx5_core_is_ecpf(dev))
return;
/* Management PF don't have a peer PF */
if (mlx5_core_is_management_pf(dev))
return;
mlx5_host_pf_cleanup(dev);
err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]);

View File

@ -1488,7 +1488,7 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *
void *hca_caps;
int err;
if (!mlx5_core_is_ecpf(dev) || mlx5_core_is_management_pf(dev)) {
if (!mlx5_core_is_ecpf(dev)) {
*max_sfs = 0;
return 0;
}

View File

@ -31,6 +31,8 @@ mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file,
if (tlv->type == MLXFW_MFA2_TLV_MULTI_PART) {
multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, tlv);
if (!multi)
return NULL;
tlv_len = NLA_ALIGN(tlv_len + be16_to_cpu(multi->total_len));
}

View File

@ -26,7 +26,7 @@
#define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000
#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000
#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200
#define MLXSW_PCI_SW_RESET_WAIT_MSECS 400
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
#define MLXSW_PCI_FW_READY_MAGIC 0x5E

View File

@ -541,7 +541,6 @@ int efx_net_open(struct net_device *net_dev)
else
efx->state = STATE_NET_UP;
efx_selftest_async_start(efx);
return 0;
}

View File

@ -544,6 +544,8 @@ void efx_start_all(struct efx_nic *efx)
/* Start the hardware monitor if there is one */
efx_start_monitor(efx);
efx_selftest_async_start(efx);
/* Link state detection is normally event-driven; we have
* to poll now because we could have missed a change
*/

View File

@ -47,7 +47,7 @@ config BPQETHER
config SCC
tristate "Z8530 SCC driver"
depends on ISA && AX25 && ISA_DMA_API
depends on ISA && AX25
help
These cards are used to connect your Linux box to an amateur radio
in order to communicate with other computers. If you want to use

View File

@ -1262,11 +1262,12 @@ static void veth_set_xdp_features(struct net_device *dev)
peer = rtnl_dereference(priv->peer);
if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
struct veth_priv *priv_peer = netdev_priv(peer);
xdp_features_t val = NETDEV_XDP_ACT_BASIC |
NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_RX_SG;
if (priv->_xdp_prog || veth_gro_requested(dev))
if (priv_peer->_xdp_prog || veth_gro_requested(peer))
val |= NETDEV_XDP_ACT_NDO_XMIT |
NETDEV_XDP_ACT_NDO_XMIT_SG;
xdp_set_features_flag(dev, val);
@ -1504,19 +1505,23 @@ static int veth_set_features(struct net_device *dev,
{
netdev_features_t changed = features ^ dev->features;
struct veth_priv *priv = netdev_priv(dev);
struct net_device *peer;
int err;
if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
return 0;
peer = rtnl_dereference(priv->peer);
if (features & NETIF_F_GRO) {
err = veth_napi_enable(dev);
if (err)
return err;
xdp_features_set_redirect_target(dev, true);
if (peer)
xdp_features_set_redirect_target(peer, true);
} else {
xdp_features_clear_redirect_target(dev);
if (peer)
xdp_features_clear_redirect_target(peer);
veth_napi_del(dev);
}
return 0;
@ -1598,13 +1603,13 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
peer->max_mtu = max_mtu;
}
xdp_features_set_redirect_target(dev, true);
xdp_features_set_redirect_target(peer, true);
}
if (old_prog) {
if (!prog) {
if (!veth_gro_requested(dev))
xdp_features_clear_redirect_target(dev);
if (peer && !veth_gro_requested(dev))
xdp_features_clear_redirect_target(peer);
if (dev->flags & IFF_UP)
veth_disable_xdp(dev);

View File

@ -814,8 +814,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
int page_off,
unsigned int *len)
{
struct page *page = alloc_page(GFP_ATOMIC);
int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct page *page;
if (page_off + *len + tailroom > PAGE_SIZE)
return NULL;
page = alloc_page(GFP_ATOMIC);
if (!page)
return NULL;
@ -823,7 +828,6 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
page_off += *len;
while (--*num_buf) {
int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
unsigned int buflen;
void *buf;
int off;

View File

@ -1504,7 +1504,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
goto rcd_done;
}
if (rxDataRingUsed) {
if (rxDataRingUsed && adapter->rxdataring_enabled) {
size_t sz;
BUG_ON(rcd->len > rq->data_ring.desc_size);

View File

@ -1211,11 +1211,6 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev)
return dev->coredev_type == MLX5_COREDEV_VF;
}
/* Tell whether @dev is a management PF.
 *
 * The test is purely capability based: it is true when the general
 * capability num_ports equals 1 and native_port_num reads as zero.
 */
static inline bool mlx5_core_is_management_pf(const struct mlx5_core_dev *dev)
{
return MLX5_CAP_GEN(dev, num_ports) == 1 && !MLX5_CAP_GEN(dev, native_port_num);
}
static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev)
{
return dev->caps.embedded_cpu;

View File

@ -294,6 +294,7 @@ struct nf_bridge_info {
u8 pkt_otherhost:1;
u8 in_prerouting:1;
u8 bridged_dnat:1;
u8 sabotage_in_done:1;
__u16 frag_max_size;
struct net_device *physindev;
@ -4712,7 +4713,7 @@ static inline void nf_reset_ct(struct sk_buff *skb)
static inline void nf_reset_trace(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
skb->nf_trace = 0;
#endif
}
@ -4732,7 +4733,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
dst->_nfct = src->_nfct;
nf_conntrack_get(skb_nfct(src));
#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
if (copy)
dst->nf_trace = src->nf_trace;
#endif

View File

@ -1085,6 +1085,10 @@ struct nft_chain {
};
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem);
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,

View File

@ -2967,6 +2967,21 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
}
} else if (opcode == BPF_EXIT) {
return -ENOTSUPP;
} else if (BPF_SRC(insn->code) == BPF_X) {
if (!(*reg_mask & (dreg | sreg)))
return 0;
/* dreg <cond> sreg
* Both dreg and sreg need precision before
* this insn. If only sreg was marked precise
* before it would be equally necessary to
* propagate it to dreg.
*/
*reg_mask |= (sreg | dreg);
/* else dreg <cond> K
* Only dreg still needs precision before
* this insn, so for the K-based conditional
* there is nothing new to be marked.
*/
}
} else if (class == BPF_LD) {
if (!(*reg_mask & dreg))

View File

@ -868,12 +868,17 @@ static unsigned int ip_sabotage_in(void *priv,
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
nf_bridge_info_free(skb);
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
if (nf_bridge) {
if (nf_bridge->sabotage_in_done)
return NF_ACCEPT;
if (!nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
nf_bridge->sabotage_in_done = 1;
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
}
return NF_ACCEPT;

View File

@ -148,6 +148,17 @@ br_switchdev_fdb_notify(struct net_bridge *br,
if (test_bit(BR_FDB_LOCKED, &fdb->flags))
return;
/* Entries with these flags were created using ndm_state == NUD_REACHABLE,
* ndm_flags == NTF_MASTER( | NTF_STICKY), ext_flags == 0 by something
* equivalent to 'bridge fdb add ... master dynamic (sticky)'.
* Drivers don't know how to deal with these, so don't notify them to
* avoid confusing them.
*/
if (test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags) &&
!test_bit(BR_FDB_STATIC, &fdb->flags) &&
!test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
return;
br_switchdev_fdb_populate(br, &item, fdb, NULL);
switch (type) {

View File

@ -32,7 +32,8 @@ static void *ipv6_rpl_segdata_pos(const struct ipv6_rpl_sr_hdr *hdr, int i)
size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
unsigned char cmpre)
{
return (n * IPV6_PFXTAIL_LEN(cmpri)) + IPV6_PFXTAIL_LEN(cmpre);
return sizeof(struct ipv6_rpl_sr_hdr) + (n * IPV6_PFXTAIL_LEN(cmpri)) +
IPV6_PFXTAIL_LEN(cmpre);
}
void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr,

View File

@ -2315,7 +2315,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
bool need_push, dispose_it;
bool dispose_it, need_push = false;
/* If the first subflow moved to a close state before accept, e.g. due
* to an incoming reset, mptcp either:
* - if either the subflow or the msk are dead, destroy the context
* (the subflow socket is deleted by inet_child_forget) and the msk
* - otherwise do nothing at the moment and take action at accept and/or
* listener shutdown - user-space must be able to accept() the closed
* socket.
*/
if (msk->in_accept_queue && msk->first == ssk) {
if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
return;
/* ensure later check in mptcp_worker() will dispose the msk */
sock_set_flag(sk, SOCK_DEAD);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
goto out_release;
}
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
if (dispose_it)
@ -2351,28 +2370,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (!inet_csk(ssk)->icsk_ulp_ops) {
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
} else if (msk->in_accept_queue && msk->first == ssk) {
/* if the first subflow moved to a close state, e.g. due to
* incoming reset and we reach here before inet_child_forget()
* the TCP stack could later try to close it via
* inet_csk_listen_stop(), or deliver it to the user space via
* accept().
* We can't delete the subflow - or risk a double free - nor let
* the msk survive - or will be leaked in the non accept scenario:
* fallback and let TCP cope with the subflow cleanup.
*/
WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN)
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
__tcp_close(ssk, 0);
/* close acquired an extra ref */
__sock_put(ssk);
}
out_release:
release_sock(ssk);
sock_put(ssk);
@ -2427,21 +2440,14 @@ static void __mptcp_close_subflow(struct sock *sk)
mptcp_close_ssk(sk, ssk, subflow);
}
/* if the MPC subflow has been closed before the msk is accepted,
* msk will never be accept-ed, close it now
*/
if (!msk->first && msk->in_accept_queue) {
sock_set_flag(sk, SOCK_DEAD);
inet_sk_state_store(sk, TCP_CLOSE);
}
}
static bool mptcp_check_close_timeout(const struct sock *sk)
static bool mptcp_should_close(const struct sock *sk)
{
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
struct mptcp_subflow_context *subflow;
if (delta >= TCP_TIMEWAIT_LEN)
if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional
@ -2649,7 +2655,7 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
if (mptcp_check_close_timeout(sk)) {
if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
@ -2895,6 +2901,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
/* Tear down an msk that is being closed without ever having been accepted.
 *
 * In order: flag @sk as SOCK_DEAD, store TCP_CLOSE as its state, run
 * mptcp_do_fastclose() on it and finally call __mptcp_destroy_sock().
 *
 * NOTE(review): mptcp_subflow_queue_clean() calls this between
 * lock_sock_nested(sk) and release_sock(sk) - presumably holding the msk
 * socket lock is a requirement; confirm.
 */
void __mptcp_unaccepted_force_close(struct sock *sk)
{
sock_set_flag(sk, SOCK_DEAD);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
__mptcp_destroy_sock(sk);
}
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
@ -3733,6 +3747,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
if (msk->first &&
unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
if (unlikely(list_empty(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
release_sock(newsk);
}

View File

@ -629,10 +629,12 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,

View File

@ -723,9 +723,12 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
if (!ctx)
return;
subflow_ulp_fallback(ssk, ctx);
if (ctx->conn)
sock_put(ctx->conn);
list_del(&mptcp_subflow_ctx(ssk)->node);
if (inet_csk(ssk)->icsk_ulp_ops) {
subflow_ulp_fallback(ssk, ctx);
if (ctx->conn)
sock_put(ctx->conn);
}
kfree_rcu(ctx, rcu);
}
@ -1819,6 +1822,77 @@ static void subflow_state_change(struct sock *sk)
}
}
/* Force-close every MPTCP socket that is still sitting, unaccepted, in the
 * accept queue of @listener_ssk.
 *
 * @listener_sk:  the listener msk; here only its sk_lock lockdep map is used
 * @listener_ssk: the listener subflow whose accept queue is walked
 *
 * Works in two phases:
 *  1. under the queue's rskq_lock, chain each msk found through
 *     msk->dl_next and take a reference on it;
 *  2. drop the @listener_ssk socket lock, then for each chained msk:
 *     force-close it under its own socket lock, cancel its work and drop
 *     the reference.
 * The @listener_ssk socket lock is taken again before returning. When no
 * msk is collected the function returns early without ever releasing it.
 *
 * NOTE(review): the release_sock(listener_ssk) below implies the caller
 * enters with the @listener_ssk socket lock held - confirm against callers.
 */
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
struct mptcp_sock *msk, *next, *head = NULL;
struct request_sock *req;
struct sock *sk;
/* build a list of all unaccepted mptcp sockets */
spin_lock_bh(&queue->rskq_lock);
for (req = queue->rskq_accept_head; req; req = req->dl_next) {
struct mptcp_subflow_context *subflow;
struct sock *ssk = req->sk;
/* only MPTCP subflows with an attached msk are of interest */
if (!sk_is_mptcp(ssk))
continue;
subflow = mptcp_subflow_ctx(ssk);
if (!subflow || !subflow->conn)
continue;
/* skip if already in list */
/* NOTE(review): the first msk queued keeps dl_next == NULL, so it is
 * only recognised as "in list" while it is still head - presumably an
 * msk cannot appear twice in the accept queue; confirm.
 */
sk = subflow->conn;
msk = mptcp_sk(sk);
if (msk->dl_next || msk == head)
continue;
/* the reference is dropped by the sock_put() in the loop below */
sock_hold(sk);
msk->dl_next = head;
head = msk;
}
spin_unlock_bh(&queue->rskq_lock);
/* nothing collected: return with the subflow lock untouched */
if (!head)
return;
/* can't acquire the msk socket lock under the subflow one,
* or will cause ABBA deadlock
*/
release_sock(listener_ssk);
for (msk = head; msk; msk = next) {
sk = (struct sock *)msk;
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
/* unlink from the temporary list before closing */
next = msk->dl_next;
msk->dl_next = NULL;
__mptcp_unaccepted_force_close(sk);
release_sock(sk);
/* lockdep will report a false positive ABBA deadlock
* between cancel_work_sync and the listener socket.
* The involved locks belong to different sockets WRT
* the existing AB chain.
* Using a per socket key is problematic as key
* deregistration requires process context and must be
* performed at socket disposal time, in atomic
* context.
* Just tell lockdep to consider the listener socket
* released here.
*/
mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
mptcp_cancel_work(sk);
mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
sock_put(sk);
}
/* we are still under the listener msk socket lock */
lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
}
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);

View File

@ -3447,6 +3447,64 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
return 0;
}
/* Validate a single set element: when its data carries an NFT_JUMP or
 * NFT_GOTO verdict, validate the target chain one nesting level deeper.
 *
 * @ctx:  validation context; ctx->level is bumped around the chain check
 *        (written through a cast that drops const)
 * @set:  set the element belongs to
 * @iter: not referenced in the body
 * @elem: element to check; only elem->priv is read
 *
 * Returns 0 when the element passes or needs no check, otherwise the
 * negative value returned by nft_chain_validate().
 */
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
int err;
/* elements flagged as interval end are accepted without further checks */
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
data = nft_set_ext_data(ext);
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
pctx->level++;
err = nft_chain_validate(ctx, data->verdict.chain);
/* note: on failure the early return leaves level incremented */
if (err < 0)
return err;
pctx->level--;
break;
default:
/* any other verdict code needs no chain validation */
break;
}
return 0;
}
/* One entry of a set's catchall_list.
 * @list links the entry into set->catchall_list; @elem is the element
 * private pointer that gets passed to nft_set_elem_ext().
 */
struct nft_set_elem_catchall {
struct list_head list;
struct rcu_head rcu;
void *elem;
};
/* Run nft_setelem_validate() on the catch-all elements of @set.
 *
 * Only elements reported active by nft_set_elem_active() under the mask
 * obtained from nft_genmask_next(ctx->net) are checked.
 *
 * Returns 0 when every checked element passes (or none is checked),
 * otherwise the first negative value returned by nft_setelem_validate().
 */
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set_elem_catchall *catchall;
struct nft_set_elem elem;
struct nft_set_ext *ext;
int ret = 0;
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask))
continue;
/* only .priv is filled in; it is the only field the validator reads */
elem.priv = catchall->elem;
/* no iterator is involved here, hence the NULL iter argument */
ret = nft_setelem_validate(ctx, set, NULL, &elem);
if (ret < 0)
return ret;
}
return ret;
}
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
const struct nft_chain *chain,
const struct nlattr *nla);
@ -4759,12 +4817,6 @@ err_set_name:
return err;
}
struct nft_set_elem_catchall {
struct list_head list;
struct rcu_head rcu;
void *elem;
};
static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
struct nft_set *set)
{
@ -6056,7 +6108,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
return err;
if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
if (((flags & NFT_SET_ELEM_CATCHALL) && nla[NFTA_SET_ELEM_KEY]) ||
(!(flags & NFT_SET_ELEM_CATCHALL) && !nla[NFTA_SET_ELEM_KEY]))
return -EINVAL;
if (flags != 0) {
@ -7052,7 +7105,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
}
if (nla[NFTA_OBJ_USERDATA]) {
obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL);
obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL_ACCOUNT);
if (obj->udata == NULL)
goto err_userdata;

View File

@ -199,37 +199,6 @@ nla_put_failure:
return -1;
}
/* Per-element validation callback: when the element's data carries an
 * NFT_JUMP or NFT_GOTO verdict, validate the target chain one nesting
 * level deeper.
 *
 * @ctx:  validation context; ctx->level is bumped around the chain check
 *        (written through a cast that drops const)
 * @set:  set the element belongs to
 * @iter: not referenced in the body
 * @elem: element to check; only elem->priv is read
 *
 * Returns 0 when the element passes or needs no check, otherwise the
 * negative value returned by nft_chain_validate().
 */
static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
int err;
/* elements flagged as interval end are accepted without further checks */
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
data = nft_set_ext_data(ext);
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
pctx->level++;
err = nft_chain_validate(ctx, data->verdict.chain);
/* note: on failure the early return leaves level incremented */
if (err < 0)
return err;
pctx->level--;
break;
default:
/* any other verdict code needs no chain validation */
break;
}
return 0;
}
static int nft_lookup_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **d)
@ -245,9 +214,12 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
iter.skip = 0;
iter.count = 0;
iter.err = 0;
iter.fn = nft_lookup_validate_setelem;
iter.fn = nft_setelem_validate;
priv->set->ops->walk(ctx, priv->set, &iter);
if (!iter.err)
iter.err = nft_set_catchall_validate(ctx, priv->set);
if (iter.err < 0)
return iter.err;

View File

@ -3235,6 +3235,9 @@ int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
err_miss_alloc:
tcf_exts_destroy(exts);
#ifdef CONFIG_NET_CLS_ACT
exts->actions = NULL;
#endif
return err;
}
EXPORT_SYMBOL(tcf_exts_init_ex);

View File

@ -421,15 +421,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
} else
weight = 1;
if (tb[TCA_QFQ_LMAX]) {
if (tb[TCA_QFQ_LMAX])
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
pr_notice("qfq: invalid max length %u\n", lmax);
return -EINVAL;
}
} else
else
lmax = psched_mtu(qdisc_dev(sch));
if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
pr_notice("qfq: invalid max length %u\n", lmax);
return -EINVAL;
}
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;