linux-stable/drivers/net/dsa/mt7530.c

3345 lines
84 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Mediatek MT7530 DSA Switch driver
* Copyright (C) 2017 Sean Wang <sean.wang@mediatek.com>
*/
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/iopoll.h>
#include <linux/mdio.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/of_irq.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/of_platform.h>
#include <linux/phylink.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
#include <net/dsa.h>
#include "mt7530.h"
/* String, offset, and register size in bytes if different from 4 bytes */
static const struct mt7530_mib_desc mt7530_mib[] = {
MIB_DESC(1, 0x00, "TxDrop"),
MIB_DESC(1, 0x04, "TxCrcErr"),
MIB_DESC(1, 0x08, "TxUnicast"),
MIB_DESC(1, 0x0c, "TxMulticast"),
MIB_DESC(1, 0x10, "TxBroadcast"),
MIB_DESC(1, 0x14, "TxCollision"),
MIB_DESC(1, 0x18, "TxSingleCollision"),
MIB_DESC(1, 0x1c, "TxMultipleCollision"),
MIB_DESC(1, 0x20, "TxDeferred"),
MIB_DESC(1, 0x24, "TxLateCollision"),
MIB_DESC(1, 0x28, "TxExcessiveCollistion"),
MIB_DESC(1, 0x2c, "TxPause"),
MIB_DESC(1, 0x30, "TxPktSz64"),
MIB_DESC(1, 0x34, "TxPktSz65To127"),
MIB_DESC(1, 0x38, "TxPktSz128To255"),
MIB_DESC(1, 0x3c, "TxPktSz256To511"),
MIB_DESC(1, 0x40, "TxPktSz512To1023"),
MIB_DESC(1, 0x44, "Tx1024ToMax"),
MIB_DESC(2, 0x48, "TxBytes"),
MIB_DESC(1, 0x60, "RxDrop"),
MIB_DESC(1, 0x64, "RxFiltering"),
MIB_DESC(1, 0x68, "RxUnicast"),
MIB_DESC(1, 0x6c, "RxMulticast"),
MIB_DESC(1, 0x70, "RxBroadcast"),
MIB_DESC(1, 0x74, "RxAlignErr"),
MIB_DESC(1, 0x78, "RxCrcErr"),
MIB_DESC(1, 0x7c, "RxUnderSizeErr"),
MIB_DESC(1, 0x80, "RxFragErr"),
MIB_DESC(1, 0x84, "RxOverSzErr"),
MIB_DESC(1, 0x88, "RxJabberErr"),
MIB_DESC(1, 0x8c, "RxPause"),
MIB_DESC(1, 0x90, "RxPktSz64"),
MIB_DESC(1, 0x94, "RxPktSz65To127"),
MIB_DESC(1, 0x98, "RxPktSz128To255"),
MIB_DESC(1, 0x9c, "RxPktSz256To511"),
MIB_DESC(1, 0xa0, "RxPktSz512To1023"),
MIB_DESC(1, 0xa4, "RxPktSz1024ToMax"),
MIB_DESC(2, 0xa8, "RxBytes"),
MIB_DESC(1, 0xb0, "RxCtrlDrop"),
MIB_DESC(1, 0xb4, "RxIngressDrop"),
MIB_DESC(1, 0xb8, "RxArlDrop"),
};
/* Since phy_device has not yet been created and
* phy_{read,write}_mmd_indirect is not available, we provide our own
* core_{read,write}_mmd_indirect with core_{clear,write,set} wrappers
* to complete this function.
*/
static int
core_read_mmd_indirect(struct mt7530_priv *priv, int prtad, int devad)
{
struct mii_bus *bus = priv->bus;
int value, ret;
/* Write the desired MMD Devad */
ret = bus->write(bus, 0, MII_MMD_CTRL, devad);
if (ret < 0)
goto err;
/* Write the desired MMD register address */
ret = bus->write(bus, 0, MII_MMD_DATA, prtad);
if (ret < 0)
goto err;
/* Select the Function : DATA with no post increment */
ret = bus->write(bus, 0, MII_MMD_CTRL, (devad | MII_MMD_CTRL_NOINCR));
if (ret < 0)
goto err;
/* Read the content of the MMD's selected register */
value = bus->read(bus, 0, MII_MMD_DATA);
return value;
err:
dev_err(&bus->dev, "failed to read mmd register\n");
return ret;
}
static int
core_write_mmd_indirect(struct mt7530_priv *priv, int prtad,
int devad, u32 data)
{
struct mii_bus *bus = priv->bus;
int ret;
/* Write the desired MMD Devad */
ret = bus->write(bus, 0, MII_MMD_CTRL, devad);
if (ret < 0)
goto err;
/* Write the desired MMD register address */
ret = bus->write(bus, 0, MII_MMD_DATA, prtad);
if (ret < 0)
goto err;
/* Select the Function : DATA with no post increment */
ret = bus->write(bus, 0, MII_MMD_CTRL, (devad | MII_MMD_CTRL_NOINCR));
if (ret < 0)
goto err;
/* Write the data into MMD's selected register */
ret = bus->write(bus, 0, MII_MMD_DATA, data);
err:
if (ret < 0)
dev_err(&bus->dev,
"failed to write mmd register\n");
return ret;
}
static void
core_write(struct mt7530_priv *priv, u32 reg, u32 val)
{
struct mii_bus *bus = priv->bus;
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
core_write_mmd_indirect(priv, reg, MDIO_MMD_VEND2, val);
mutex_unlock(&bus->mdio_lock);
}
static void
core_rmw(struct mt7530_priv *priv, u32 reg, u32 mask, u32 set)
{
struct mii_bus *bus = priv->bus;
u32 val;
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
val = core_read_mmd_indirect(priv, reg, MDIO_MMD_VEND2);
val &= ~mask;
val |= set;
core_write_mmd_indirect(priv, reg, MDIO_MMD_VEND2, val);
mutex_unlock(&bus->mdio_lock);
}
static void
core_set(struct mt7530_priv *priv, u32 reg, u32 val)
{
core_rmw(priv, reg, 0, val);
}
static void
core_clear(struct mt7530_priv *priv, u32 reg, u32 val)
{
core_rmw(priv, reg, val, 0);
}
static int
mt7530_mii_write(struct mt7530_priv *priv, u32 reg, u32 val)
{
struct mii_bus *bus = priv->bus;
u16 page, r, lo, hi;
int ret;
page = (reg >> 6) & 0x3ff;
r = (reg >> 2) & 0xf;
lo = val & 0xffff;
hi = val >> 16;
/* MT7530 uses 31 as the pseudo port */
ret = bus->write(bus, 0x1f, 0x1f, page);
if (ret < 0)
goto err;
ret = bus->write(bus, 0x1f, r, lo);
if (ret < 0)
goto err;
ret = bus->write(bus, 0x1f, 0x10, hi);
err:
if (ret < 0)
dev_err(&bus->dev,
"failed to write mt7530 register\n");
return ret;
}
static u32
mt7530_mii_read(struct mt7530_priv *priv, u32 reg)
{
struct mii_bus *bus = priv->bus;
u16 page, r, lo, hi;
int ret;
page = (reg >> 6) & 0x3ff;
r = (reg >> 2) & 0xf;
/* MT7530 uses 31 as the pseudo port */
ret = bus->write(bus, 0x1f, 0x1f, page);
if (ret < 0) {
dev_err(&bus->dev,
"failed to read mt7530 register\n");
return ret;
}
lo = bus->read(bus, 0x1f, r);
hi = bus->read(bus, 0x1f, 0x10);
return (hi << 16) | (lo & 0xffff);
}
static void
mt7530_write(struct mt7530_priv *priv, u32 reg, u32 val)
{
struct mii_bus *bus = priv->bus;
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
mt7530_mii_write(priv, reg, val);
mutex_unlock(&bus->mdio_lock);
}
static u32
_mt7530_unlocked_read(struct mt7530_dummy_poll *p)
{
return mt7530_mii_read(p->priv, p->reg);
}
static u32
_mt7530_read(struct mt7530_dummy_poll *p)
{
struct mii_bus *bus = p->priv->bus;
u32 val;
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
val = mt7530_mii_read(p->priv, p->reg);
mutex_unlock(&bus->mdio_lock);
return val;
}
static u32
mt7530_read(struct mt7530_priv *priv, u32 reg)
{
struct mt7530_dummy_poll p;
INIT_MT7530_DUMMY_POLL(&p, priv, reg);
return _mt7530_read(&p);
}
static void
mt7530_rmw(struct mt7530_priv *priv, u32 reg,
u32 mask, u32 set)
{
struct mii_bus *bus = priv->bus;
u32 val;
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
val = mt7530_mii_read(priv, reg);
val &= ~mask;
val |= set;
mt7530_mii_write(priv, reg, val);
mutex_unlock(&bus->mdio_lock);
}
static void
mt7530_set(struct mt7530_priv *priv, u32 reg, u32 val)
{
mt7530_rmw(priv, reg, 0, val);
}
static void
mt7530_clear(struct mt7530_priv *priv, u32 reg, u32 val)
{
mt7530_rmw(priv, reg, val, 0);
}
static int
mt7530_fdb_cmd(struct mt7530_priv *priv, enum mt7530_fdb_cmd cmd, u32 *rsp)
{
u32 val;
int ret;
struct mt7530_dummy_poll p;
/* Set the command operating upon the MAC address entries */
val = ATC_BUSY | ATC_MAT(0) | cmd;
mt7530_write(priv, MT7530_ATC, val);
INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_ATC);
ret = readx_poll_timeout(_mt7530_read, &p, val,
!(val & ATC_BUSY), 20, 20000);
if (ret < 0) {
dev_err(priv->dev, "reset timeout\n");
return ret;
}
/* Additional sanity for read command if the specified
* entry is invalid
*/
val = mt7530_read(priv, MT7530_ATC);
if ((cmd == MT7530_FDB_READ) && (val & ATC_INVALID))
return -EINVAL;
if (rsp)
*rsp = val;
return 0;
}
static void
mt7530_fdb_read(struct mt7530_priv *priv, struct mt7530_fdb *fdb)
{
u32 reg[3];
int i;
/* Read from ARL table into an array */
for (i = 0; i < 3; i++) {
reg[i] = mt7530_read(priv, MT7530_TSRA1 + (i * 4));
dev_dbg(priv->dev, "%s(%d) reg[%d]=0x%x\n",
__func__, __LINE__, i, reg[i]);
}
fdb->vid = (reg[1] >> CVID) & CVID_MASK;
fdb->aging = (reg[2] >> AGE_TIMER) & AGE_TIMER_MASK;
fdb->port_mask = (reg[2] >> PORT_MAP) & PORT_MAP_MASK;
fdb->mac[0] = (reg[0] >> MAC_BYTE_0) & MAC_BYTE_MASK;
fdb->mac[1] = (reg[0] >> MAC_BYTE_1) & MAC_BYTE_MASK;
fdb->mac[2] = (reg[0] >> MAC_BYTE_2) & MAC_BYTE_MASK;
fdb->mac[3] = (reg[0] >> MAC_BYTE_3) & MAC_BYTE_MASK;
fdb->mac[4] = (reg[1] >> MAC_BYTE_4) & MAC_BYTE_MASK;
fdb->mac[5] = (reg[1] >> MAC_BYTE_5) & MAC_BYTE_MASK;
fdb->noarp = ((reg[2] >> ENT_STATUS) & ENT_STATUS_MASK) == STATIC_ENT;
}
static void
mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
u8 port_mask, const u8 *mac,
u8 aging, u8 type)
{
u32 reg[3] = { 0 };
int i;
reg[1] |= vid & CVID_MASK;
reg[1] |= ATA2_IVL;
reg[1] |= ATA2_FID(FID_BRIDGED);
reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER;
reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP;
/* STATIC_ENT indicate that entry is static wouldn't
* be aged out and STATIC_EMP specified as erasing an
* entry
*/
reg[2] |= (type & ENT_STATUS_MASK) << ENT_STATUS;
reg[1] |= mac[5] << MAC_BYTE_5;
reg[1] |= mac[4] << MAC_BYTE_4;
reg[0] |= mac[3] << MAC_BYTE_3;
reg[0] |= mac[2] << MAC_BYTE_2;
reg[0] |= mac[1] << MAC_BYTE_1;
reg[0] |= mac[0] << MAC_BYTE_0;
/* Write array into the ARL table */
for (i = 0; i < 3; i++)
mt7530_write(priv, MT7530_ATA1 + (i * 4), reg[i]);
}
/* Setup TX circuit including relevant PAD and driving */
static int
mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
u32 ncpo1, ssc_delta, trgint, i, xtal;
xtal = mt7530_read(priv, MT7530_MHWTRAP) & HWTRAP_XTAL_MASK;
if (xtal == HWTRAP_XTAL_20MHZ) {
dev_err(priv->dev,
"%s: MT7530 with a 20MHz XTAL is not supported!\n",
__func__);
return -EINVAL;
}
switch (interface) {
case PHY_INTERFACE_MODE_RGMII:
trgint = 0;
/* PLL frequency: 125MHz */
ncpo1 = 0x0c80;
break;
case PHY_INTERFACE_MODE_TRGMII:
trgint = 1;
if (priv->id == ID_MT7621) {
/* PLL frequency: 150MHz: 1.2GBit */
if (xtal == HWTRAP_XTAL_40MHZ)
ncpo1 = 0x0780;
if (xtal == HWTRAP_XTAL_25MHZ)
ncpo1 = 0x0a00;
} else { /* PLL frequency: 250MHz: 2.0Gbit */
if (xtal == HWTRAP_XTAL_40MHZ)
ncpo1 = 0x0c80;
if (xtal == HWTRAP_XTAL_25MHZ)
ncpo1 = 0x1400;
}
break;
default:
dev_err(priv->dev, "xMII interface %d not supported\n",
interface);
return -EINVAL;
}
if (xtal == HWTRAP_XTAL_25MHZ)
ssc_delta = 0x57;
else
ssc_delta = 0x87;
mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK,
P6_INTF_MODE(trgint));
/* Lower Tx Driving for TRGMII path */
for (i = 0 ; i < NUM_TRGMII_CTRL ; i++)
mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
TD_DM_DRVP(8) | TD_DM_DRVN(8));
/* Disable MT7530 core and TRGMII Tx clocks */
core_clear(priv, CORE_TRGMII_GSW_CLK_CG,
REG_GSWCK_EN | REG_TRGMIICK_EN);
/* Setup core clock for MT7530 */
/* Disable PLL */
core_write(priv, CORE_GSWPLL_GRP1, 0);
/* Set core clock into 500Mhz */
core_write(priv, CORE_GSWPLL_GRP2,
RG_GSWPLL_POSDIV_500M(1) |
RG_GSWPLL_FBKDIV_500M(25));
/* Enable PLL */
core_write(priv, CORE_GSWPLL_GRP1,
RG_GSWPLL_EN_PRE |
RG_GSWPLL_POSDIV_200M(2) |
RG_GSWPLL_FBKDIV_200M(32));
/* Setup the MT7530 TRGMII Tx Clock */
core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta));
core_write(priv, CORE_PLL_GROUP4,
RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN |
RG_SYSPLL_BIAS_LPF_EN);
core_write(priv, CORE_PLL_GROUP2,
RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN |
RG_SYSPLL_POSDIV(1));
core_write(priv, CORE_PLL_GROUP7,
RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) |
RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
/* Enable MT7530 core and TRGMII Tx clocks */
core_set(priv, CORE_TRGMII_GSW_CLK_CG,
REG_GSWCK_EN | REG_TRGMIICK_EN);
if (!trgint)
for (i = 0 ; i < NUM_TRGMII_CTRL; i++)
mt7530_rmw(priv, MT7530_TRGMII_RD(i),
RD_TAP_MASK, RD_TAP(16));
return 0;
}
static bool mt7531_dual_sgmii_supported(struct mt7530_priv *priv)
{
u32 val;
val = mt7530_read(priv, MT7531_TOP_SIG_SR);
return (val & PAD_DUAL_SGMII_EN) != 0;
}
static int
mt7531_pad_setup(struct dsa_switch *ds, phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
u32 top_sig;
u32 hwstrap;
u32 xtal;
u32 val;
if (mt7531_dual_sgmii_supported(priv))
return 0;
val = mt7530_read(priv, MT7531_CREV);
top_sig = mt7530_read(priv, MT7531_TOP_SIG_SR);
hwstrap = mt7530_read(priv, MT7531_HWTRAP);
if ((val & CHIP_REV_M) > 0)
xtal = (top_sig & PAD_MCM_SMI_EN) ? HWTRAP_XTAL_FSEL_40MHZ :
HWTRAP_XTAL_FSEL_25MHZ;
else
xtal = hwstrap & HWTRAP_XTAL_FSEL_MASK;
/* Step 1 : Disable MT7531 COREPLL */
val = mt7530_read(priv, MT7531_PLLGP_EN);
val &= ~EN_COREPLL;
mt7530_write(priv, MT7531_PLLGP_EN, val);
/* Step 2: switch to XTAL output */
val = mt7530_read(priv, MT7531_PLLGP_EN);
val |= SW_CLKSW;
mt7530_write(priv, MT7531_PLLGP_EN, val);
val = mt7530_read(priv, MT7531_PLLGP_CR0);
val &= ~RG_COREPLL_EN;
mt7530_write(priv, MT7531_PLLGP_CR0, val);
/* Step 3: disable PLLGP and enable program PLLGP */
val = mt7530_read(priv, MT7531_PLLGP_EN);
val |= SW_PLLGP;
mt7530_write(priv, MT7531_PLLGP_EN, val);
/* Step 4: program COREPLL output frequency to 500MHz */
val = mt7530_read(priv, MT7531_PLLGP_CR0);
val &= ~RG_COREPLL_POSDIV_M;
val |= 2 << RG_COREPLL_POSDIV_S;
mt7530_write(priv, MT7531_PLLGP_CR0, val);
usleep_range(25, 35);
switch (xtal) {
case HWTRAP_XTAL_FSEL_25MHZ:
val = mt7530_read(priv, MT7531_PLLGP_CR0);
val &= ~RG_COREPLL_SDM_PCW_M;
val |= 0x140000 << RG_COREPLL_SDM_PCW_S;
mt7530_write(priv, MT7531_PLLGP_CR0, val);
break;
case HWTRAP_XTAL_FSEL_40MHZ:
val = mt7530_read(priv, MT7531_PLLGP_CR0);
val &= ~RG_COREPLL_SDM_PCW_M;
val |= 0x190000 << RG_COREPLL_SDM_PCW_S;
mt7530_write(priv, MT7531_PLLGP_CR0, val);
break;
}
/* Set feedback divide ratio update signal to high */
val = mt7530_read(priv, MT7531_PLLGP_CR0);
val |= RG_COREPLL_SDM_PCW_CHG;
mt7530_write(priv, MT7531_PLLGP_CR0, val);
/* Wait for at least 16 XTAL clocks */
usleep_range(10, 20);
/* Step 5: set feedback divide ratio update signal to low */
val = mt7530_read(priv, MT7531_PLLGP_CR0);
val &= ~RG_COREPLL_SDM_PCW_CHG;
mt7530_write(priv, MT7531_PLLGP_CR0, val);
/* Enable 325M clock for SGMII */
mt7530_write(priv, MT7531_ANA_PLLGP_CR5, 0xad0000);
/* Enable 250SSC clock for RGMII */
mt7530_write(priv, MT7531_ANA_PLLGP_CR2, 0x4f40000);
/* Step 6: Enable MT7531 PLL */
val = mt7530_read(priv, MT7531_PLLGP_CR0);
val |= RG_COREPLL_EN;
mt7530_write(priv, MT7531_PLLGP_CR0, val);
val = mt7530_read(priv, MT7531_PLLGP_EN);
val |= EN_COREPLL;
mt7530_write(priv, MT7531_PLLGP_EN, val);
usleep_range(25, 35);
return 0;
}
static void
mt7530_mib_reset(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
mt7530_write(priv, MT7530_MIB_CCR, CCR_MIB_FLUSH);
mt7530_write(priv, MT7530_MIB_CCR, CCR_MIB_ACTIVATE);
}
static int mt7530_phy_read(struct mt7530_priv *priv, int port, int regnum)
{
return mdiobus_read_nested(priv->bus, port, regnum);
}
static int mt7530_phy_write(struct mt7530_priv *priv, int port, int regnum,
u16 val)
{
return mdiobus_write_nested(priv->bus, port, regnum, val);
}
static int
mt7531_ind_c45_phy_read(struct mt7530_priv *priv, int port, int devad,
int regnum)
{
struct mii_bus *bus = priv->bus;
struct mt7530_dummy_poll p;
u32 reg, val;
int ret;
INIT_MT7530_DUMMY_POLL(&p, priv, MT7531_PHY_IAC);
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
reg = MT7531_MDIO_CL45_ADDR | MT7531_MDIO_PHY_ADDR(port) |
MT7531_MDIO_DEV_ADDR(devad) | regnum;
mt7530_mii_write(priv, MT7531_PHY_IAC, reg | MT7531_PHY_ACS_ST);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
reg = MT7531_MDIO_CL45_READ | MT7531_MDIO_PHY_ADDR(port) |
MT7531_MDIO_DEV_ADDR(devad);
mt7530_mii_write(priv, MT7531_PHY_IAC, reg | MT7531_PHY_ACS_ST);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
ret = val & MT7531_MDIO_RW_DATA_MASK;
out:
mutex_unlock(&bus->mdio_lock);
return ret;
}
static int
mt7531_ind_c45_phy_write(struct mt7530_priv *priv, int port, int devad,
int regnum, u32 data)
{
struct mii_bus *bus = priv->bus;
struct mt7530_dummy_poll p;
u32 val, reg;
int ret;
INIT_MT7530_DUMMY_POLL(&p, priv, MT7531_PHY_IAC);
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
reg = MT7531_MDIO_CL45_ADDR | MT7531_MDIO_PHY_ADDR(port) |
MT7531_MDIO_DEV_ADDR(devad) | regnum;
mt7530_mii_write(priv, MT7531_PHY_IAC, reg | MT7531_PHY_ACS_ST);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
reg = MT7531_MDIO_CL45_WRITE | MT7531_MDIO_PHY_ADDR(port) |
MT7531_MDIO_DEV_ADDR(devad) | data;
mt7530_mii_write(priv, MT7531_PHY_IAC, reg | MT7531_PHY_ACS_ST);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
out:
mutex_unlock(&bus->mdio_lock);
return ret;
}
static int
mt7531_ind_c22_phy_read(struct mt7530_priv *priv, int port, int regnum)
{
struct mii_bus *bus = priv->bus;
struct mt7530_dummy_poll p;
int ret;
u32 val;
INIT_MT7530_DUMMY_POLL(&p, priv, MT7531_PHY_IAC);
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
val = MT7531_MDIO_CL22_READ | MT7531_MDIO_PHY_ADDR(port) |
MT7531_MDIO_REG_ADDR(regnum);
mt7530_mii_write(priv, MT7531_PHY_IAC, val | MT7531_PHY_ACS_ST);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, val,
!(val & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
ret = val & MT7531_MDIO_RW_DATA_MASK;
out:
mutex_unlock(&bus->mdio_lock);
return ret;
}
static int
mt7531_ind_c22_phy_write(struct mt7530_priv *priv, int port, int regnum,
u16 data)
{
struct mii_bus *bus = priv->bus;
struct mt7530_dummy_poll p;
int ret;
u32 reg;
INIT_MT7530_DUMMY_POLL(&p, priv, MT7531_PHY_IAC);
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, reg,
!(reg & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
reg = MT7531_MDIO_CL22_WRITE | MT7531_MDIO_PHY_ADDR(port) |
MT7531_MDIO_REG_ADDR(regnum) | data;
mt7530_mii_write(priv, MT7531_PHY_IAC, reg | MT7531_PHY_ACS_ST);
ret = readx_poll_timeout(_mt7530_unlocked_read, &p, reg,
!(reg & MT7531_PHY_ACS_ST), 20, 100000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
goto out;
}
out:
mutex_unlock(&bus->mdio_lock);
return ret;
}
static int
mt7531_ind_phy_read(struct mt7530_priv *priv, int port, int regnum)
{
int devad;
int ret;
if (regnum & MII_ADDR_C45) {
devad = (regnum >> MII_DEVADDR_C45_SHIFT) & 0x1f;
ret = mt7531_ind_c45_phy_read(priv, port, devad,
regnum & MII_REGADDR_C45_MASK);
} else {
ret = mt7531_ind_c22_phy_read(priv, port, regnum);
}
return ret;
}
static int
mt7531_ind_phy_write(struct mt7530_priv *priv, int port, int regnum,
u16 data)
{
int devad;
int ret;
if (regnum & MII_ADDR_C45) {
devad = (regnum >> MII_DEVADDR_C45_SHIFT) & 0x1f;
ret = mt7531_ind_c45_phy_write(priv, port, devad,
regnum & MII_REGADDR_C45_MASK,
data);
} else {
ret = mt7531_ind_c22_phy_write(priv, port, regnum, data);
}
return ret;
}
static int
mt753x_phy_read(struct mii_bus *bus, int port, int regnum)
{
struct mt7530_priv *priv = bus->priv;
return priv->info->phy_read(priv, port, regnum);
}
static int
mt753x_phy_write(struct mii_bus *bus, int port, int regnum, u16 val)
{
struct mt7530_priv *priv = bus->priv;
return priv->info->phy_write(priv, port, regnum, val);
}
static void
mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset,
uint8_t *data)
{
int i;
if (stringset != ETH_SS_STATS)
return;
for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++)
strncpy(data + i * ETH_GSTRING_LEN, mt7530_mib[i].name,
ETH_GSTRING_LEN);
}
static void
mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
uint64_t *data)
{
struct mt7530_priv *priv = ds->priv;
const struct mt7530_mib_desc *mib;
u32 reg, i;
u64 hi;
for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++) {
mib = &mt7530_mib[i];
reg = MT7530_PORT_MIB_COUNTER(port) + mib->offset;
data[i] = mt7530_read(priv, reg);
if (mib->size == 2) {
hi = mt7530_read(priv, reg + 4);
data[i] |= hi << 32;
}
}
}
static int
mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset)
{
if (sset != ETH_SS_STATS)
return 0;
return ARRAY_SIZE(mt7530_mib);
}
static int
mt7530_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
{
struct mt7530_priv *priv = ds->priv;
unsigned int secs = msecs / 1000;
unsigned int tmp_age_count;
unsigned int error = -1;
unsigned int age_count;
unsigned int age_unit;
/* Applied timer is (AGE_CNT + 1) * (AGE_UNIT + 1) seconds */
if (secs < 1 || secs > (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1))
return -ERANGE;
/* iterate through all possible age_count to find the closest pair */
for (tmp_age_count = 0; tmp_age_count <= AGE_CNT_MAX; ++tmp_age_count) {
unsigned int tmp_age_unit = secs / (tmp_age_count + 1) - 1;
if (tmp_age_unit <= AGE_UNIT_MAX) {
unsigned int tmp_error = secs -
(tmp_age_count + 1) * (tmp_age_unit + 1);
/* found a closer pair */
if (error > tmp_error) {
error = tmp_error;
age_count = tmp_age_count;
age_unit = tmp_age_unit;
}
/* found the exact match, so break the loop */
if (!error)
break;
}
}
mt7530_write(priv, MT7530_AAC, AGE_CNT(age_count) | AGE_UNIT(age_unit));
return 0;
}
static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
u8 tx_delay = 0;
int val;
mutex_lock(&priv->reg_mutex);
val = mt7530_read(priv, MT7530_MHWTRAP);
val |= MHWTRAP_MANUAL | MHWTRAP_P5_MAC_SEL | MHWTRAP_P5_DIS;
val &= ~MHWTRAP_P5_RGMII_MODE & ~MHWTRAP_PHY0_SEL;
switch (priv->p5_intf_sel) {
case P5_INTF_SEL_PHY_P0:
/* MT7530_P5_MODE_GPHY_P0: 2nd GMAC -> P5 -> P0 */
val |= MHWTRAP_PHY0_SEL;
fallthrough;
case P5_INTF_SEL_PHY_P4:
/* MT7530_P5_MODE_GPHY_P4: 2nd GMAC -> P5 -> P4 */
val &= ~MHWTRAP_P5_MAC_SEL & ~MHWTRAP_P5_DIS;
/* Setup the MAC by default for the cpu port */
mt7530_write(priv, MT7530_PMCR_P(5), 0x56300);
break;
case P5_INTF_SEL_GMAC5:
/* MT7530_P5_MODE_GMAC: P5 -> External phy or 2nd GMAC */
val &= ~MHWTRAP_P5_DIS;
break;
case P5_DISABLED:
interface = PHY_INTERFACE_MODE_NA;
break;
default:
dev_err(ds->dev, "Unsupported p5_intf_sel %d\n",
priv->p5_intf_sel);
goto unlock_exit;
}
/* Setup RGMII settings */
if (phy_interface_mode_is_rgmii(interface)) {
val |= MHWTRAP_P5_RGMII_MODE;
/* P5 RGMII RX Clock Control: delay setting for 1000M */
mt7530_write(priv, MT7530_P5RGMIIRXCR, CSR_RGMII_EDGE_ALIGN);
/* Don't set delay in DSA mode */
if (!dsa_is_dsa_port(priv->ds, 5) &&
(interface == PHY_INTERFACE_MODE_RGMII_TXID ||
interface == PHY_INTERFACE_MODE_RGMII_ID))
tx_delay = 4; /* n * 0.5 ns */
/* P5 RGMII TX Clock Control: delay x */
mt7530_write(priv, MT7530_P5RGMIITXCR,
CSR_RGMII_TXC_CFG(0x10 + tx_delay));
/* reduce P5 RGMII Tx driving, 8mA */
mt7530_write(priv, MT7530_IO_DRV_CR,
P5_IO_CLK_DRV(1) | P5_IO_DATA_DRV(1));
}
mt7530_write(priv, MT7530_MHWTRAP, val);
dev_dbg(ds->dev, "Setup P5, HWTRAP=0x%x, intf_sel=%s, phy-mode=%s\n",
val, p5_intf_modes(priv->p5_intf_sel), phy_modes(interface));
priv->p5_interface = interface;
unlock_exit:
mutex_unlock(&priv->reg_mutex);
}
static int
mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
int ret;
/* Setup max capability of CPU port at first */
if (priv->info->cpu_port_config) {
ret = priv->info->cpu_port_config(ds, port);
if (ret)
return ret;
}
/* Enable Mediatek header mode on the cpu port */
mt7530_write(priv, MT7530_PVC_P(port),
PORT_SPEC_TAG);
/* Disable flooding by default */
mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK,
BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port)));
/* Set CPU port number */
if (priv->id == ID_MT7621)
mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port));
/* CPU port gets connected to all user ports of
* the switch.
*/
mt7530_write(priv, MT7530_PCR_P(port),
PCR_MATRIX(dsa_user_ports(priv->ds)));
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* Set to fallback mode for independent VLAN learning */
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_FALLBACK_MODE);
return 0;
}
static int
mt7530_port_enable(struct dsa_switch *ds, int port,
struct phy_device *phy)
{
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
/* Allow the user port gets connected to the cpu port and also
* restore the port matrix if the port is the member of a certain
* bridge.
*/
priv->ports[port].pm |= PCR_MATRIX(BIT(MT7530_CPU_PORT));
priv->ports[port].enable = true;
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
priv->ports[port].pm);
mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK);
mutex_unlock(&priv->reg_mutex);
return 0;
}
static void
mt7530_port_disable(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
/* Clear up all port matrix which could be restored in the next
* enablement for the port.
*/
priv->ports[port].enable = false;
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK);
mutex_unlock(&priv->reg_mutex);
}
static int
mt7530_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
{
struct mt7530_priv *priv = ds->priv;
struct mii_bus *bus = priv->bus;
int length;
u32 val;
/* When a new MTU is set, DSA always set the CPU port's MTU to the
* largest MTU of the slave ports. Because the switch only has a global
* RX length register, only allowing CPU port here is enough.
*/
if (!dsa_is_cpu_port(ds, port))
return 0;
mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
val = mt7530_mii_read(priv, MT7530_GMACCR);
val &= ~MAX_RX_PKT_LEN_MASK;
/* RX length also includes Ethernet header, MTK tag, and FCS length */
length = new_mtu + ETH_HLEN + MTK_HDR_LEN + ETH_FCS_LEN;
if (length <= 1522) {
val |= MAX_RX_PKT_LEN_1522;
} else if (length <= 1536) {
val |= MAX_RX_PKT_LEN_1536;
} else if (length <= 1552) {
val |= MAX_RX_PKT_LEN_1552;
} else {
val &= ~MAX_RX_JUMBO_MASK;
val |= MAX_RX_JUMBO(DIV_ROUND_UP(length, 1024));
val |= MAX_RX_PKT_LEN_JUMBO;
}
mt7530_mii_write(priv, MT7530_GMACCR, val);
mutex_unlock(&bus->mdio_lock);
return 0;
}
static int
mt7530_port_max_mtu(struct dsa_switch *ds, int port)
{
return MT7530_MAX_MTU;
}
static void
mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state)
{
struct mt7530_priv *priv = ds->priv;
u32 stp_state;
switch (state) {
case BR_STATE_DISABLED:
stp_state = MT7530_STP_DISABLED;
break;
case BR_STATE_BLOCKING:
stp_state = MT7530_STP_BLOCKING;
break;
case BR_STATE_LISTENING:
stp_state = MT7530_STP_LISTENING;
break;
case BR_STATE_LEARNING:
stp_state = MT7530_STP_LEARNING;
break;
case BR_STATE_FORWARDING:
default:
stp_state = MT7530_STP_FORWARDING;
break;
}
mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED),
FID_PST(FID_BRIDGED, stp_state));
}
static int
mt7530_port_pre_bridge_flags(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
{
if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
BR_BCAST_FLOOD))
return -EINVAL;
return 0;
}
static int
mt7530_port_bridge_flags(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
{
struct mt7530_priv *priv = ds->priv;
if (flags.mask & BR_LEARNING)
mt7530_rmw(priv, MT7530_PSC_P(port), SA_DIS,
flags.val & BR_LEARNING ? 0 : SA_DIS);
if (flags.mask & BR_FLOOD)
mt7530_rmw(priv, MT7530_MFC, UNU_FFP(BIT(port)),
flags.val & BR_FLOOD ? UNU_FFP(BIT(port)) : 0);
if (flags.mask & BR_MCAST_FLOOD)
mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
flags.val & BR_MCAST_FLOOD ? UNM_FFP(BIT(port)) : 0);
if (flags.mask & BR_BCAST_FLOOD)
mt7530_rmw(priv, MT7530_MFC, BC_FFP(BIT(port)),
flags.val & BR_BCAST_FLOOD ? BC_FFP(BIT(port)) : 0);
return 0;
}
static int
mt7530_port_bridge_join(struct dsa_switch *ds, int port,
struct dsa_bridge bridge, bool *tx_fwd_offload,
struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_to_port(ds, port), *other_dp;
u32 port_bitmap = BIT(MT7530_CPU_PORT);
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
dsa_switch_for_each_user_port(other_dp, ds) {
int other_port = other_dp->index;
if (dp == other_dp)
continue;
/* Add this port to the port matrix of the other ports in the
* same bridge. If the port is disabled, port matrix is kept
* and not being setup until the port becomes enabled.
*/
net: dsa: keep the bridge_dev and bridge_num as part of the same structure The main desire behind this is to provide coherent bridge information to the fast path without locking. For example, right now we set dp->bridge_dev and dp->bridge_num from separate code paths, it is theoretically possible for a packet transmission to read these two port properties consecutively and find a bridge number which does not correspond with the bridge device. Another desire is to start passing more complex bridge information to dsa_switch_ops functions. For example, with FDB isolation, it is expected that drivers will need to be passed the bridge which requested an FDB/MDB entry to be offloaded, and along with that bridge_dev, the associated bridge_num should be passed too, in case the driver might want to implement an isolation scheme based on that number. We already pass the {bridge_dev, bridge_num} pair to the TX forwarding offload switch API, however we'd like to remove that and squash it into the basic bridge join/leave API. So that means we need to pass this pair to the bridge join/leave API. During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we call the driver's .port_bridge_leave with what used to be our dp->bridge_dev, but provided as an argument. When bridge_dev and bridge_num get folded into a single structure, we need to preserve this behavior in dsa_port_bridge_leave: we need a copy of what used to be in dp->bridge. Switch drivers check bridge membership by comparing dp->bridge_dev with the provided bridge_dev, but now, if we provide the struct dsa_bridge as a pointer, they cannot keep comparing dp->bridge to the provided pointer, since this only points to an on-stack copy. To make this obvious and prevent driver writers from forgetting and doing stupid things, in this new API, the struct dsa_bridge is provided as a full structure (not very large, contains an int and a pointer) instead of a pointer. An explicit comparison function needs to be used to determine bridge membership: dsa_port_offloads_bridge(). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-06 16:57:56 +00:00
if (!dsa_port_offloads_bridge(other_dp, &bridge))
continue;
if (priv->ports[other_port].enable)
mt7530_set(priv, MT7530_PCR_P(other_port),
PCR_MATRIX(BIT(port)));
priv->ports[other_port].pm |= PCR_MATRIX(BIT(port));
port_bitmap |= BIT(other_port);
}
/* Add the all other ports to this port matrix. */
if (priv->ports[port].enable)
mt7530_rmw(priv, MT7530_PCR_P(port),
PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap));
priv->ports[port].pm |= PCR_MATRIX(port_bitmap);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* Set to fallback mode for independent VLAN learning */
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_FALLBACK_MODE);
mutex_unlock(&priv->reg_mutex);
return 0;
}
static void
mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
bool all_user_ports_removed = true;
int i;
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* This is called after .port_bridge_leave when leaving a VLAN-aware
* bridge. Don't set standalone ports to fallback mode.
*/
if (dsa_port_bridge_dev_get(dsa_to_port(ds, port)))
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_FALLBACK_MODE);
mt7530_rmw(priv, MT7530_PVC_P(port),
VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK,
VLAN_ATTR(MT7530_VLAN_TRANSPARENT) |
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) |
MT7530_VLAN_ACC_ALL);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* Set PVID to 0 */
mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
G0_PORT_VID_DEF);
for (i = 0; i < MT7530_NUM_PORTS; i++) {
if (dsa_is_user_port(ds, i) &&
dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) {
all_user_ports_removed = false;
break;
}
}
/* CPU port also does the same thing until all user ports belonging to
* the CPU port get out of VLAN filtering mode.
*/
if (all_user_ports_removed) {
mt7530_write(priv, MT7530_PCR_P(MT7530_CPU_PORT),
PCR_MATRIX(dsa_user_ports(priv->ds)));
mt7530_write(priv, MT7530_PVC_P(MT7530_CPU_PORT), PORT_SPEC_TAG
| PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
}
static void
mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
/* Trapped into security mode allows packet forwarding through VLAN
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
* table lookup.
*/
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
if (dsa_is_user_port(ds, port)) {
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_SECURITY_MODE);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
G0_PORT_VID(priv->ports[port].pvid));
/* Only accept tagged frames if PVID is not set */
if (!priv->ports[port].pvid)
mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
MT7530_VLAN_ACC_TAGGED);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
}
/* Set the port as a user port which is to be able to recognize VID
* from incoming packets before fetching entry within the VLAN table.
*/
mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
VLAN_ATTR(MT7530_VLAN_USER) |
PVC_EG_TAG(MT7530_VLAN_EG_DISABLED));
}
static void
mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
net: dsa: keep the bridge_dev and bridge_num as part of the same structure The main desire behind this is to provide coherent bridge information to the fast path without locking. For example, right now we set dp->bridge_dev and dp->bridge_num from separate code paths, it is theoretically possible for a packet transmission to read these two port properties consecutively and find a bridge number which does not correspond with the bridge device. Another desire is to start passing more complex bridge information to dsa_switch_ops functions. For example, with FDB isolation, it is expected that drivers will need to be passed the bridge which requested an FDB/MDB entry to be offloaded, and along with that bridge_dev, the associated bridge_num should be passed too, in case the driver might want to implement an isolation scheme based on that number. We already pass the {bridge_dev, bridge_num} pair to the TX forwarding offload switch API, however we'd like to remove that and squash it into the basic bridge join/leave API. So that means we need to pass this pair to the bridge join/leave API. During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we call the driver's .port_bridge_leave with what used to be our dp->bridge_dev, but provided as an argument. When bridge_dev and bridge_num get folded into a single structure, we need to preserve this behavior in dsa_port_bridge_leave: we need a copy of what used to be in dp->bridge. Switch drivers check bridge membership by comparing dp->bridge_dev with the provided bridge_dev, but now, if we provide the struct dsa_bridge as a pointer, they cannot keep comparing dp->bridge to the provided pointer, since this only points to an on-stack copy. To make this obvious and prevent driver writers from forgetting and doing stupid things, in this new API, the struct dsa_bridge is provided as a full structure (not very large, contains an int and a pointer) instead of a pointer. An explicit comparison function needs to be used to determine bridge membership: dsa_port_offloads_bridge(). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-06 16:57:56 +00:00
struct dsa_bridge bridge)
{
struct dsa_port *dp = dsa_to_port(ds, port), *other_dp;
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
dsa_switch_for_each_user_port(other_dp, ds) {
int other_port = other_dp->index;
if (dp == other_dp)
continue;
/* Remove this port from the port matrix of the other ports
* in the same bridge. If the port is disabled, port matrix
* is kept and not being setup until the port becomes enabled.
*/
net: dsa: keep the bridge_dev and bridge_num as part of the same structure The main desire behind this is to provide coherent bridge information to the fast path without locking. For example, right now we set dp->bridge_dev and dp->bridge_num from separate code paths, it is theoretically possible for a packet transmission to read these two port properties consecutively and find a bridge number which does not correspond with the bridge device. Another desire is to start passing more complex bridge information to dsa_switch_ops functions. For example, with FDB isolation, it is expected that drivers will need to be passed the bridge which requested an FDB/MDB entry to be offloaded, and along with that bridge_dev, the associated bridge_num should be passed too, in case the driver might want to implement an isolation scheme based on that number. We already pass the {bridge_dev, bridge_num} pair to the TX forwarding offload switch API, however we'd like to remove that and squash it into the basic bridge join/leave API. So that means we need to pass this pair to the bridge join/leave API. During dsa_port_bridge_leave, first we unset dp->bridge_dev, then we call the driver's .port_bridge_leave with what used to be our dp->bridge_dev, but provided as an argument. When bridge_dev and bridge_num get folded into a single structure, we need to preserve this behavior in dsa_port_bridge_leave: we need a copy of what used to be in dp->bridge. Switch drivers check bridge membership by comparing dp->bridge_dev with the provided bridge_dev, but now, if we provide the struct dsa_bridge as a pointer, they cannot keep comparing dp->bridge to the provided pointer, since this only points to an on-stack copy. To make this obvious and prevent driver writers from forgetting and doing stupid things, in this new API, the struct dsa_bridge is provided as a full structure (not very large, contains an int and a pointer) instead of a pointer. An explicit comparison function needs to be used to determine bridge membership: dsa_port_offloads_bridge(). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-06 16:57:56 +00:00
if (!dsa_port_offloads_bridge(other_dp, &bridge))
continue;
if (priv->ports[other_port].enable)
mt7530_clear(priv, MT7530_PCR_P(other_port),
PCR_MATRIX(BIT(port)));
priv->ports[other_port].pm &= ~PCR_MATRIX(BIT(port));
}
/* Set the cpu port to be the only one in the port matrix of
* this port.
*/
if (priv->ports[port].enable)
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK,
PCR_MATRIX(BIT(MT7530_CPU_PORT)));
priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* When a port is removed from the bridge, the port would be set up
* back to the default as is at initial boot which is a VLAN-unaware
* port.
*/
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_MATRIX_MODE);
mutex_unlock(&priv->reg_mutex);
}
static int
mt7530_port_fdb_add(struct dsa_switch *ds, int port,
net: dsa: request drivers to perform FDB isolation For DSA, to encourage drivers to perform FDB isolation simply means to track which bridge does each FDB and MDB entry belong to. It then becomes the driver responsibility to use something that makes the FDB entry from one bridge not match the FDB lookup of ports from other bridges. The top-level functions where the bridge is determined are: - dsa_port_fdb_{add,del} - dsa_port_host_fdb_{add,del} - dsa_port_mdb_{add,del} - dsa_port_host_mdb_{add,del} aka the pre-crosschip-notifier functions. Changing the API to pass a reference to a bridge is not superfluous, and looking at the passed bridge argument is not the same as having the driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add() method. DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well, and those do not have any dp->bridge information to retrieve, because they are not in any bridge - they are merely the pipes that serve the user ports that are in one or multiple bridges. The struct dsa_bridge associated with each FDB/MDB entry is encapsulated in a larger "struct dsa_db" database. Although only databases associated to bridges are notified for now, this API will be the starting point for implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB entries on the CPU port which belong to the corresponding user port's port database. These are supposed to match only when the port is standalone. It is better to introduce the API in its expected final form than to introduce it for bridges first, then to have to change drivers which may have made one or more assumptions. Drivers can use the provided bridge.num, but they can also use a different numbering scheme that is more convenient. DSA must perform refcounting on the CPU and DSA ports by also taking into account the bridge number. So if two bridges request the same local address, DSA must notify the driver twice, once for each bridge. In fact, if the driver supports FDB isolation, DSA must perform refcounting per bridge, but if the driver doesn't, DSA must refcount host addresses across all bridges, otherwise it would be telling the driver to delete an FDB entry for a bridge and the driver would delete it for all bridges. So introduce a bool fdb_isolation in drivers which would make all bridge databases passed to the cross-chip notifier have the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal() say that all bridge databases are the same database - which is essentially the legacy behavior. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:22 +00:00
const unsigned char *addr, u16 vid,
struct dsa_db db)
{
struct mt7530_priv *priv = ds->priv;
int ret;
u8 port_mask = BIT(port);
mutex_lock(&priv->reg_mutex);
mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
mutex_unlock(&priv->reg_mutex);
return ret;
}
static int
mt7530_port_fdb_del(struct dsa_switch *ds, int port,
net: dsa: request drivers to perform FDB isolation For DSA, to encourage drivers to perform FDB isolation simply means to track which bridge does each FDB and MDB entry belong to. It then becomes the driver responsibility to use something that makes the FDB entry from one bridge not match the FDB lookup of ports from other bridges. The top-level functions where the bridge is determined are: - dsa_port_fdb_{add,del} - dsa_port_host_fdb_{add,del} - dsa_port_mdb_{add,del} - dsa_port_host_mdb_{add,del} aka the pre-crosschip-notifier functions. Changing the API to pass a reference to a bridge is not superfluous, and looking at the passed bridge argument is not the same as having the driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add() method. DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well, and those do not have any dp->bridge information to retrieve, because they are not in any bridge - they are merely the pipes that serve the user ports that are in one or multiple bridges. The struct dsa_bridge associated with each FDB/MDB entry is encapsulated in a larger "struct dsa_db" database. Although only databases associated to bridges are notified for now, this API will be the starting point for implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB entries on the CPU port which belong to the corresponding user port's port database. These are supposed to match only when the port is standalone. It is better to introduce the API in its expected final form than to introduce it for bridges first, then to have to change drivers which may have made one or more assumptions. Drivers can use the provided bridge.num, but they can also use a different numbering scheme that is more convenient. DSA must perform refcounting on the CPU and DSA ports by also taking into account the bridge number. So if two bridges request the same local address, DSA must notify the driver twice, once for each bridge. In fact, if the driver supports FDB isolation, DSA must perform refcounting per bridge, but if the driver doesn't, DSA must refcount host addresses across all bridges, otherwise it would be telling the driver to delete an FDB entry for a bridge and the driver would delete it for all bridges. So introduce a bool fdb_isolation in drivers which would make all bridge databases passed to the cross-chip notifier have the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal() say that all bridge databases are the same database - which is essentially the legacy behavior. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:22 +00:00
const unsigned char *addr, u16 vid,
struct dsa_db db)
{
struct mt7530_priv *priv = ds->priv;
int ret;
u8 port_mask = BIT(port);
mutex_lock(&priv->reg_mutex);
mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_EMP);
ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
mutex_unlock(&priv->reg_mutex);
return ret;
}
static int
mt7530_port_fdb_dump(struct dsa_switch *ds, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
struct mt7530_priv *priv = ds->priv;
struct mt7530_fdb _fdb = { 0 };
int cnt = MT7530_NUM_FDB_RECORDS;
int ret = 0;
u32 rsp = 0;
mutex_lock(&priv->reg_mutex);
ret = mt7530_fdb_cmd(priv, MT7530_FDB_START, &rsp);
if (ret < 0)
goto err;
do {
if (rsp & ATC_SRCH_HIT) {
mt7530_fdb_read(priv, &_fdb);
if (_fdb.port_mask & BIT(port)) {
ret = cb(_fdb.mac, _fdb.vid, _fdb.noarp,
data);
if (ret < 0)
break;
}
}
} while (--cnt &&
!(rsp & ATC_SRCH_END) &&
!mt7530_fdb_cmd(priv, MT7530_FDB_NEXT, &rsp));
err:
mutex_unlock(&priv->reg_mutex);
return 0;
}
static int
mt7530_port_mdb_add(struct dsa_switch *ds, int port,
net: dsa: request drivers to perform FDB isolation For DSA, to encourage drivers to perform FDB isolation simply means to track which bridge does each FDB and MDB entry belong to. It then becomes the driver responsibility to use something that makes the FDB entry from one bridge not match the FDB lookup of ports from other bridges. The top-level functions where the bridge is determined are: - dsa_port_fdb_{add,del} - dsa_port_host_fdb_{add,del} - dsa_port_mdb_{add,del} - dsa_port_host_mdb_{add,del} aka the pre-crosschip-notifier functions. Changing the API to pass a reference to a bridge is not superfluous, and looking at the passed bridge argument is not the same as having the driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add() method. DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well, and those do not have any dp->bridge information to retrieve, because they are not in any bridge - they are merely the pipes that serve the user ports that are in one or multiple bridges. The struct dsa_bridge associated with each FDB/MDB entry is encapsulated in a larger "struct dsa_db" database. Although only databases associated to bridges are notified for now, this API will be the starting point for implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB entries on the CPU port which belong to the corresponding user port's port database. These are supposed to match only when the port is standalone. It is better to introduce the API in its expected final form than to introduce it for bridges first, then to have to change drivers which may have made one or more assumptions. Drivers can use the provided bridge.num, but they can also use a different numbering scheme that is more convenient. DSA must perform refcounting on the CPU and DSA ports by also taking into account the bridge number. So if two bridges request the same local address, DSA must notify the driver twice, once for each bridge. In fact, if the driver supports FDB isolation, DSA must perform refcounting per bridge, but if the driver doesn't, DSA must refcount host addresses across all bridges, otherwise it would be telling the driver to delete an FDB entry for a bridge and the driver would delete it for all bridges. So introduce a bool fdb_isolation in drivers which would make all bridge databases passed to the cross-chip notifier have the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal() say that all bridge databases are the same database - which is essentially the legacy behavior. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:22 +00:00
const struct switchdev_obj_port_mdb *mdb,
struct dsa_db db)
{
struct mt7530_priv *priv = ds->priv;
const u8 *addr = mdb->addr;
u16 vid = mdb->vid;
u8 port_mask = 0;
int ret;
mutex_lock(&priv->reg_mutex);
mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP);
if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL))
port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP)
& PORT_MAP_MASK;
port_mask |= BIT(port);
mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
mutex_unlock(&priv->reg_mutex);
return ret;
}
static int
mt7530_port_mdb_del(struct dsa_switch *ds, int port,
net: dsa: request drivers to perform FDB isolation For DSA, to encourage drivers to perform FDB isolation simply means to track which bridge does each FDB and MDB entry belong to. It then becomes the driver responsibility to use something that makes the FDB entry from one bridge not match the FDB lookup of ports from other bridges. The top-level functions where the bridge is determined are: - dsa_port_fdb_{add,del} - dsa_port_host_fdb_{add,del} - dsa_port_mdb_{add,del} - dsa_port_host_mdb_{add,del} aka the pre-crosschip-notifier functions. Changing the API to pass a reference to a bridge is not superfluous, and looking at the passed bridge argument is not the same as having the driver look at dsa_to_port(ds, port)->bridge from the ->port_fdb_add() method. DSA installs FDB and MDB entries on shared (CPU and DSA) ports as well, and those do not have any dp->bridge information to retrieve, because they are not in any bridge - they are merely the pipes that serve the user ports that are in one or multiple bridges. The struct dsa_bridge associated with each FDB/MDB entry is encapsulated in a larger "struct dsa_db" database. Although only databases associated to bridges are notified for now, this API will be the starting point for implementing IFF_UNICAST_FLT in DSA. There, the idea is to install FDB entries on the CPU port which belong to the corresponding user port's port database. These are supposed to match only when the port is standalone. It is better to introduce the API in its expected final form than to introduce it for bridges first, then to have to change drivers which may have made one or more assumptions. Drivers can use the provided bridge.num, but they can also use a different numbering scheme that is more convenient. DSA must perform refcounting on the CPU and DSA ports by also taking into account the bridge number. So if two bridges request the same local address, DSA must notify the driver twice, once for each bridge. In fact, if the driver supports FDB isolation, DSA must perform refcounting per bridge, but if the driver doesn't, DSA must refcount host addresses across all bridges, otherwise it would be telling the driver to delete an FDB entry for a bridge and the driver would delete it for all bridges. So introduce a bool fdb_isolation in drivers which would make all bridge databases passed to the cross-chip notifier have the same number (0). This makes dsa_mac_addr_find() -> dsa_db_equal() say that all bridge databases are the same database - which is essentially the legacy behavior. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-25 09:22:22 +00:00
const struct switchdev_obj_port_mdb *mdb,
struct dsa_db db)
{
struct mt7530_priv *priv = ds->priv;
const u8 *addr = mdb->addr;
u16 vid = mdb->vid;
u8 port_mask = 0;
int ret;
mutex_lock(&priv->reg_mutex);
mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP);
if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL))
port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP)
& PORT_MAP_MASK;
port_mask &= ~BIT(port);
mt7530_fdb_write(priv, vid, port_mask, addr, -1,
port_mask ? STATIC_ENT : STATIC_EMP);
ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
mutex_unlock(&priv->reg_mutex);
return ret;
}
static int
mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid)
{
struct mt7530_dummy_poll p;
u32 val;
int ret;
val = VTCR_BUSY | VTCR_FUNC(cmd) | vid;
mt7530_write(priv, MT7530_VTCR, val);
INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_VTCR);
ret = readx_poll_timeout(_mt7530_read, &p, val,
!(val & VTCR_BUSY), 20, 20000);
if (ret < 0) {
dev_err(priv->dev, "poll timeout\n");
return ret;
}
val = mt7530_read(priv, MT7530_VTCR);
if (val & VTCR_INVALID) {
dev_err(priv->dev, "read VTCR invalid\n");
return -EINVAL;
}
return 0;
}
static int
mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
struct netlink_ext_ack *extack)
{
if (vlan_filtering) {
/* The port is being kept as VLAN-unaware port when bridge is
* set up with vlan_filtering not being set, Otherwise, the
* port and the corresponding CPU port is required the setup
* for becoming a VLAN-aware port.
*/
mt7530_port_set_vlan_aware(ds, port);
mt7530_port_set_vlan_aware(ds, MT7530_CPU_PORT);
} else {
mt7530_port_set_vlan_unaware(ds, port);
}
return 0;
}
static void
mt7530_hw_vlan_add(struct mt7530_priv *priv,
struct mt7530_hw_vlan_entry *entry)
{
u8 new_members;
u32 val;
new_members = entry->old_members | BIT(entry->port) |
BIT(MT7530_CPU_PORT);
/* Validate the entry with independent learning, create egress tag per
* VLAN and joining the port as one of the port members.
*/
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) |
VLAN_VALID;
mt7530_write(priv, MT7530_VAWD1, val);
/* Decide whether adding tag or not for those outgoing packets from the
* port inside the VLAN.
*/
val = entry->untagged ? MT7530_VLAN_EGRESS_UNTAG :
MT7530_VLAN_EGRESS_TAG;
mt7530_rmw(priv, MT7530_VAWD2,
ETAG_CTRL_P_MASK(entry->port),
ETAG_CTRL_P(entry->port, val));
/* CPU port is always taken as a tagged port for serving more than one
* VLANs across and also being applied with egress type stack mode for
* that VLAN tags would be appended after hardware special tag used as
* DSA tag.
*/
mt7530_rmw(priv, MT7530_VAWD2,
ETAG_CTRL_P_MASK(MT7530_CPU_PORT),
ETAG_CTRL_P(MT7530_CPU_PORT,
MT7530_VLAN_EGRESS_STACK));
}
static void
mt7530_hw_vlan_del(struct mt7530_priv *priv,
struct mt7530_hw_vlan_entry *entry)
{
u8 new_members;
u32 val;
new_members = entry->old_members & ~BIT(entry->port);
val = mt7530_read(priv, MT7530_VAWD1);
if (!(val & VLAN_VALID)) {
dev_err(priv->dev,
"Cannot be deleted due to invalid entry\n");
return;
}
/* If certain member apart from CPU port is still alive in the VLAN,
* the entry would be kept valid. Otherwise, the entry is got to be
* disabled.
*/
if (new_members && new_members != BIT(MT7530_CPU_PORT)) {
val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) |
VLAN_VALID;
mt7530_write(priv, MT7530_VAWD1, val);
} else {
mt7530_write(priv, MT7530_VAWD1, 0);
mt7530_write(priv, MT7530_VAWD2, 0);
}
}
static void
mt7530_hw_vlan_update(struct mt7530_priv *priv, u16 vid,
struct mt7530_hw_vlan_entry *entry,
mt7530_vlan_op vlan_op)
{
u32 val;
/* Fetch entry */
mt7530_vlan_cmd(priv, MT7530_VTCR_RD_VID, vid);
val = mt7530_read(priv, MT7530_VAWD1);
entry->old_members = (val >> PORT_MEM_SHFT) & PORT_MEM_MASK;
/* Manipulate entry */
vlan_op(priv, entry);
/* Flush result to hardware */
mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, vid);
}
static int
mt7530_setup_vlan0(struct mt7530_priv *priv)
{
u32 val;
/* Validate the entry with independent learning, keep the original
* ingress tag attribute.
*/
val = IVL_MAC | EG_CON | PORT_MEM(MT7530_ALL_MEMBERS) | FID(FID_BRIDGED) |
VLAN_VALID;
mt7530_write(priv, MT7530_VAWD1, val);
return mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, 0);
}
net: dsa: remove the transactional logic from VLAN objects It should be the driver's business to logically separate its VLAN offloading into a preparation and a commit phase, and some drivers don't need / can't do this. So remove the transactional shim from DSA and let drivers propagate errors directly from the .port_vlan_add callback. It would appear that the code has worse error handling now than it had before. DSA is the only in-kernel user of switchdev that offloads one switchdev object to more than one port: for every VLAN object offloaded to a user port, that VLAN is also offloaded to the CPU port. So the "prepare for user port -> check for errors -> prepare for CPU port -> check for errors -> commit for user port -> commit for CPU port" sequence appears to make more sense than the one we are using now: "offload to user port -> check for errors -> offload to CPU port -> check for errors", but it is really a compromise. In the new way, we can catch errors from the commit phase that we previously had to ignore. But we have our hands tied and cannot do any rollback now: if we add a VLAN on the CPU port and it fails, we can't do the rollback by simply deleting it from the user port, because the switchdev API is not so nice with us: it could have simply been there already, even with the same flags. So we don't even attempt to rollback anything on addition error, just leave whatever VLANs managed to get offloaded right where they are. This should not be a problem at all in practice. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 00:01:53 +00:00
static int
mt7530_port_vlan_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
{
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
struct mt7530_hw_vlan_entry new_entry;
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> # switchdev and mlxsw Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 00:01:46 +00:00
mt7530_hw_vlan_entry_init(&new_entry, port, untagged);
mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add);
if (pvid) {
net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> # switchdev and mlxsw Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 00:01:46 +00:00
priv->ports[port].pvid = vlan->vid;
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* Accept all frames if PVID is set */
mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
MT7530_VLAN_ACC_ALL);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* Only configure PVID if VLAN filtering is enabled */
if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
mt7530_rmw(priv, MT7530_PPBV1_P(port),
G0_PORT_VID_MASK,
G0_PORT_VID(vlan->vid));
} else if (vlan->vid && priv->ports[port].pvid == vlan->vid) {
/* This VLAN is overwritten without PVID, so unset it */
priv->ports[port].pvid = G0_PORT_VID_DEF;
/* Only accept tagged frames if the port is VLAN-aware */
if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
MT7530_VLAN_ACC_TAGGED);
mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
G0_PORT_VID_DEF);
}
mutex_unlock(&priv->reg_mutex);
net: dsa: remove the transactional logic from VLAN objects It should be the driver's business to logically separate its VLAN offloading into a preparation and a commit phase, and some drivers don't need / can't do this. So remove the transactional shim from DSA and let drivers propagate errors directly from the .port_vlan_add callback. It would appear that the code has worse error handling now than it had before. DSA is the only in-kernel user of switchdev that offloads one switchdev object to more than one port: for every VLAN object offloaded to a user port, that VLAN is also offloaded to the CPU port. So the "prepare for user port -> check for errors -> prepare for CPU port -> check for errors -> commit for user port -> commit for CPU port" sequence appears to make more sense than the one we are using now: "offload to user port -> check for errors -> offload to CPU port -> check for errors", but it is really a compromise. In the new way, we can catch errors from the commit phase that we previously had to ignore. But we have our hands tied and cannot do any rollback now: if we add a VLAN on the CPU port and it fails, we can't do the rollback by simply deleting it from the user port, because the switchdev API is not so nice with us: it could have simply been there already, even with the same flags. So we don't even attempt to rollback anything on addition error, just leave whatever VLANs managed to get offloaded right where they are. This should not be a problem at all in practice. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 00:01:53 +00:00
return 0;
}
static int
mt7530_port_vlan_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan)
{
struct mt7530_hw_vlan_entry target_entry;
struct mt7530_priv *priv = ds->priv;
mutex_lock(&priv->reg_mutex);
net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> # switchdev and mlxsw Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 00:01:46 +00:00
mt7530_hw_vlan_entry_init(&target_entry, port, 0);
mt7530_hw_vlan_update(priv, vlan->vid, &target_entry,
mt7530_hw_vlan_del);
net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> # switchdev and mlxsw Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 00:01:46 +00:00
/* PVID is being restored to the default whenever the PVID port
* is being removed from the VLAN.
*/
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
if (priv->ports[port].pvid == vlan->vid) {
priv->ports[port].pvid = G0_PORT_VID_DEF;
/* Only accept tagged frames if the port is VLAN-aware */
if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
MT7530_VLAN_ACC_TAGGED);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
G0_PORT_VID_DEF);
}
mutex_unlock(&priv->reg_mutex);
return 0;
}
static int mt753x_mirror_port_get(unsigned int id, u32 val)
{
return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) :
MIRROR_PORT(val);
}
static int mt753x_mirror_port_set(unsigned int id, u32 val)
{
return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) :
MIRROR_PORT(val);
}
static int mt753x_port_mirror_add(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror,
bool ingress)
{
struct mt7530_priv *priv = ds->priv;
int monitor_port;
u32 val;
/* Check for existent entry */
if ((ingress ? priv->mirror_rx : priv->mirror_tx) & BIT(port))
return -EEXIST;
val = mt7530_read(priv, MT753X_MIRROR_REG(priv->id));
/* MT7530 only supports one monitor port */
monitor_port = mt753x_mirror_port_get(priv->id, val);
if (val & MT753X_MIRROR_EN(priv->id) &&
monitor_port != mirror->to_local_port)
return -EEXIST;
val |= MT753X_MIRROR_EN(priv->id);
val &= ~MT753X_MIRROR_MASK(priv->id);
val |= mt753x_mirror_port_set(priv->id, mirror->to_local_port);
mt7530_write(priv, MT753X_MIRROR_REG(priv->id), val);
val = mt7530_read(priv, MT7530_PCR_P(port));
if (ingress) {
val |= PORT_RX_MIR;
priv->mirror_rx |= BIT(port);
} else {
val |= PORT_TX_MIR;
priv->mirror_tx |= BIT(port);
}
mt7530_write(priv, MT7530_PCR_P(port), val);
return 0;
}
static void mt753x_port_mirror_del(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror)
{
struct mt7530_priv *priv = ds->priv;
u32 val;
val = mt7530_read(priv, MT7530_PCR_P(port));
if (mirror->ingress) {
val &= ~PORT_RX_MIR;
priv->mirror_rx &= ~BIT(port);
} else {
val &= ~PORT_TX_MIR;
priv->mirror_tx &= ~BIT(port);
}
mt7530_write(priv, MT7530_PCR_P(port), val);
if (!priv->mirror_rx && !priv->mirror_tx) {
val = mt7530_read(priv, MT753X_MIRROR_REG(priv->id));
val &= ~MT753X_MIRROR_EN(priv->id);
mt7530_write(priv, MT753X_MIRROR_REG(priv->id), val);
}
}
static enum dsa_tag_protocol
mtk_get_tag_protocol(struct dsa_switch *ds, int port,
enum dsa_tag_protocol mp)
{
return DSA_TAG_PROTO_MTK;
}
#ifdef CONFIG_GPIOLIB
static inline u32
mt7530_gpio_to_bit(unsigned int offset)
{
/* Map GPIO offset to register bit
* [ 2: 0] port 0 LED 0..2 as GPIO 0..2
* [ 6: 4] port 1 LED 0..2 as GPIO 3..5
* [10: 8] port 2 LED 0..2 as GPIO 6..8
* [14:12] port 3 LED 0..2 as GPIO 9..11
* [18:16] port 4 LED 0..2 as GPIO 12..14
*/
return BIT(offset + offset / 3);
}
static int
mt7530_gpio_get(struct gpio_chip *gc, unsigned int offset)
{
struct mt7530_priv *priv = gpiochip_get_data(gc);
u32 bit = mt7530_gpio_to_bit(offset);
return !!(mt7530_read(priv, MT7530_LED_GPIO_DATA) & bit);
}
static void
mt7530_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
{
struct mt7530_priv *priv = gpiochip_get_data(gc);
u32 bit = mt7530_gpio_to_bit(offset);
if (value)
mt7530_set(priv, MT7530_LED_GPIO_DATA, bit);
else
mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit);
}
static int
mt7530_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
{
struct mt7530_priv *priv = gpiochip_get_data(gc);
u32 bit = mt7530_gpio_to_bit(offset);
return (mt7530_read(priv, MT7530_LED_GPIO_DIR) & bit) ?
GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN;
}
static int
mt7530_gpio_direction_input(struct gpio_chip *gc, unsigned int offset)
{
struct mt7530_priv *priv = gpiochip_get_data(gc);
u32 bit = mt7530_gpio_to_bit(offset);
mt7530_clear(priv, MT7530_LED_GPIO_OE, bit);
mt7530_clear(priv, MT7530_LED_GPIO_DIR, bit);
return 0;
}
static int
mt7530_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value)
{
struct mt7530_priv *priv = gpiochip_get_data(gc);
u32 bit = mt7530_gpio_to_bit(offset);
mt7530_set(priv, MT7530_LED_GPIO_DIR, bit);
if (value)
mt7530_set(priv, MT7530_LED_GPIO_DATA, bit);
else
mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit);
mt7530_set(priv, MT7530_LED_GPIO_OE, bit);
return 0;
}
static int
mt7530_setup_gpio(struct mt7530_priv *priv)
{
struct device *dev = priv->dev;
struct gpio_chip *gc;
gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);
if (!gc)
return -ENOMEM;
mt7530_write(priv, MT7530_LED_GPIO_OE, 0);
mt7530_write(priv, MT7530_LED_GPIO_DIR, 0);
mt7530_write(priv, MT7530_LED_IO_MODE, 0);
gc->label = "mt7530";
gc->parent = dev;
gc->owner = THIS_MODULE;
gc->get_direction = mt7530_gpio_get_direction;
gc->direction_input = mt7530_gpio_direction_input;
gc->direction_output = mt7530_gpio_direction_output;
gc->get = mt7530_gpio_get;
gc->set = mt7530_gpio_set;
gc->base = -1;
gc->ngpio = 15;
gc->can_sleep = true;
return devm_gpiochip_add_data(dev, gc, priv);
}
#endif /* CONFIG_GPIOLIB */
static irqreturn_t
mt7530_irq_thread_fn(int irq, void *dev_id)
{
struct mt7530_priv *priv = dev_id;
bool handled = false;
u32 val;
int p;
mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
val = mt7530_mii_read(priv, MT7530_SYS_INT_STS);
mt7530_mii_write(priv, MT7530_SYS_INT_STS, val);
mutex_unlock(&priv->bus->mdio_lock);
for (p = 0; p < MT7530_NUM_PHYS; p++) {
if (BIT(p) & val) {
unsigned int irq;
irq = irq_find_mapping(priv->irq_domain, p);
handle_nested_irq(irq);
handled = true;
}
}
return IRQ_RETVAL(handled);
}
static void
mt7530_irq_mask(struct irq_data *d)
{
struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
priv->irq_enable &= ~BIT(d->hwirq);
}
static void
mt7530_irq_unmask(struct irq_data *d)
{
struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
priv->irq_enable |= BIT(d->hwirq);
}
static void
mt7530_irq_bus_lock(struct irq_data *d)
{
struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
}
static void
mt7530_irq_bus_sync_unlock(struct irq_data *d)
{
struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
mt7530_mii_write(priv, MT7530_SYS_INT_EN, priv->irq_enable);
mutex_unlock(&priv->bus->mdio_lock);
}
static struct irq_chip mt7530_irq_chip = {
.name = KBUILD_MODNAME,
.irq_mask = mt7530_irq_mask,
.irq_unmask = mt7530_irq_unmask,
.irq_bus_lock = mt7530_irq_bus_lock,
.irq_bus_sync_unlock = mt7530_irq_bus_sync_unlock,
};
static int
mt7530_irq_map(struct irq_domain *domain, unsigned int irq,
irq_hw_number_t hwirq)
{
irq_set_chip_data(irq, domain->host_data);
irq_set_chip_and_handler(irq, &mt7530_irq_chip, handle_simple_irq);
irq_set_nested_thread(irq, true);
irq_set_noprobe(irq);
return 0;
}
static const struct irq_domain_ops mt7530_irq_domain_ops = {
.map = mt7530_irq_map,
.xlate = irq_domain_xlate_onecell,
};
static void
mt7530_setup_mdio_irq(struct mt7530_priv *priv)
{
struct dsa_switch *ds = priv->ds;
int p;
for (p = 0; p < MT7530_NUM_PHYS; p++) {
if (BIT(p) & ds->phys_mii_mask) {
unsigned int irq;
irq = irq_create_mapping(priv->irq_domain, p);
ds->slave_mii_bus->irq[p] = irq;
}
}
}
static int
mt7530_setup_irq(struct mt7530_priv *priv)
{
struct device *dev = priv->dev;
struct device_node *np = dev->of_node;
int ret;
if (!of_property_read_bool(np, "interrupt-controller")) {
dev_info(dev, "no interrupt support\n");
return 0;
}
priv->irq = of_irq_get(np, 0);
if (priv->irq <= 0) {
dev_err(dev, "failed to get parent IRQ: %d\n", priv->irq);
return priv->irq ? : -EINVAL;
}
priv->irq_domain = irq_domain_add_linear(np, MT7530_NUM_PHYS,
&mt7530_irq_domain_ops, priv);
if (!priv->irq_domain) {
dev_err(dev, "failed to create IRQ domain\n");
return -ENOMEM;
}
/* This register must be set for MT7530 to properly fire interrupts */
if (priv->id != ID_MT7531)
mt7530_set(priv, MT7530_TOP_SIG_CTRL, TOP_SIG_CTRL_NORMAL);
ret = request_threaded_irq(priv->irq, NULL, mt7530_irq_thread_fn,
IRQF_ONESHOT, KBUILD_MODNAME, priv);
if (ret) {
irq_domain_remove(priv->irq_domain);
dev_err(dev, "failed to request IRQ: %d\n", ret);
return ret;
}
return 0;
}
static void
mt7530_free_mdio_irq(struct mt7530_priv *priv)
{
int p;
for (p = 0; p < MT7530_NUM_PHYS; p++) {
if (BIT(p) & priv->ds->phys_mii_mask) {
unsigned int irq;
irq = irq_find_mapping(priv->irq_domain, p);
irq_dispose_mapping(irq);
}
}
}
static void
mt7530_free_irq_common(struct mt7530_priv *priv)
{
free_irq(priv->irq, priv);
irq_domain_remove(priv->irq_domain);
}
static void
mt7530_free_irq(struct mt7530_priv *priv)
{
mt7530_free_mdio_irq(priv);
mt7530_free_irq_common(priv);
}
static int
mt7530_setup_mdio(struct mt7530_priv *priv)
{
struct dsa_switch *ds = priv->ds;
struct device *dev = priv->dev;
struct mii_bus *bus;
static int idx;
int ret;
bus = devm_mdiobus_alloc(dev);
if (!bus)
return -ENOMEM;
ds->slave_mii_bus = bus;
bus->priv = priv;
bus->name = KBUILD_MODNAME "-mii";
snprintf(bus->id, MII_BUS_ID_SIZE, KBUILD_MODNAME "-%d", idx++);
bus->read = mt753x_phy_read;
bus->write = mt753x_phy_write;
bus->parent = dev;
bus->phy_mask = ~ds->phys_mii_mask;
if (priv->irq)
mt7530_setup_mdio_irq(priv);
ret = devm_mdiobus_register(dev, bus);
if (ret) {
dev_err(dev, "failed to register MDIO bus: %d\n", ret);
if (priv->irq)
mt7530_free_mdio_irq(priv);
}
return ret;
}
static int
mt7530_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
struct device_node *phy_node;
struct device_node *mac_np;
struct mt7530_dummy_poll p;
phy_interface_t interface;
struct device_node *dn;
u32 id, val;
int ret, i;
/* The parent node of master netdev which holds the common system
* controller also is the container for two GMACs nodes representing
* as two netdev instances.
*/
dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
ds->assisted_learning_on_cpu_port = true;
ds->mtu_enforcement_ingress = true;
if (priv->id == ID_MT7530) {
regulator_set_voltage(priv->core_pwr, 1000000, 1000000);
ret = regulator_enable(priv->core_pwr);
if (ret < 0) {
dev_err(priv->dev,
"Failed to enable core power: %d\n", ret);
return ret;
}
regulator_set_voltage(priv->io_pwr, 3300000, 3300000);
ret = regulator_enable(priv->io_pwr);
if (ret < 0) {
dev_err(priv->dev, "Failed to enable io pwr: %d\n",
ret);
return ret;
}
}
/* Reset whole chip through gpio pin or memory-mapped registers for
* different type of hardware
*/
if (priv->mcm) {
reset_control_assert(priv->rstc);
usleep_range(1000, 1100);
reset_control_deassert(priv->rstc);
} else {
gpiod_set_value_cansleep(priv->reset, 0);
usleep_range(1000, 1100);
gpiod_set_value_cansleep(priv->reset, 1);
}
/* Waiting for MT7530 got to stable */
INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_HWTRAP);
ret = readx_poll_timeout(_mt7530_read, &p, val, val != 0,
20, 1000000);
if (ret < 0) {
dev_err(priv->dev, "reset timeout\n");
return ret;
}
id = mt7530_read(priv, MT7530_CREV);
id >>= CHIP_NAME_SHIFT;
if (id != MT7530_ID) {
dev_err(priv->dev, "chip %x can't be supported\n", id);
return -ENODEV;
}
/* Reset the switch through internal reset */
mt7530_write(priv, MT7530_SYS_CTRL,
SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
SYS_CTRL_REG_RST);
/* Enable Port 6 only; P5 as GMAC5 which currently is not supported */
val = mt7530_read(priv, MT7530_MHWTRAP);
val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS;
val |= MHWTRAP_MANUAL;
mt7530_write(priv, MT7530_MHWTRAP, val);
priv->p6_interface = PHY_INTERFACE_MODE_NA;
/* Enable and reset MIB counters */
mt7530_mib_reset(ds);
for (i = 0; i < MT7530_NUM_PORTS; i++) {
/* Disable forwarding by default on all ports */
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
/* Disable learning by default on all ports */
mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
if (dsa_is_cpu_port(ds, i)) {
ret = mt753x_cpu_port_enable(ds, i);
if (ret)
return ret;
} else {
mt7530_port_disable(ds, i);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* Set default PVID to 0 on all user ports */
mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
G0_PORT_VID_DEF);
}
/* Enable consistent egress tag */
mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
/* Setup VLAN ID 0 for VLAN-unaware bridges */
ret = mt7530_setup_vlan0(priv);
if (ret)
return ret;
/* Setup port 5 */
priv->p5_intf_sel = P5_DISABLED;
interface = PHY_INTERFACE_MODE_NA;
if (!dsa_is_unused_port(ds, 5)) {
priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
ret = of_get_phy_mode(dsa_to_port(ds, 5)->dn, &interface);
if (ret && ret != -ENODEV)
return ret;
} else {
/* Scan the ethernet nodes. look for GMAC1, lookup used phy */
for_each_child_of_node(dn, mac_np) {
if (!of_device_is_compatible(mac_np,
"mediatek,eth-mac"))
continue;
ret = of_property_read_u32(mac_np, "reg", &id);
if (ret < 0 || id != 1)
continue;
phy_node = of_parse_phandle(mac_np, "phy-handle", 0);
if (!phy_node)
continue;
if (phy_node->parent == priv->dev->of_node->parent) {
ret = of_get_phy_mode(mac_np, &interface);
if (ret && ret != -ENODEV) {
of_node_put(mac_np);
return ret;
}
id = of_mdio_parse_addr(ds->dev, phy_node);
if (id == 0)
priv->p5_intf_sel = P5_INTF_SEL_PHY_P0;
if (id == 4)
priv->p5_intf_sel = P5_INTF_SEL_PHY_P4;
}
of_node_put(mac_np);
of_node_put(phy_node);
break;
}
}
#ifdef CONFIG_GPIOLIB
if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) {
ret = mt7530_setup_gpio(priv);
if (ret)
return ret;
}
#endif /* CONFIG_GPIOLIB */
mt7530_setup_port5(ds, interface);
/* Flush the FDB table */
ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
if (ret < 0)
return ret;
return 0;
}
static int
mt7531_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
struct mt7530_dummy_poll p;
u32 val, id;
int ret, i;
/* Reset whole chip through gpio pin or memory-mapped registers for
* different type of hardware
*/
if (priv->mcm) {
reset_control_assert(priv->rstc);
usleep_range(1000, 1100);
reset_control_deassert(priv->rstc);
} else {
gpiod_set_value_cansleep(priv->reset, 0);
usleep_range(1000, 1100);
gpiod_set_value_cansleep(priv->reset, 1);
}
/* Waiting for MT7530 got to stable */
INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_HWTRAP);
ret = readx_poll_timeout(_mt7530_read, &p, val, val != 0,
20, 1000000);
if (ret < 0) {
dev_err(priv->dev, "reset timeout\n");
return ret;
}
id = mt7530_read(priv, MT7531_CREV);
id >>= CHIP_NAME_SHIFT;
if (id != MT7531_ID) {
dev_err(priv->dev, "chip %x can't be supported\n", id);
return -ENODEV;
}
/* Reset the switch through internal reset */
mt7530_write(priv, MT7530_SYS_CTRL,
SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
SYS_CTRL_REG_RST);
if (mt7531_dual_sgmii_supported(priv)) {
priv->p5_intf_sel = P5_INTF_SEL_GMAC5_SGMII;
/* Let ds->slave_mii_bus be able to access external phy. */
mt7530_rmw(priv, MT7531_GPIO_MODE1, MT7531_GPIO11_RG_RXD2_MASK,
MT7531_EXT_P_MDC_11);
mt7530_rmw(priv, MT7531_GPIO_MODE1, MT7531_GPIO12_RG_RXD3_MASK,
MT7531_EXT_P_MDIO_12);
} else {
priv->p5_intf_sel = P5_INTF_SEL_GMAC5;
}
dev_dbg(ds->dev, "P5 support %s interface\n",
p5_intf_modes(priv->p5_intf_sel));
mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
MT7531_GPIO0_INTERRUPT);
/* Let phylink decide the interface later. */
priv->p5_interface = PHY_INTERFACE_MODE_NA;
priv->p6_interface = PHY_INTERFACE_MODE_NA;
/* Enable PHY core PLL, since phy_device has not yet been created
* provided for phy_[read,write]_mmd_indirect is called, we provide
* our own mt7531_ind_mmd_phy_[read,write] to complete this
* function.
*/
val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR,
MDIO_MMD_VEND2, CORE_PLL_GROUP4);
val |= MT7531_PHY_PLL_BYPASS_MODE;
val &= ~MT7531_PHY_PLL_OFF;
mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
CORE_PLL_GROUP4, val);
/* BPDU to CPU port */
mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK,
BIT(MT7530_CPU_PORT));
mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
MT753X_BPDU_CPU_ONLY);
/* Enable and reset MIB counters */
mt7530_mib_reset(ds);
for (i = 0; i < MT7530_NUM_PORTS; i++) {
/* Disable forwarding by default on all ports */
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
/* Disable learning by default on all ports */
mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
if (dsa_is_cpu_port(ds, i)) {
ret = mt753x_cpu_port_enable(ds, i);
if (ret)
return ret;
} else {
mt7530_port_disable(ds, i);
net: dsa: mt7530: use independent VLAN learning on VLAN-unaware bridges Consider the following bridge configuration, where bond0 is not offloaded: +-- br0 --+ / / | \ / / | \ / | | bond0 / | | / \ swp0 swp1 swp2 swp3 swp4 . . . . . . A B C Ideally, when the switch receives a packet from swp3 or swp4, it should forward the packet to the CPU, according to the port matrix and unknown unicast flood settings. But packet loss will happen if the destination address is at one of the offloaded ports (swp0~2). For example, when client C sends a packet to A, the FDB lookup will indicate that it should be forwarded to swp0, but the port matrix of swp3 and swp4 is configured to only allow the CPU to be its destination, so it is dropped. However, this issue does not happen if the bridge is VLAN-aware. That is because VLAN-aware bridges use independent VLAN learning, i.e. use VID for FDB lookup, on offloaded ports. As swp3 and swp4 are not offloaded, shared VLAN learning with default filter ID of 0 is used instead. So the lookup for A with filter ID 0 never hits and the packet can be forwarded to the CPU. In the current code, only two combinations were used to toggle user ports' VLAN awareness: one is PCR.PORT_VLAN set to port matrix mode with PVC.VLAN_ATTR set to transparent port, the other is PCR.PORT_VLAN set to security mode with PVC.VLAN_ATTR set to user port. It turns out that only PVC.VLAN_ATTR contributes to VLAN awareness, and port matrix mode just skips the VLAN table lookup. The reference manual is somehow misleading when describing PORT_VLAN modes. It states that PORT_MEM (VLAN port member) is used for destination if the VLAN table lookup hits, but actually **PORT_MEM & PORT_MATRIX** (bitwise AND of VLAN port member and port matrix) is used instead, which means we can have two or more separate VLAN-aware bridges with the same PVID and traffic won't leak between them. Therefore, to solve this, enable independent VLAN learning with PVID 0 on VLAN-unaware bridges, by setting their PCR.PORT_VLAN to fallback mode, while leaving standalone ports in port matrix mode. The CPU port is always set to fallback mode to serve those bridges. During testing, it is found that FDB lookup with filter ID of 0 will also hit entries with VID 0 even with independent VLAN learning. To avoid that, install all VLANs with filter ID of 1. Signed-off-by: DENG Qingfang <dqfext@gmail.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 16:04:02 +00:00
/* Set default PVID to 0 on all user ports */
mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
G0_PORT_VID_DEF);
}
/* Enable consistent egress tag */
mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
/* Setup VLAN ID 0 for VLAN-unaware bridges */
ret = mt7530_setup_vlan0(priv);
if (ret)
return ret;
ds->assisted_learning_on_cpu_port = true;
ds->mtu_enforcement_ingress = true;
/* Flush the FDB table */
ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
if (ret < 0)
return ret;
return 0;
}
static bool
mt7530_phy_mode_supported(struct dsa_switch *ds, int port,
const struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
switch (port) {
case 0 ... 4: /* Internal phy */
if (state->interface != PHY_INTERFACE_MODE_GMII)
return false;
break;
case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */
if (!phy_interface_mode_is_rgmii(state->interface) &&
state->interface != PHY_INTERFACE_MODE_MII &&
state->interface != PHY_INTERFACE_MODE_GMII)
return false;
break;
case 6: /* 1st cpu port */
if (state->interface != PHY_INTERFACE_MODE_RGMII &&
state->interface != PHY_INTERFACE_MODE_TRGMII)
return false;
break;
default:
dev_err(priv->dev, "%s: unsupported port: %i\n", __func__,
port);
return false;
}
return true;
}
static bool mt7531_is_rgmii_port(struct mt7530_priv *priv, u32 port)
{
return (port == 5) && (priv->p5_intf_sel != P5_INTF_SEL_GMAC5_SGMII);
}
static bool
mt7531_phy_mode_supported(struct dsa_switch *ds, int port,
const struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
switch (port) {
case 0 ... 4: /* Internal phy */
if (state->interface != PHY_INTERFACE_MODE_GMII)
return false;
break;
case 5: /* 2nd cpu port supports either rgmii or sgmii/8023z */
if (mt7531_is_rgmii_port(priv, port))
return phy_interface_mode_is_rgmii(state->interface);
fallthrough;
case 6: /* 1st cpu port supports sgmii/8023z only */
if (state->interface != PHY_INTERFACE_MODE_SGMII &&
!phy_interface_mode_is_8023z(state->interface))
return false;
break;
default:
dev_err(priv->dev, "%s: unsupported port: %i\n", __func__,
port);
return false;
}
return true;
}
static bool
mt753x_phy_mode_supported(struct dsa_switch *ds, int port,
const struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
return priv->info->phy_mode_supported(ds, port, state);
}
static int
mt753x_pad_setup(struct dsa_switch *ds, const struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
return priv->info->pad_setup(ds, state->interface);
}
static int
mt7530_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
/* Only need to setup port5. */
if (port != 5)
return 0;
mt7530_setup_port5(priv->ds, interface);
return 0;
}
static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port,
phy_interface_t interface,
struct phy_device *phydev)
{
u32 val;
if (!mt7531_is_rgmii_port(priv, port)) {
dev_err(priv->dev, "RGMII mode is not available for port %d\n",
port);
return -EINVAL;
}
val = mt7530_read(priv, MT7531_CLKGEN_CTRL);
val |= GP_CLK_EN;
val &= ~GP_MODE_MASK;
val |= GP_MODE(MT7531_GP_MODE_RGMII);
val &= ~CLK_SKEW_IN_MASK;
val |= CLK_SKEW_IN(MT7531_CLK_SKEW_NO_CHG);
val &= ~CLK_SKEW_OUT_MASK;
val |= CLK_SKEW_OUT(MT7531_CLK_SKEW_NO_CHG);
val |= TXCLK_NO_REVERSE | RXCLK_NO_DELAY;
/* Do not adjust rgmii delay when vendor phy driver presents. */
if (!phydev || phy_driver_is_genphy(phydev)) {
val &= ~(TXCLK_NO_REVERSE | RXCLK_NO_DELAY);
switch (interface) {
case PHY_INTERFACE_MODE_RGMII:
val |= TXCLK_NO_REVERSE;
val |= RXCLK_NO_DELAY;
break;
case PHY_INTERFACE_MODE_RGMII_RXID:
val |= TXCLK_NO_REVERSE;
break;
case PHY_INTERFACE_MODE_RGMII_TXID:
val |= RXCLK_NO_DELAY;
break;
case PHY_INTERFACE_MODE_RGMII_ID:
break;
default:
return -EINVAL;
}
}
mt7530_write(priv, MT7531_CLKGEN_CTRL, val);
return 0;
}
static void mt7531_sgmii_validate(struct mt7530_priv *priv, int port,
unsigned long *supported)
{
/* Port5 supports ethier RGMII or SGMII.
* Port6 supports SGMII only.
*/
switch (port) {
case 5:
if (mt7531_is_rgmii_port(priv, port))
break;
fallthrough;
case 6:
phylink_set(supported, 1000baseX_Full);
phylink_set(supported, 2500baseX_Full);
phylink_set(supported, 2500baseT_Full);
}
}
static void
mt7531_sgmii_link_up_force(struct dsa_switch *ds, int port,
unsigned int mode, phy_interface_t interface,
int speed, int duplex)
{
struct mt7530_priv *priv = ds->priv;
unsigned int val;
/* For adjusting speed and duplex of SGMII force mode. */
if (interface != PHY_INTERFACE_MODE_SGMII ||
phylink_autoneg_inband(mode))
return;
/* SGMII force mode setting */
val = mt7530_read(priv, MT7531_SGMII_MODE(port));
val &= ~MT7531_SGMII_IF_MODE_MASK;
switch (speed) {
case SPEED_10:
val |= MT7531_SGMII_FORCE_SPEED_10;
break;
case SPEED_100:
val |= MT7531_SGMII_FORCE_SPEED_100;
break;
case SPEED_1000:
val |= MT7531_SGMII_FORCE_SPEED_1000;
break;
}
/* MT7531 SGMII 1G force mode can only work in full duplex mode,
* no matter MT7531_SGMII_FORCE_HALF_DUPLEX is set or not.
*/
if ((speed == SPEED_10 || speed == SPEED_100) &&
duplex != DUPLEX_FULL)
val |= MT7531_SGMII_FORCE_HALF_DUPLEX;
mt7530_write(priv, MT7531_SGMII_MODE(port), val);
}
static bool mt753x_is_mac_port(u32 port)
{
return (port == 5 || port == 6);
}
static int mt7531_sgmii_setup_mode_force(struct mt7530_priv *priv, u32 port,
phy_interface_t interface)
{
u32 val;
if (!mt753x_is_mac_port(port))
return -EINVAL;
mt7530_set(priv, MT7531_QPHY_PWR_STATE_CTRL(port),
MT7531_SGMII_PHYA_PWD);
val = mt7530_read(priv, MT7531_PHYA_CTRL_SIGNAL3(port));
val &= ~MT7531_RG_TPHY_SPEED_MASK;
/* Setup 2.5 times faster clock for 2.5Gbps data speeds with 10B/8B
* encoding.
*/
val |= (interface == PHY_INTERFACE_MODE_2500BASEX) ?
MT7531_RG_TPHY_SPEED_3_125G : MT7531_RG_TPHY_SPEED_1_25G;
mt7530_write(priv, MT7531_PHYA_CTRL_SIGNAL3(port), val);
mt7530_clear(priv, MT7531_PCS_CONTROL_1(port), MT7531_SGMII_AN_ENABLE);
/* MT7531 SGMII 1G and 2.5G force mode can only work in full duplex
* mode, no matter MT7531_SGMII_FORCE_HALF_DUPLEX is set or not.
*/
mt7530_rmw(priv, MT7531_SGMII_MODE(port),
MT7531_SGMII_IF_MODE_MASK | MT7531_SGMII_REMOTE_FAULT_DIS,
MT7531_SGMII_FORCE_SPEED_1000);
mt7530_write(priv, MT7531_QPHY_PWR_STATE_CTRL(port), 0);
return 0;
}
static int mt7531_sgmii_setup_mode_an(struct mt7530_priv *priv, int port,
phy_interface_t interface)
{
if (!mt753x_is_mac_port(port))
return -EINVAL;
mt7530_set(priv, MT7531_QPHY_PWR_STATE_CTRL(port),
MT7531_SGMII_PHYA_PWD);
mt7530_rmw(priv, MT7531_PHYA_CTRL_SIGNAL3(port),
MT7531_RG_TPHY_SPEED_MASK, MT7531_RG_TPHY_SPEED_1_25G);
mt7530_set(priv, MT7531_SGMII_MODE(port),
MT7531_SGMII_REMOTE_FAULT_DIS |
MT7531_SGMII_SPEED_DUPLEX_AN);
mt7530_rmw(priv, MT7531_PCS_SPEED_ABILITY(port),
MT7531_SGMII_TX_CONFIG_MASK, 1);
mt7530_set(priv, MT7531_PCS_CONTROL_1(port), MT7531_SGMII_AN_ENABLE);
mt7530_set(priv, MT7531_PCS_CONTROL_1(port), MT7531_SGMII_AN_RESTART);
mt7530_write(priv, MT7531_QPHY_PWR_STATE_CTRL(port), 0);
return 0;
}
static void mt7531_sgmii_restart_an(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
u32 val;
/* Only restart AN when AN is enabled */
val = mt7530_read(priv, MT7531_PCS_CONTROL_1(port));
if (val & MT7531_SGMII_AN_ENABLE) {
val |= MT7531_SGMII_AN_RESTART;
mt7530_write(priv, MT7531_PCS_CONTROL_1(port), val);
}
}
static int
mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
struct phy_device *phydev;
struct dsa_port *dp;
if (!mt753x_is_mac_port(port)) {
dev_err(priv->dev, "port %d is not a MAC port\n", port);
return -EINVAL;
}
switch (interface) {
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_RXID:
case PHY_INTERFACE_MODE_RGMII_TXID:
dp = dsa_to_port(ds, port);
phydev = dp->slave->phydev;
return mt7531_rgmii_setup(priv, port, interface, phydev);
case PHY_INTERFACE_MODE_SGMII:
return mt7531_sgmii_setup_mode_an(priv, port, interface);
case PHY_INTERFACE_MODE_NA:
case PHY_INTERFACE_MODE_1000BASEX:
case PHY_INTERFACE_MODE_2500BASEX:
if (phylink_autoneg_inband(mode))
return -EINVAL;
return mt7531_sgmii_setup_mode_force(priv, port, interface);
default:
return -EINVAL;
}
return -EINVAL;
}
static int
mt753x_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
const struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
return priv->info->mac_port_config(ds, port, mode, state->interface);
}
static void
mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
const struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
u32 mcr_cur, mcr_new;
if (!mt753x_phy_mode_supported(ds, port, state))
goto unsupported;
switch (port) {
case 0 ... 4: /* Internal phy */
if (state->interface != PHY_INTERFACE_MODE_GMII)
goto unsupported;
break;
case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */
if (priv->p5_interface == state->interface)
break;
if (mt753x_mac_config(ds, port, mode, state) < 0)
goto unsupported;
if (priv->p5_intf_sel != P5_DISABLED)
priv->p5_interface = state->interface;
break;
case 6: /* 1st cpu port */
if (priv->p6_interface == state->interface)
break;
mt753x_pad_setup(ds, state);
if (mt753x_mac_config(ds, port, mode, state) < 0)
goto unsupported;
priv->p6_interface = state->interface;
break;
default:
unsupported:
dev_err(ds->dev, "%s: unsupported %s port: %i\n",
__func__, phy_modes(state->interface), port);
return;
}
if (phylink_autoneg_inband(mode) &&
state->interface != PHY_INTERFACE_MODE_SGMII) {
dev_err(ds->dev, "%s: in-band negotiation unsupported\n",
__func__);
return;
}
mcr_cur = mt7530_read(priv, MT7530_PMCR_P(port));
mcr_new = mcr_cur;
mcr_new &= ~PMCR_LINK_SETTINGS_MASK;
mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN |
PMCR_BACKPR_EN | PMCR_FORCE_MODE_ID(priv->id);
/* Are we connected to external phy */
if (port == 5 && dsa_is_user_port(ds, 5))
mcr_new |= PMCR_EXT_PHY;
if (mcr_new != mcr_cur)
mt7530_write(priv, MT7530_PMCR_P(port), mcr_new);
}
static void
mt753x_phylink_mac_an_restart(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
if (!priv->info->mac_pcs_an_restart)
return;
priv->info->mac_pcs_an_restart(ds, port);
}
static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface)
{
struct mt7530_priv *priv = ds->priv;
mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK);
}
static void mt753x_mac_pcs_link_up(struct dsa_switch *ds, int port,
unsigned int mode, phy_interface_t interface,
int speed, int duplex)
{
struct mt7530_priv *priv = ds->priv;
if (!priv->info->mac_pcs_link_up)
return;
priv->info->mac_pcs_link_up(ds, port, mode, interface, speed, duplex);
}
static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface,
struct phy_device *phydev,
int speed, int duplex,
bool tx_pause, bool rx_pause)
{
struct mt7530_priv *priv = ds->priv;
u32 mcr;
mt753x_mac_pcs_link_up(ds, port, mode, interface, speed, duplex);
mcr = PMCR_RX_EN | PMCR_TX_EN | PMCR_FORCE_LNK;
/* MT753x MAC works in 1G full duplex mode for all up-clocked
* variants.
*/
if (interface == PHY_INTERFACE_MODE_TRGMII ||
(phy_interface_mode_is_8023z(interface))) {
speed = SPEED_1000;
duplex = DUPLEX_FULL;
}
switch (speed) {
case SPEED_1000:
mcr |= PMCR_FORCE_SPEED_1000;
break;
case SPEED_100:
mcr |= PMCR_FORCE_SPEED_100;
break;
}
if (duplex == DUPLEX_FULL) {
mcr |= PMCR_FORCE_FDX;
if (tx_pause)
mcr |= PMCR_TX_FC_EN;
if (rx_pause)
mcr |= PMCR_RX_FC_EN;
}
if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) {
switch (speed) {
case SPEED_1000:
mcr |= PMCR_FORCE_EEE1G;
break;
case SPEED_100:
mcr |= PMCR_FORCE_EEE100;
break;
}
}
mt7530_set(priv, MT7530_PMCR_P(port), mcr);
}
static int
mt7531_cpu_port_config(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
phy_interface_t interface;
int speed;
int ret;
switch (port) {
case 5:
if (mt7531_is_rgmii_port(priv, port))
interface = PHY_INTERFACE_MODE_RGMII;
else
interface = PHY_INTERFACE_MODE_2500BASEX;
priv->p5_interface = interface;
break;
case 6:
interface = PHY_INTERFACE_MODE_2500BASEX;
mt7531_pad_setup(ds, interface);
priv->p6_interface = interface;
break;
default:
return -EINVAL;
}
if (interface == PHY_INTERFACE_MODE_2500BASEX)
speed = SPEED_2500;
else
speed = SPEED_1000;
ret = mt7531_mac_config(ds, port, MLO_AN_FIXED, interface);
if (ret)
return ret;
mt7530_write(priv, MT7530_PMCR_P(port),
PMCR_CPU_PORT_SETTING(priv->id));
mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, interface, NULL,
speed, DUPLEX_FULL, true, true);
return 0;
}
static void
mt7530_mac_port_validate(struct dsa_switch *ds, int port,
unsigned long *supported)
{
if (port == 5)
phylink_set(supported, 1000baseX_Full);
}
static void mt7531_mac_port_validate(struct dsa_switch *ds, int port,
unsigned long *supported)
{
struct mt7530_priv *priv = ds->priv;
mt7531_sgmii_validate(priv, port, supported);
}
static void
mt753x_phylink_validate(struct dsa_switch *ds, int port,
unsigned long *supported,
struct phylink_link_state *state)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
struct mt7530_priv *priv = ds->priv;
if (state->interface != PHY_INTERFACE_MODE_NA &&
!mt753x_phy_mode_supported(ds, port, state)) {
linkmode_zero(supported);
return;
}
phylink_set_port_modes(mask);
if (state->interface != PHY_INTERFACE_MODE_TRGMII &&
!phy_interface_mode_is_8023z(state->interface)) {
phylink_set(mask, 10baseT_Half);
phylink_set(mask, 10baseT_Full);
phylink_set(mask, 100baseT_Half);
phylink_set(mask, 100baseT_Full);
phylink_set(mask, Autoneg);
}
/* This switch only supports 1G full-duplex. */
if (state->interface != PHY_INTERFACE_MODE_MII)
phylink_set(mask, 1000baseT_Full);
priv->info->mac_port_validate(ds, port, mask);
phylink_set(mask, Pause);
phylink_set(mask, Asym_Pause);
linkmode_and(supported, supported, mask);
linkmode_and(state->advertising, state->advertising, mask);
/* We can only operate at 2500BaseX or 1000BaseX. If requested
* to advertise both, only report advertising at 2500BaseX.
*/
phylink_helper_basex_speed(state);
}
static int
mt7530_phylink_mac_link_state(struct dsa_switch *ds, int port,
struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
u32 pmsr;
if (port < 0 || port >= MT7530_NUM_PORTS)
return -EINVAL;
pmsr = mt7530_read(priv, MT7530_PMSR_P(port));
state->link = (pmsr & PMSR_LINK);
state->an_complete = state->link;
state->duplex = !!(pmsr & PMSR_DPX);
switch (pmsr & PMSR_SPEED_MASK) {
case PMSR_SPEED_10:
state->speed = SPEED_10;
break;
case PMSR_SPEED_100:
state->speed = SPEED_100;
break;
case PMSR_SPEED_1000:
state->speed = SPEED_1000;
break;
default:
state->speed = SPEED_UNKNOWN;
break;
}
state->pause &= ~(MLO_PAUSE_RX | MLO_PAUSE_TX);
if (pmsr & PMSR_RX_FC)
state->pause |= MLO_PAUSE_RX;
if (pmsr & PMSR_TX_FC)
state->pause |= MLO_PAUSE_TX;
return 1;
}
static int
mt7531_sgmii_pcs_get_state_an(struct mt7530_priv *priv, int port,
struct phylink_link_state *state)
{
u32 status, val;
u16 config_reg;
status = mt7530_read(priv, MT7531_PCS_CONTROL_1(port));
state->link = !!(status & MT7531_SGMII_LINK_STATUS);
if (state->interface == PHY_INTERFACE_MODE_SGMII &&
(status & MT7531_SGMII_AN_ENABLE)) {
val = mt7530_read(priv, MT7531_PCS_SPEED_ABILITY(port));
config_reg = val >> 16;
switch (config_reg & LPA_SGMII_SPD_MASK) {
case LPA_SGMII_1000:
state->speed = SPEED_1000;
break;
case LPA_SGMII_100:
state->speed = SPEED_100;
break;
case LPA_SGMII_10:
state->speed = SPEED_10;
break;
default:
dev_err(priv->dev, "invalid sgmii PHY speed\n");
state->link = false;
return -EINVAL;
}
if (config_reg & LPA_SGMII_FULL_DUPLEX)
state->duplex = DUPLEX_FULL;
else
state->duplex = DUPLEX_HALF;
}
return 0;
}
static int
mt7531_phylink_mac_link_state(struct dsa_switch *ds, int port,
struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
if (state->interface == PHY_INTERFACE_MODE_SGMII)
return mt7531_sgmii_pcs_get_state_an(priv, port, state);
return -EOPNOTSUPP;
}
static int
mt753x_phylink_mac_link_state(struct dsa_switch *ds, int port,
struct phylink_link_state *state)
{
struct mt7530_priv *priv = ds->priv;
return priv->info->mac_port_get_state(ds, port, state);
}
static int
mt753x_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
int ret = priv->info->sw_setup(ds);
if (ret)
return ret;
ret = mt7530_setup_irq(priv);
if (ret)
return ret;
ret = mt7530_setup_mdio(priv);
if (ret && priv->irq)
mt7530_free_irq_common(priv);
return ret;
}
static int mt753x_get_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_eee *e)
{
struct mt7530_priv *priv = ds->priv;
u32 eeecr = mt7530_read(priv, MT7530_PMEEECR_P(port));
e->tx_lpi_enabled = !(eeecr & LPI_MODE_EN);
e->tx_lpi_timer = GET_LPI_THRESH(eeecr);
return 0;
}
static int mt753x_set_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_eee *e)
{
struct mt7530_priv *priv = ds->priv;
u32 set, mask = LPI_THRESH_MASK | LPI_MODE_EN;
if (e->tx_lpi_timer > 0xFFF)
return -EINVAL;
set = SET_LPI_THRESH(e->tx_lpi_timer);
if (!e->tx_lpi_enabled)
/* Force LPI Mode without a delay */
set |= LPI_MODE_EN;
mt7530_rmw(priv, MT7530_PMEEECR_P(port), mask, set);
return 0;
}
static const struct dsa_switch_ops mt7530_switch_ops = {
.get_tag_protocol = mtk_get_tag_protocol,
.setup = mt753x_setup,
.get_strings = mt7530_get_strings,
.get_ethtool_stats = mt7530_get_ethtool_stats,
.get_sset_count = mt7530_get_sset_count,
.set_ageing_time = mt7530_set_ageing_time,
.port_enable = mt7530_port_enable,
.port_disable = mt7530_port_disable,
.port_change_mtu = mt7530_port_change_mtu,
.port_max_mtu = mt7530_port_max_mtu,
.port_stp_state_set = mt7530_stp_state_set,
.port_pre_bridge_flags = mt7530_port_pre_bridge_flags,
.port_bridge_flags = mt7530_port_bridge_flags,
.port_bridge_join = mt7530_port_bridge_join,
.port_bridge_leave = mt7530_port_bridge_leave,
.port_fdb_add = mt7530_port_fdb_add,
.port_fdb_del = mt7530_port_fdb_del,
.port_fdb_dump = mt7530_port_fdb_dump,
.port_mdb_add = mt7530_port_mdb_add,
.port_mdb_del = mt7530_port_mdb_del,
.port_vlan_filtering = mt7530_port_vlan_filtering,
.port_vlan_add = mt7530_port_vlan_add,
.port_vlan_del = mt7530_port_vlan_del,
.port_mirror_add = mt753x_port_mirror_add,
.port_mirror_del = mt753x_port_mirror_del,
.phylink_validate = mt753x_phylink_validate,
.phylink_mac_link_state = mt753x_phylink_mac_link_state,
.phylink_mac_config = mt753x_phylink_mac_config,
.phylink_mac_an_restart = mt753x_phylink_mac_an_restart,
.phylink_mac_link_down = mt753x_phylink_mac_link_down,
.phylink_mac_link_up = mt753x_phylink_mac_link_up,
.get_mac_eee = mt753x_get_mac_eee,
.set_mac_eee = mt753x_set_mac_eee,
};
static const struct mt753x_info mt753x_table[] = {
[ID_MT7621] = {
.id = ID_MT7621,
.sw_setup = mt7530_setup,
.phy_read = mt7530_phy_read,
.phy_write = mt7530_phy_write,
.pad_setup = mt7530_pad_clk_setup,
.phy_mode_supported = mt7530_phy_mode_supported,
.mac_port_validate = mt7530_mac_port_validate,
.mac_port_get_state = mt7530_phylink_mac_link_state,
.mac_port_config = mt7530_mac_config,
},
[ID_MT7530] = {
.id = ID_MT7530,
.sw_setup = mt7530_setup,
.phy_read = mt7530_phy_read,
.phy_write = mt7530_phy_write,
.pad_setup = mt7530_pad_clk_setup,
.phy_mode_supported = mt7530_phy_mode_supported,
.mac_port_validate = mt7530_mac_port_validate,
.mac_port_get_state = mt7530_phylink_mac_link_state,
.mac_port_config = mt7530_mac_config,
},
[ID_MT7531] = {
.id = ID_MT7531,
.sw_setup = mt7531_setup,
.phy_read = mt7531_ind_phy_read,
.phy_write = mt7531_ind_phy_write,
.pad_setup = mt7531_pad_setup,
.cpu_port_config = mt7531_cpu_port_config,
.phy_mode_supported = mt7531_phy_mode_supported,
.mac_port_validate = mt7531_mac_port_validate,
.mac_port_get_state = mt7531_phylink_mac_link_state,
.mac_port_config = mt7531_mac_config,
.mac_pcs_an_restart = mt7531_sgmii_restart_an,
.mac_pcs_link_up = mt7531_sgmii_link_up_force,
},
};
static const struct of_device_id mt7530_of_match[] = {
{ .compatible = "mediatek,mt7621", .data = &mt753x_table[ID_MT7621], },
{ .compatible = "mediatek,mt7530", .data = &mt753x_table[ID_MT7530], },
{ .compatible = "mediatek,mt7531", .data = &mt753x_table[ID_MT7531], },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, mt7530_of_match);
static int
mt7530_probe(struct mdio_device *mdiodev)
{
struct mt7530_priv *priv;
struct device_node *dn;
dn = mdiodev->dev.of_node;
priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL);
if (!priv->ds)
return -ENOMEM;
priv->ds->dev = &mdiodev->dev;
priv->ds->num_ports = MT7530_NUM_PORTS;
/* Use medatek,mcm property to distinguish hardware type that would
* casues a little bit differences on power-on sequence.
*/
priv->mcm = of_property_read_bool(dn, "mediatek,mcm");
if (priv->mcm) {
dev_info(&mdiodev->dev, "MT7530 adapts as multi-chip module\n");
priv->rstc = devm_reset_control_get(&mdiodev->dev, "mcm");
if (IS_ERR(priv->rstc)) {
dev_err(&mdiodev->dev, "Couldn't get our reset line\n");
return PTR_ERR(priv->rstc);
}
}
/* Get the hardware identifier from the devicetree node.
* We will need it for some of the clock and regulator setup.
*/
priv->info = of_device_get_match_data(&mdiodev->dev);
if (!priv->info)
return -EINVAL;
/* Sanity check if these required device operations are filled
* properly.
*/
if (!priv->info->sw_setup || !priv->info->pad_setup ||
!priv->info->phy_read || !priv->info->phy_write ||
!priv->info->phy_mode_supported ||
!priv->info->mac_port_validate ||
!priv->info->mac_port_get_state || !priv->info->mac_port_config)
return -EINVAL;
priv->id = priv->info->id;
if (priv->id == ID_MT7530) {
priv->core_pwr = devm_regulator_get(&mdiodev->dev, "core");
if (IS_ERR(priv->core_pwr))
return PTR_ERR(priv->core_pwr);
priv->io_pwr = devm_regulator_get(&mdiodev->dev, "io");
if (IS_ERR(priv->io_pwr))
return PTR_ERR(priv->io_pwr);
}
/* Not MCM that indicates switch works as the remote standalone
* integrated circuit so the GPIO pin would be used to complete
* the reset, otherwise memory-mapped register accessing used
* through syscon provides in the case of MCM.
*/
if (!priv->mcm) {
priv->reset = devm_gpiod_get_optional(&mdiodev->dev, "reset",
GPIOD_OUT_LOW);
if (IS_ERR(priv->reset)) {
dev_err(&mdiodev->dev, "Couldn't get our reset line\n");
return PTR_ERR(priv->reset);
}
}
priv->bus = mdiodev->bus;
priv->dev = &mdiodev->dev;
priv->ds->priv = priv;
priv->ds->ops = &mt7530_switch_ops;
mutex_init(&priv->reg_mutex);
dev_set_drvdata(&mdiodev->dev, priv);
return dsa_register_switch(priv->ds);
}
static void
mt7530_remove(struct mdio_device *mdiodev)
{
struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev);
int ret = 0;
net: dsa: be compatible with masters which unregister on shutdown Lino reports that on his system with bcmgenet as DSA master and KSZ9897 as a switch, rebooting or shutting down never works properly. What does the bcmgenet driver have special to trigger this, that other DSA masters do not? It has an implementation of ->shutdown which simply calls its ->remove implementation. Otherwise said, it unregisters its network interface on shutdown. This message can be seen in a loop, and it hangs the reboot process there: unregister_netdevice: waiting for eth0 to become free. Usage count = 3 So why 3? A usage count of 1 is normal for a registered network interface, and any virtual interface which links itself as an upper of that will increment it via dev_hold. In the case of DSA, this is the call path: dsa_slave_create -> netdev_upper_dev_link -> __netdev_upper_dev_link -> __netdev_adjacent_dev_insert -> dev_hold So a DSA switch with 3 interfaces will result in a usage count elevated by two, and netdev_wait_allrefs will wait until they have gone away. Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and delete themselves, but DSA cannot just vanish and go poof, at most it can unbind itself from the switch devices, but that must happen strictly earlier compared to when the DSA master unregisters its net_device, so reacting on the NETDEV_UNREGISTER event is way too late. It seems that it is a pretty established pattern to have a driver's ->shutdown hook redirect to its ->remove hook, so the same code is executed regardless of whether the driver is unbound from the device, or the system is just shutting down. As Florian puts it, it is quite a big hammer for bcmgenet to unregister its net_device during shutdown, but having a common code path with the driver unbind helps ensure it is well tested. So DSA, for better or for worse, has to live with that and engage in an arms race of implementing the ->shutdown hook too, from all individual drivers, and do something sane when paired with masters that unregister their net_device there. The only sane thing to do, of course, is to unlink from the master. However, complications arise really quickly. The pattern of redirecting ->shutdown to ->remove is not unique to bcmgenet or even to net_device drivers. In fact, SPI controllers do it too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers and MDIO controllers do it too (this is something I have not researched too deeply, but even if this is not the case today, it is certainly plausible to happen in the future, and must be taken into consideration). Since DSA switches might be SPI devices, I2C devices, MDIO devices, the insane implication is that for the exact same DSA switch device, we might have both ->shutdown and ->remove getting called. So we need to do something with that insane environment. The pattern I've come up with is "if this, then not that", so if either ->shutdown or ->remove gets called, we set the device's drvdata to NULL, and in the other hook, we check whether the drvdata is NULL and just do nothing. This is probably not necessary for platform devices, just for devices on buses, but I would really insist for consistency among drivers, because when code is copy-pasted, it is not always copy-pasted from the best sources. So depending on whether the DSA switch's ->remove or ->shutdown will get called first, we cannot really guarantee even for the same driver if rebooting will result in the same code path on all platforms. But nonetheless, we need to do something minimally reasonable on ->shutdown too to fix the bug. Of course, the ->remove will do more (a full teardown of the tree, with all data structures freed, and this is why the bug was not caught for so long). The new ->shutdown method is kept separate from dsa_unregister_switch not because we couldn't have unregistered the switch, but simply in the interest of doing something quick and to the point. The big question is: does the DSA switch's ->shutdown get called earlier than the DSA master's ->shutdown? If not, there is still a risk that we might still trigger the WARN_ON in unregister_netdevice that says we are attempting to unregister a net_device which has uppers. That's no good. Although the reference to the master net_device won't physically go away even if DSA's ->shutdown comes afterwards, remember we have a dev_hold on it. The answer to that question lies in this comment above device_link_add: * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). so the fact that DSA uses device_link_add towards its master is not exactly for nothing. device_shutdown() walks devices_kset from the back, so this is our guarantee that DSA's shutdown happens before the master's shutdown. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Tested-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-17 13:34:33 +00:00
if (!priv)
return;
ret = regulator_disable(priv->core_pwr);
if (ret < 0)
dev_err(priv->dev,
"Failed to disable core power: %d\n", ret);
ret = regulator_disable(priv->io_pwr);
if (ret < 0)
dev_err(priv->dev, "Failed to disable io pwr: %d\n",
ret);
if (priv->irq)
mt7530_free_irq(priv);
dsa_unregister_switch(priv->ds);
mutex_destroy(&priv->reg_mutex);
net: dsa: be compatible with masters which unregister on shutdown Lino reports that on his system with bcmgenet as DSA master and KSZ9897 as a switch, rebooting or shutting down never works properly. What does the bcmgenet driver have special to trigger this, that other DSA masters do not? It has an implementation of ->shutdown which simply calls its ->remove implementation. Otherwise said, it unregisters its network interface on shutdown. This message can be seen in a loop, and it hangs the reboot process there: unregister_netdevice: waiting for eth0 to become free. Usage count = 3 So why 3? A usage count of 1 is normal for a registered network interface, and any virtual interface which links itself as an upper of that will increment it via dev_hold. In the case of DSA, this is the call path: dsa_slave_create -> netdev_upper_dev_link -> __netdev_upper_dev_link -> __netdev_adjacent_dev_insert -> dev_hold So a DSA switch with 3 interfaces will result in a usage count elevated by two, and netdev_wait_allrefs will wait until they have gone away. Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and delete themselves, but DSA cannot just vanish and go poof, at most it can unbind itself from the switch devices, but that must happen strictly earlier compared to when the DSA master unregisters its net_device, so reacting on the NETDEV_UNREGISTER event is way too late. It seems that it is a pretty established pattern to have a driver's ->shutdown hook redirect to its ->remove hook, so the same code is executed regardless of whether the driver is unbound from the device, or the system is just shutting down. As Florian puts it, it is quite a big hammer for bcmgenet to unregister its net_device during shutdown, but having a common code path with the driver unbind helps ensure it is well tested. So DSA, for better or for worse, has to live with that and engage in an arms race of implementing the ->shutdown hook too, from all individual drivers, and do something sane when paired with masters that unregister their net_device there. The only sane thing to do, of course, is to unlink from the master. However, complications arise really quickly. The pattern of redirecting ->shutdown to ->remove is not unique to bcmgenet or even to net_device drivers. In fact, SPI controllers do it too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers and MDIO controllers do it too (this is something I have not researched too deeply, but even if this is not the case today, it is certainly plausible to happen in the future, and must be taken into consideration). Since DSA switches might be SPI devices, I2C devices, MDIO devices, the insane implication is that for the exact same DSA switch device, we might have both ->shutdown and ->remove getting called. So we need to do something with that insane environment. The pattern I've come up with is "if this, then not that", so if either ->shutdown or ->remove gets called, we set the device's drvdata to NULL, and in the other hook, we check whether the drvdata is NULL and just do nothing. This is probably not necessary for platform devices, just for devices on buses, but I would really insist for consistency among drivers, because when code is copy-pasted, it is not always copy-pasted from the best sources. So depending on whether the DSA switch's ->remove or ->shutdown will get called first, we cannot really guarantee even for the same driver if rebooting will result in the same code path on all platforms. But nonetheless, we need to do something minimally reasonable on ->shutdown too to fix the bug. Of course, the ->remove will do more (a full teardown of the tree, with all data structures freed, and this is why the bug was not caught for so long). The new ->shutdown method is kept separate from dsa_unregister_switch not because we couldn't have unregistered the switch, but simply in the interest of doing something quick and to the point. The big question is: does the DSA switch's ->shutdown get called earlier than the DSA master's ->shutdown? If not, there is still a risk that we might still trigger the WARN_ON in unregister_netdevice that says we are attempting to unregister a net_device which has uppers. That's no good. Although the reference to the master net_device won't physically go away even if DSA's ->shutdown comes afterwards, remember we have a dev_hold on it. The answer to that question lies in this comment above device_link_add: * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). so the fact that DSA uses device_link_add towards its master is not exactly for nothing. device_shutdown() walks devices_kset from the back, so this is our guarantee that DSA's shutdown happens before the master's shutdown. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Tested-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-17 13:34:33 +00:00
dev_set_drvdata(&mdiodev->dev, NULL);
}
static void mt7530_shutdown(struct mdio_device *mdiodev)
{
struct mt7530_priv *priv = dev_get_drvdata(&mdiodev->dev);
if (!priv)
return;
dsa_switch_shutdown(priv->ds);
dev_set_drvdata(&mdiodev->dev, NULL);
}
static struct mdio_driver mt7530_mdio_driver = {
.probe = mt7530_probe,
.remove = mt7530_remove,
net: dsa: be compatible with masters which unregister on shutdown Lino reports that on his system with bcmgenet as DSA master and KSZ9897 as a switch, rebooting or shutting down never works properly. What does the bcmgenet driver have special to trigger this, that other DSA masters do not? It has an implementation of ->shutdown which simply calls its ->remove implementation. Otherwise said, it unregisters its network interface on shutdown. This message can be seen in a loop, and it hangs the reboot process there: unregister_netdevice: waiting for eth0 to become free. Usage count = 3 So why 3? A usage count of 1 is normal for a registered network interface, and any virtual interface which links itself as an upper of that will increment it via dev_hold. In the case of DSA, this is the call path: dsa_slave_create -> netdev_upper_dev_link -> __netdev_upper_dev_link -> __netdev_adjacent_dev_insert -> dev_hold So a DSA switch with 3 interfaces will result in a usage count elevated by two, and netdev_wait_allrefs will wait until they have gone away. Other stacked interfaces, like VLAN, watch NETDEV_UNREGISTER events and delete themselves, but DSA cannot just vanish and go poof, at most it can unbind itself from the switch devices, but that must happen strictly earlier compared to when the DSA master unregisters its net_device, so reacting on the NETDEV_UNREGISTER event is way too late. It seems that it is a pretty established pattern to have a driver's ->shutdown hook redirect to its ->remove hook, so the same code is executed regardless of whether the driver is unbound from the device, or the system is just shutting down. As Florian puts it, it is quite a big hammer for bcmgenet to unregister its net_device during shutdown, but having a common code path with the driver unbind helps ensure it is well tested. So DSA, for better or for worse, has to live with that and engage in an arms race of implementing the ->shutdown hook too, from all individual drivers, and do something sane when paired with masters that unregister their net_device there. The only sane thing to do, of course, is to unlink from the master. However, complications arise really quickly. The pattern of redirecting ->shutdown to ->remove is not unique to bcmgenet or even to net_device drivers. In fact, SPI controllers do it too (see dspi_shutdown -> dspi_remove), and presumably, I2C controllers and MDIO controllers do it too (this is something I have not researched too deeply, but even if this is not the case today, it is certainly plausible to happen in the future, and must be taken into consideration). Since DSA switches might be SPI devices, I2C devices, MDIO devices, the insane implication is that for the exact same DSA switch device, we might have both ->shutdown and ->remove getting called. So we need to do something with that insane environment. The pattern I've come up with is "if this, then not that", so if either ->shutdown or ->remove gets called, we set the device's drvdata to NULL, and in the other hook, we check whether the drvdata is NULL and just do nothing. This is probably not necessary for platform devices, just for devices on buses, but I would really insist for consistency among drivers, because when code is copy-pasted, it is not always copy-pasted from the best sources. So depending on whether the DSA switch's ->remove or ->shutdown will get called first, we cannot really guarantee even for the same driver if rebooting will result in the same code path on all platforms. But nonetheless, we need to do something minimally reasonable on ->shutdown too to fix the bug. Of course, the ->remove will do more (a full teardown of the tree, with all data structures freed, and this is why the bug was not caught for so long). The new ->shutdown method is kept separate from dsa_unregister_switch not because we couldn't have unregistered the switch, but simply in the interest of doing something quick and to the point. The big question is: does the DSA switch's ->shutdown get called earlier than the DSA master's ->shutdown? If not, there is still a risk that we might still trigger the WARN_ON in unregister_netdevice that says we are attempting to unregister a net_device which has uppers. That's no good. Although the reference to the master net_device won't physically go away even if DSA's ->shutdown comes afterwards, remember we have a dev_hold on it. The answer to that question lies in this comment above device_link_add: * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). so the fact that DSA uses device_link_add towards its master is not exactly for nothing. device_shutdown() walks devices_kset from the back, so this is our guarantee that DSA's shutdown happens before the master's shutdown. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Link: https://lore.kernel.org/netdev/20210909095324.12978-1-LinoSanfilippo@gmx.de/ Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Tested-by: Andrew Lunn <andrew@lunn.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
2021-09-17 13:34:33 +00:00
.shutdown = mt7530_shutdown,
.mdiodrv.driver = {
.name = "mt7530",
.of_match_table = mt7530_of_match,
},
};
mdio_module_driver(mt7530_mdio_driver);
MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
MODULE_DESCRIPTION("Driver for Mediatek MT7530 Switch");
MODULE_LICENSE("GPL");