From 4c3eb3ca13966508bcb64f39dcdef48be22f1731 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 17:19:22 +0300 Subject: [PATCH] IB/mlx4: Add VLAN support for IBoE This patch allows IBoE traffic to be encapsulated in 802.1Q tagged VLAN frames. The VLAN tag is encoded in the GID and derived from it by a simple computation. The netdev notifier callback is modified to catch VLAN device addition/removal and the port's GID table is updated to reflect the change, so that for each netdevice there is an entry in the GID table. When the port's GID table is exhausted, GID entries will not be added. Only children of the main interfaces can add to the GID table; if a VLAN interface is added on another VLAN interface (e.g. "vconfig add eth2.6 8"), then that interfaces will not add an entry to the GID table. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/ah.c | 10 ++++ drivers/infiniband/hw/mlx4/main.c | 99 +++++++++++++++++++++++++++---- drivers/infiniband/hw/mlx4/qp.c | 79 +++++++++++++++++++----- drivers/net/mlx4/en_netdev.c | 10 ++++ drivers/net/mlx4/mlx4_en.h | 1 + drivers/net/mlx4/port.c | 19 ++++++ include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 2 +- 8 files changed, 193 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 3bf3544c0aa0..4b8f9c49397e 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -31,6 +31,7 @@ */ #include +#include #include #include @@ -91,17 +92,26 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; + union ib_gid sgid; u8 mac[6]; int err; int is_mcast; + u16 vlan_tag; err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); if (err) return ERR_PTR(err); memcpy(ah->av.eth.mac, mac, 6); + err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); + if (err) + return ERR_PTR(err); + vlan_tag = rdma_get_vlan_id(&sgid); + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.eth.gid_index = ah_attr->grh.sgid_index; + ah->av.eth.vlan = cpu_to_be16(vlan_tag); if (ah_attr->static_rate) { ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e65db73fc277..8736bd836dc0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,8 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } +static union ib_gid zgid; + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -755,12 +758,17 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; -static void mlx4_addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) { memcpy(eui, dev->dev_addr, 3); memcpy(eui + 5, dev->dev_addr + 3, 3); - eui[3] = 0xFF; - eui[4] = 0xFE; + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } eui[0] ^= 2; } @@ -802,28 +810,93 @@ static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) { struct net_device *ndev = dev->iboe.netdevs[port - 1]; struct update_gid_work *work; + struct net_device *tmp; + int i; + u8 *hits; + int ret; + union ib_gid gid; + int free; + int found; + int need_update = 0; + u16 vid; work = kzalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; - if (!clear) { - mlx4_addrconf_ifid_eui48(&work->gids[0].raw[8], ndev); - work->gids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + hits = kzalloc(128, GFP_ATOMIC); + if (!hits) { + ret = -ENOMEM; + goto out; } - INIT_WORK(&work->work, update_gids_task); - work->port = port; - work->dev = dev; - queue_work(wq, &work->work); + read_lock(&dev_base_lock); + for_each_netdev(&init_net, tmp) { + if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { + gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + vid = rdma_vlan_dev_vlan_id(tmp); + mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); + found = 0; + free = -1; + for (i = 0; i < 128; ++i) { + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + free = i; + if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { + hits[i] = 1; + found = 1; + break; + } + } + if (!found) { + if (tmp == ndev && + (memcmp(&dev->iboe.gid_table[port - 1][0], + &gid, sizeof gid) || + !memcmp(&dev->iboe.gid_table[port - 1][0], + &zgid, sizeof gid))) { + dev->iboe.gid_table[port - 1][0] = gid; + ++need_update; + hits[0] = 1; + } else if (free >= 0) { + dev->iboe.gid_table[port - 1][free] = gid; + hits[free] = 1; + ++need_update; + } + } + } + } + read_unlock(&dev_base_lock); + + for (i = 0; i < 128; ++i) + if (!hits[i]) { + if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + ++need_update; + dev->iboe.gid_table[port - 1][i] = zgid; + } + + if (need_update) { + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); + } else + kfree(work); + + kfree(hits); return 0; + +out: + kfree(work); + return ret; } static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) { switch (event) { case NETDEV_UP: + case NETDEV_CHANGEADDR: update_ipv6_gids(dev, port, 0); break; @@ -871,9 +944,11 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event } } - if (dev == iboe->netdevs[0]) + if (dev == iboe->netdevs[0] || + (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) handle_en_event(ibdev, 1, event); - else if (dev == iboe->netdevs[1]) + else if (dev == iboe->netdevs[1] + || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) handle_en_event(ibdev, 2, event); spin_unlock(&iboe->lock); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 269648445113..9a7794ac34c1 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -37,6 +37,7 @@ #include #include +#include #include @@ -57,10 +58,11 @@ enum { enum { /* * Largest possible UD header: send with GRH and immediate - * data plus 14 bytes for an Ethernet header. (LRH would only - * use 8 bytes, so Ethernet is the biggest case) + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) */ - MLX4_IB_UD_HEADER_SIZE = 78, + MLX4_IB_UD_HEADER_SIZE = 82, MLX4_IB_LSO_HEADER_SPARE = 128, }; @@ -879,6 +881,8 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, IB_LINK_LAYER_ETHERNET; u8 mac[6]; int is_mcast; + u16 vlan_tag; + int vidx; path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); @@ -907,10 +911,10 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, memcpy(path->rgid, ah->grh.dgid.raw, 16); } - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 0xf) << 2); - if (is_eth) { + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1); + if (!(ah->ah_flags & IB_AH_GRH)) return -1; @@ -922,7 +926,18 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, path->ackto = MLX4_IB_LINK_TYPE_ETH; /* use index 0 into MAC table for IBoE */ path->grh_mylmc &= 0x80; - } + + vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + if (vlan_tag < 0x1000) { + if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) + return -ENOENT; + + path->vlan_index = vidx; + path->fl = 1 << 6; + } + } else + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); return 0; } @@ -1277,13 +1292,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; int is_eth; + int is_vlan = 0; int is_grh; + u16 vlan; send_size = 0; for (i = 0; i < wr->num_sge; ++i) @@ -1291,7 +1309,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); - ib_ud_header_init(send_size, !is_eth, is_eth, 0, is_grh, 0, &sqp->ud_header); + if (is_eth) { + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + vlan = rdma_get_vlan_id(&sgid); + is_vlan = vlan < 0x1000; + } + ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); if (!is_eth) { sqp->ud_header.lrh.service_level = @@ -1345,7 +1369,15 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); - sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + if (!is_vlan) { + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + u16 pcp; + + sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13; + sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); + } } else { sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) @@ -1507,13 +1539,14 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_send_wr *wr, __be16 *vlan) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + *vlan = dseg->vlan; } static void set_mlx_icrc_seg(void *dseg) @@ -1616,6 +1649,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; + __be16 vlan = cpu_to_be16(0xffff); spin_lock_irqsave(&qp->sq.lock, flags); @@ -1719,7 +1753,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, wr, &vlan); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1797,6 +1831,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; + if (be16_to_cpu(vlan) < 0x1000) { + ctrl->ins_vlan = 1 << 6; + ctrl->vlan_tag = vlan; + } + stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); @@ -1946,17 +1985,27 @@ static int to_ib_qp_access_flags(int mlx4_flags) return ib_flags; } -static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, +static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { + struct mlx4_dev *dev = ibdev->dev; + int is_eth; + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; + is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) + ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | + ((path->sched_queue & 4) << 1); + else + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->dlid = be16_to_cpu(path->rlid); - ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; @@ -2009,8 +2058,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); - to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index a0d8a26f5a02..9a87c4f3bbbd 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -69,6 +69,7 @@ static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; + int idx; if (!priv->vlgrp) return; @@ -83,7 +84,10 @@ static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) if (err) en_err(priv, "Failed configuring VLAN filter\n"); } + if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) + en_err(priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); + } static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) @@ -91,6 +95,7 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; + int idx; if (!priv->vlgrp) return; @@ -101,6 +106,11 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); + if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx)) + mlx4_unregister_vlan(mdev->dev, priv->port, idx); + else + en_err(priv, "could not find vid %d in cache\n", vid); + if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp); if (err) diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 449210994ee9..dab5eafb8946 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -463,6 +463,7 @@ struct mlx4_en_priv { char *mc_addrs; int mc_addrs_cnt; struct mlx4_en_stat_out_mbox hw_stats; + int vids[128]; }; diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c index 606aa58afdea..56371ef328ef 100644 --- a/drivers/net/mlx4/port.c +++ b/drivers/net/mlx4/port.c @@ -182,6 +182,25 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port, return err; } +int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx) +{ + struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; + int i; + + for (i = 0; i < MLX4_MAX_VLAN_NUM; ++i) { + if (table->refs[i] && + (vid == (MLX4_VLAN_MASK & + be32_to_cpu(table->entries[i])))) { + /* VLAN already registered, increase reference count */ + *idx = i; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan); + int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index) { struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ca5645c43f61..ff9893a33e90 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -496,6 +496,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index); +int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 97cfdc8d7e2f..0eeb2a1a867c 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -109,7 +109,7 @@ struct mlx4_qp_path { __be32 tclass_flowlabel; u8 rgid[16]; u8 sched_queue; - u8 snooper_flags; + u8 vlan_index; u8 reserved3[2]; u8 counter_index; u8 reserved4;