diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 09eb1d19e72d..77c8cb4c5073 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -748,8 +748,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) break; } - if (npolled) - mlx4_cq_set_ci(&cq->mcq); + mlx4_cq_set_ci(&cq->mcq); spin_unlock_irqrestore(&cq->lock, flags); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index abce99ed565f..75d305629300 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -163,7 +163,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1; + props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; out: kfree(in_mad); @@ -182,12 +182,27 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) } static int ib_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, - struct ib_smp *in_mad, - struct ib_smp *out_mad) + struct ib_port_attr *props) { + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; int ext_active_speed; - int err; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, + in_mad, out_mad); + if (err) + goto out; + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; @@ -238,8 +253,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, if (out_mad->data[15] & 0x1) props->active_speed = IB_SPEED_FDR10; } - - return 0; +out: + kfree(in_mad); + kfree(out_mad); + return err; } static u8 state_to_phys_state(enum ib_port_state state) @@ -248,32 +265,42 @@ static u8 state_to_phys_state(enum ib_port_state state) } static int eth_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, - struct ib_smp *out_mad) + struct ib_port_attr *props) { - struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe; + + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + struct mlx4_ib_iboe *iboe = &mdev->iboe; struct net_device *ndev; enum ib_mtu tmp; + struct mlx4_cmd_mailbox *mailbox; + int err = 0; - props->active_width = IB_WIDTH_1X; + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? + IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP; - props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; - props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; + props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; - props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); - props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->max_mtu = IB_MTU_4096; - props->subnet_timeout = 0; - props->max_vl_num = out_mad->data[37] >> 4; - props->init_type_reply = 0; + props->max_vl_num = 2; props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; spin_lock(&iboe->lock); ndev = iboe->netdevs[port - 1]; if (!ndev) - goto out; + goto out_unlock; tmp = iboe_get_mtu(ndev->mtu); props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; @@ -281,41 +308,23 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); - -out: +out_unlock: spin_unlock(&iboe->lock); - return 0; +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; } static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; - - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; + int err; memset(props, 0, sizeof *props); - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); - - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; - err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? - ib_link_query_port(ibdev, port, props, in_mad, out_mad) : - eth_link_query_port(ibdev, port, props, out_mad); - -out: - kfree(in_mad); - kfree(out_mad); + ib_link_query_port(ibdev, port, props) : + eth_link_query_port(ibdev, port, props); return err; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index aa2aefa4236c..3a7848966627 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1884,6 +1884,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wmb(); if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { + *bad_wr = wr; err = -EINVAL; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8fa41f3082cf..780b5adf8e7d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -79,7 +79,8 @@ enum { (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ (1ull << MLX4_EVENT_TYPE_CMD) | \ (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL) | \ - (1ull << MLX4_EVENT_TYPE_FLR_EVENT)) + (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \ + (1ull << MLX4_EVENT_TYPE_FATAL_WARNING)) static void eq_set_ci(struct mlx4_eq *eq, int req_not) { @@ -443,6 +444,35 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_flr_event_work); break; + + case MLX4_EVENT_TYPE_FATAL_WARNING: + if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) { + if (mlx4_is_master(dev)) + for (i = 0; i < dev->num_slaves; i++) { + mlx4_dbg(dev, "%s: Sending " + "MLX4_FATAL_WARNING_SUBTYPE_WARMING" + " to slave: %d\n", __func__, i); + if (i == dev->caps.function) + continue; + mlx4_slave_event(dev, i, eqe); + } + mlx4_err(dev, "Temperature Threshold was reached! " + "Threshold: %d celsius degrees; " + "Current Temperature: %d\n", + be16_to_cpu(eqe->event.warming.warning_threshold), + be16_to_cpu(eqe->event.warming.current_temperature)); + } else + mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), " + "subtype %02x on EQ %d at index %u. owner=%x, " + "nent=0x%x, slave=%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + eqe->slave_id, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? "HW" : "SW"); + + break; + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2f94d30ab12b..a6ee22b319f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -394,7 +394,7 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) return ret; } -static int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) +int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_slave; @@ -647,6 +647,99 @@ out: return err ? err : count; } +enum ibta_mtu { + IB_MTU_256 = 1, + IB_MTU_512 = 2, + IB_MTU_1024 = 3, + IB_MTU_2048 = 4, + IB_MTU_4096 = 5 +}; + +static inline int int_to_ibta_mtu(int mtu) +{ + switch (mtu) { + case 256: return IB_MTU_256; + case 512: return IB_MTU_512; + case 1024: return IB_MTU_1024; + case 2048: return IB_MTU_2048; + case 4096: return IB_MTU_4096; + default: return -1; + } +} + +static inline int ibta_mtu_to_int(enum ibta_mtu mtu) +{ + switch (mtu) { + case IB_MTU_256: return 256; + case IB_MTU_512: return 512; + case IB_MTU_1024: return 1024; + case IB_MTU_2048: return 2048; + case IB_MTU_4096: return 4096; + default: return -1; + } +} + +static ssize_t show_port_ib_mtu(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_mtu_attr); + struct mlx4_dev *mdev = info->dev; + + if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) + mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); + + sprintf(buf, "%d\n", + ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); + return strlen(buf); +} + +static ssize_t set_port_ib_mtu(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, + port_mtu_attr); + struct mlx4_dev *mdev = info->dev; + struct mlx4_priv *priv = mlx4_priv(mdev); + int err, port, mtu, ibta_mtu = -1; + + if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { + mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); + return -EINVAL; + } + + err = sscanf(buf, "%d", &mtu); + if (err > 0) + ibta_mtu = int_to_ibta_mtu(mtu); + + if (err <= 0 || ibta_mtu < 0) { + mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); + return -EINVAL; + } + + mdev->caps.port_ib_mtu[info->port] = ibta_mtu; + + mlx4_stop_sense(mdev); + mutex_lock(&priv->port_mutex); + mlx4_unregister_device(mdev); + for (port = 1; port <= mdev->caps.num_ports; port++) { + mlx4_CLOSE_PORT(mdev, port); + err = mlx4_SET_PORT(mdev, port); + if (err) { + mlx4_err(mdev, "Failed to set port %d, " + "aborting\n", port); + goto err_set_port; + } + } + err = mlx4_register_device(mdev); +err_set_port: + mutex_unlock(&priv->port_mutex); + mlx4_start_sense(mdev); + return err ? err : count; +} + static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1131,6 +1224,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; + init_hca.log_uar_sz = ilog2(dev->caps.num_uars); init_hca.uar_page_sz = PAGE_SHIFT - 12; @@ -1361,6 +1456,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) "with caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; + if (mlx4_is_mfunc(dev)) + dev->caps.port_ib_mtu[port] = IB_MTU_2048; + else + dev->caps.port_ib_mtu[port] = IB_MTU_4096; + err = mlx4_SET_PORT(dev, port); if (err) { mlx4_err(dev, "Failed to set port %d, aborting\n", @@ -1515,6 +1615,24 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port = -1; } + sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); + info->port_mtu_attr.attr.name = info->dev_mtu_name; + if (mlx4_is_mfunc(dev)) + info->port_mtu_attr.attr.mode = S_IRUGO; + else { + info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; + info->port_mtu_attr.store = set_port_ib_mtu; + } + info->port_mtu_attr.show = show_port_ib_mtu; + sysfs_attr_init(&info->port_mtu_attr.attr); + + err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); + if (err) { + mlx4_err(dev, "Failed to create mtu file for port %d\n", port); + device_remove_file(&info->dev->pdev->dev, &info->port_attr); + info->port = -1; + } + return err; } @@ -1524,6 +1642,7 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) return; device_remove_file(&info->dev->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c34d30a5d77b..a99a13e9e695 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -363,6 +363,10 @@ struct mlx4_eqe { struct { __be32 slave_id; } __packed flr_event; + struct { + __be16 current_temperature; + __be16 warning_threshold; + } __packed warming; } event; u8 slave_id; u8 reserved3[2]; @@ -399,7 +403,7 @@ struct mlx4_profile { int num_cq; int num_mcg; int num_mpt; - int num_mtt; + unsigned num_mtt; }; struct mlx4_fw { @@ -682,6 +686,8 @@ struct mlx4_port_info { char dev_name[16]; struct device_attribute port_attr; enum mlx4_port_type tmp_type; + char dev_mtu_name[16]; + struct device_attribute port_mtu_attr; struct mlx4_mac_table mac_table; struct radix_tree_root mac_tree; struct mlx4_vlan_table vlan_table; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 25a80d71fb2a..5b7c06e0cd05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -816,6 +816,9 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, u64 mtt_offset; int err = -ENOMEM; + if (max_maps > dev->caps.max_fmr_maps) + return -EINVAL; + if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 51708dd7c8b0..409d444c4df5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -723,10 +723,18 @@ int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, vhcr->op_modifier, inbox); } +/* bit locations for set port command with zero op modifier */ +enum { + MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */ + MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */ + MLX4_CHANGE_PORT_VL_CAP = 21, + MLX4_CHANGE_PORT_MTU_CAP = 22, +}; + int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) { struct mlx4_cmd_mailbox *mailbox; - int err; + int err, vl_cap; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) return 0; @@ -738,8 +746,19 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) memset(mailbox->buf, 0, 256); ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; - err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + + /* IB VL CAP enum isn't used by the firmware, just numerical values */ + for (vl_cap = 8; vl_cap >= 1; vl_cap >>= 1) { + ((__be32 *) mailbox->buf)[0] = cpu_to_be32( + (1 << MLX4_CHANGE_PORT_MTU_CAP) | + (1 << MLX4_CHANGE_PORT_VL_CAP) | + (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) | + (vl_cap << MLX4_SET_PORT_VL_CAP)); + err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + if (err != -ENOMEM) + break; + } mlx4_free_cmd_mailbox(dev, mailbox); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c index 1129677daa62..06e5adeb76f7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -83,12 +83,31 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, u64 total_size = 0; struct mlx4_resource *profile; struct mlx4_resource tmp; + struct sysinfo si; int i, j; profile = kcalloc(MLX4_RES_NUM, sizeof(*profile), GFP_KERNEL); if (!profile) return -ENOMEM; + /* + * We want to scale the number of MTTs with the size of the + * system memory, since it makes sense to register a lot of + * memory on a system with a lot of memory. As a heuristic, + * make sure we have enough MTTs to cover twice the system + * memory (with PAGE_SIZE entries). + * + * This number has to be a power of two and fit into 32 bits + * due to device limitations, so cap this at 2^31 as well. + * That limits us to 8TB of memory registration per HCA with + * 4KB pages, which is probably OK for the next few months. + */ + si_meminfo(&si); + request->num_mtt = + roundup_pow_of_two(max_t(unsigned, request->num_mtt, + min(1UL << 31, + si.totalram >> (log_mtts_per_seg - 1)))); + profile[MLX4_RES_QP].size = dev_cap->qpc_entry_sz; profile[MLX4_RES_RDMARC].size = dev_cap->rdmarc_entry_sz; profile[MLX4_RES_ALTC].size = dev_cap->altc_entry_sz; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index be7f235c04c0..b8432516d68a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -129,6 +129,7 @@ enum mlx4_event { MLX4_EVENT_TYPE_CMD = 0x0a, MLX4_EVENT_TYPE_VEP_UPDATE = 0x19, MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, + MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, MLX4_EVENT_TYPE_NONE = 0xff, }; @@ -138,6 +139,10 @@ enum { MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 }; +enum { + MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, +}; + enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11, @@ -269,6 +274,7 @@ struct mlx4_caps { int num_comp_vectors; int comp_pool; int num_mpts; + int max_fmr_maps; int num_mtts; int fmr_reserved_mtts; int reserved_mtts; @@ -304,6 +310,7 @@ struct mlx4_caps { u32 port_mask[MLX4_MAX_PORTS + 1]; enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; + u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list {