From d0d68b8693bd16bfbbc93b89f1d9f3351723307c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 4 Oct 2010 12:11:34 +0000 Subject: [PATCH] IB/mlx4: Signal node desc changes to SM by using FW to generate trap 144 The Node Description cannot be changed via MADs (it is read-only). Until now, it was changed in the driver via sysfs, and the new Node Description was simply inserted by the driver into MAD responses (replacing the description returned by FW). System startup scripts use the sysfs interface to change the node description at driver startup to show the hostname, etc. However, this has a race condition: the SM could discover the original FW node description rather than the system-specific description if it queried the port before the startup scripts finish running. For mlx4, we fix this with a new FW command (SET_NODE) that allows passing the new node description to FW. When this command is invoked, FW sends a trap 144 to the SM. When it gets this trap, the SM can query the node to obtain the new node description -- thus eliminating the effects of the race. This patch simply calls SET_NODE command when a new node description is entered via sysfs (thus causing trap 144 to be issued by the FW). We ignore all failures of the SET_NODE command (including those caused by using a device FW that predates the SET_NODE command), since in that case things work just as before. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 28 +++++++++++++++++++++++----- include/linux/mlx4/cmd.h | 1 + 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 62e8cd6f0371..ac6951d99336 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -272,14 +272,32 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { + struct mlx4_cmd_mailbox *mailbox; + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; - if (mask & IB_DEVICE_MODIFY_NODE_DESC) { - spin_lock(&to_mdev(ibdev)->sm_lock); - memcpy(ibdev->node_desc, props->node_desc, 64); - spin_unlock(&to_mdev(ibdev)->sm_lock); - } + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + spin_lock(&to_mdev(ibdev)->sm_lock); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock(&to_mdev(ibdev)->sm_lock); + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); + if (IS_ERR(mailbox)) + return 0; + + memset(mailbox->buf, 0, 256); + memcpy(mailbox->buf, props->node_desc, 64); + mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A); + + mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..2731266e73a7 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -57,6 +57,7 @@ enum { MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, MLX4_CMD_SET_PORT = 0xc, + MLX4_CMD_SET_NODE = 0x5a, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, MLX4_CMD_UNMAP_ICM = 0xff9,