IB/qib: Add congestion control agent implementation

Add a congestion control agent in the driver that handles gets and
sets from the congestion control manager in the fabric for the
Performance Scale Messaging (PSM) library.

Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
Mike Marciniszyn 2012-07-19 13:04:04 +00:00 committed by Roland Dreier
parent 551ace124d
commit 36a8f01cd2
5 changed files with 790 additions and 12 deletions

View file

@ -519,6 +519,7 @@ struct qib_pportdata {
struct qib_devdata *dd; struct qib_devdata *dd;
struct qib_chippport_specific *cpspec; /* chip-specific per-port */ struct qib_chippport_specific *cpspec; /* chip-specific per-port */
struct kobject pport_kobj; struct kobject pport_kobj;
struct kobject pport_cc_kobj;
struct kobject sl2vl_kobj; struct kobject sl2vl_kobj;
struct kobject diagc_kobj; struct kobject diagc_kobj;
@ -638,6 +639,39 @@ struct qib_pportdata {
struct timer_list led_override_timer; struct timer_list led_override_timer;
struct xmit_wait cong_stats; struct xmit_wait cong_stats;
struct timer_list symerr_clear_timer; struct timer_list symerr_clear_timer;
/* Synchronize access between driver writes and sysfs reads */
spinlock_t cc_shadow_lock
____cacheline_aligned_in_smp;
/* Shadow copy of the congestion control table */
struct cc_table_shadow *ccti_entries_shadow;
/* Shadow copy of the congestion control entries */
struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
/* List of congestion control table entries */
struct ib_cc_table_entry_shadow *ccti_entries;
/* 16 congestion entries with each entry corresponding to a SL */
struct ib_cc_congestion_entry_shadow *congestion_entries;
/* Total number of congestion control table entries */
u16 total_cct_entry;
/* Bit map identifying service level */
u16 cc_sl_control_map;
/* maximum congestion control table index */
u16 ccti_limit;
/* CA's max number of 64 entry units in the congestion control table */
u8 cc_max_table_entries;
/* Maximum number of congestion control entries that the agent expects
* the manager to send.
*/
u8 cc_supported_table_entries;
}; };
/* Observers. Not to be taken lightly, possibly not to ship. */ /* Observers. Not to be taken lightly, possibly not to ship. */
@ -1078,6 +1112,7 @@ extern u32 qib_cpulist_count;
extern unsigned long *qib_cpulist; extern unsigned long *qib_cpulist;
extern unsigned qib_wc_pat; extern unsigned qib_wc_pat;
extern unsigned qib_cc_table_size;
int qib_init(struct qib_devdata *, int); int qib_init(struct qib_devdata *, int);
int init_chip_wc_pat(struct qib_devdata *dd, u32); int init_chip_wc_pat(struct qib_devdata *dd, u32);
int qib_enable_wc(struct qib_devdata *dd); int qib_enable_wc(struct qib_devdata *dd);

View file

@ -41,6 +41,7 @@
#include "qib.h" #include "qib.h"
#include "qib_common.h" #include "qib_common.h"
#include "qib_mad.h"
/* /*
* min buffers we want to have per context, after driver * min buffers we want to have per context, after driver
@ -71,6 +72,9 @@ unsigned qib_n_krcv_queues;
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
unsigned qib_cc_table_size;
module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
/* /*
* qib_wc_pat parameter: * qib_wc_pat parameter:
* 0 is WC via MTRR * 0 is WC via MTRR
@ -199,6 +203,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
u8 hw_pidx, u8 port) u8 hw_pidx, u8 port)
{ {
int size;
ppd->dd = dd; ppd->dd = dd;
ppd->hw_pidx = hw_pidx; ppd->hw_pidx = hw_pidx;
ppd->port = port; /* IB port number, not index */ ppd->port = port; /* IB port number, not index */
@ -212,6 +217,81 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
ppd->symerr_clear_timer.data = (unsigned long)ppd; ppd->symerr_clear_timer.data = (unsigned long)ppd;
ppd->qib_wq = NULL; ppd->qib_wq = NULL;
spin_lock_init(&ppd->cc_shadow_lock);
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
goto bail;
ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
ppd->cc_max_table_entries =
ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
* IB_CCT_ENTRIES;
ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
if (!ppd->ccti_entries) {
qib_dev_err(dd,
"failed to allocate congestion control table for port %d!\n",
port);
goto bail;
}
size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
if (!ppd->congestion_entries) {
qib_dev_err(dd,
"failed to allocate congestion setting list for port %d!\n",
port);
goto bail_1;
}
size = sizeof(struct cc_table_shadow);
ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
if (!ppd->ccti_entries_shadow) {
qib_dev_err(dd,
"failed to allocate shadow ccti list for port %d!\n",
port);
goto bail_2;
}
size = sizeof(struct ib_cc_congestion_setting_attr);
ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
if (!ppd->congestion_entries_shadow) {
qib_dev_err(dd,
"failed to allocate shadow congestion setting list for port %d!\n",
port);
goto bail_3;
}
return;
bail_3:
kfree(ppd->ccti_entries_shadow);
ppd->ccti_entries_shadow = NULL;
bail_2:
kfree(ppd->congestion_entries);
ppd->congestion_entries = NULL;
bail_1:
kfree(ppd->ccti_entries);
ppd->ccti_entries = NULL;
bail:
/* User is intentionally disabling the congestion control agent */
if (!qib_cc_table_size)
return;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
qib_cc_table_size = 0;
qib_dev_err(dd,
"Congestion Control table size %d less than minimum %d for port %d\n",
qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
}
qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
port);
return;
} }
static int init_pioavailregs(struct qib_devdata *dd) static int init_pioavailregs(struct qib_devdata *dd)
@ -1164,10 +1244,24 @@ static void cleanup_device_data(struct qib_devdata *dd)
unsigned long flags; unsigned long flags;
/* users can't do anything more with chip */ /* users can't do anything more with chip */
for (pidx = 0; pidx < dd->num_pports; ++pidx) for (pidx = 0; pidx < dd->num_pports; ++pidx) {
if (dd->pport[pidx].statusp) if (dd->pport[pidx].statusp)
*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
spin_lock(&dd->pport[pidx].cc_shadow_lock);
kfree(dd->pport[pidx].congestion_entries);
dd->pport[pidx].congestion_entries = NULL;
kfree(dd->pport[pidx].ccti_entries);
dd->pport[pidx].ccti_entries = NULL;
kfree(dd->pport[pidx].ccti_entries_shadow);
dd->pport[pidx].ccti_entries_shadow = NULL;
kfree(dd->pport[pidx].congestion_entries_shadow);
dd->pport[pidx].congestion_entries_shadow = NULL;
spin_unlock(&dd->pport[pidx].cc_shadow_lock);
}
if (!qib_wc_pat) if (!qib_wc_pat)
qib_disable_wc(dd); qib_disable_wc(dd);

View file

@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
} }
/*
 * Turn @smp into a GetResp and tell the MAD layer that processing failed
 * but a reply should still be sent.
 *
 * The verbs framework will handle the directed/LID route packet changes;
 * only the direction bit is set here for directed-route packets.
 */
static int reply_failure(struct ib_smp *smp)
{
	const int directed =
		(smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);

	smp->method = IB_MGMT_METHOD_GET_RESP;
	if (directed)
		smp->status |= IB_SMP_DIRECTION;

	return IB_MAD_RESULT_REPLY | IB_MAD_RESULT_FAILURE;
}
static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
{ {
struct ib_mad_send_buf *send_buf; struct ib_mad_send_buf *send_buf;
@ -2047,6 +2059,298 @@ static int process_perf(struct ib_device *ibdev, u8 port,
return ret; return ret;
} }
/*
 * Answer a Get(ClassPortInfo) on the congestion control class.
 *
 * The attribute payload is cleared and then only the version fields,
 * the (empty) capability mask and the response time are filled in.
 * @ibdev is not used.
 */
static int cc_get_classportinfo(struct ib_cc_mad *ccp,
				struct ib_device *ibdev)
{
	struct ib_cc_classportinfo_attr *cpi;

	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
	cpi = (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;

	cpi->base_version = 1;
	cpi->class_version = 1;
	cpi->cap_mask = 0;

	/* 4.096 usec * 2^18 == 1.073741824 sec expected response time */
	cpi->resp_time_value = 18;

	return reply((struct ib_smp *) ccp);
}
/*
 * Answer a Get(CongestionInfo): report no congestion info flags and the
 * port's table capacity (cc_max_table_entries) as control_table_cap.
 */
static int cc_get_congestion_info(struct ib_cc_mad *ccp,
				struct ib_device *ibdev, u8 port)
{
	struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
	struct ib_cc_info_attr *info;

	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
	info = (struct ib_cc_info_attr *)ccp->mgmt_data;

	info->congestion_info = 0;
	info->control_table_cap = ppd->cc_max_table_entries;

	return reply((struct ib_smp *) ccp);
}
/*
 * Answer a Get(CACongestionSetting) from the shadow copy of the settings.
 *
 * The shadow is kept in host byte order, so the 16-bit fields are
 * converted to big-endian while being copied into the reply.  The copy
 * is done under cc_shadow_lock.
 */
static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
				struct ib_device *ibdev, u8 port)
{
	struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
	struct ib_cc_congestion_setting_attr *out =
		(struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
	struct ib_cc_congestion_setting_attr_shadow *shadow;
	int sl;

	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));

	spin_lock(&ppd->cc_shadow_lock);

	shadow = ppd->congestion_entries_shadow;
	out->port_control = cpu_to_be16(shadow->port_control);
	out->control_map = cpu_to_be16(shadow->control_map);

	for (sl = 0; sl < IB_CC_CCS_ENTRIES; sl++) {
		struct ib_cc_congestion_entry_shadow *src =
			&shadow->entries[sl];

		out->entries[sl].ccti_increase = src->ccti_increase;
		out->entries[sl].ccti_timer = cpu_to_be16(src->ccti_timer);
		out->entries[sl].trigger_threshold = src->trigger_threshold;
		out->entries[sl].ccti_min = src->ccti_min;
	}

	spin_unlock(&ppd->cc_shadow_lock);

	return reply((struct ib_smp *) ccp);
}
/*
 * Answer a Get(CongestionControlTable) for the 64-entry block selected
 * by the attribute modifier.
 *
 * The request fails if the block index is beyond what the agent
 * supports or beyond the last block derived from the shadow table's
 * ccti_last_entry.  On success the reply carries the index of the last
 * entry of the block as ccti_limit, followed by the block's entries
 * converted to big-endian.
 */
static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
				struct ib_device *ibdev, u8 port)
{
	struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
	struct ib_cc_table_attr *out =
		(struct ib_cc_table_attr *)ccp->mgmt_data;
	struct ib_cc_table_entry_shadow *src;
	u32 block = be32_to_cpu(ccp->attr_mod);
	u32 last_block;
	u32 last_idx;
	int i;

	/* Is the table index more than what is supported? */
	if (block > IB_CC_TABLE_CAP_DEFAULT - 1)
		return reply_failure((struct ib_smp *) ccp);

	memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));

	spin_lock(&ppd->cc_shadow_lock);

	/* Highest block that may be read back; never less than block 0 */
	last_block =
		(ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
	if (last_block)
		last_block--;

	if (block > last_block) {
		spin_unlock(&ppd->cc_shadow_lock);
		return reply_failure((struct ib_smp *) ccp);
	}

	ccp->attr_mod = cpu_to_be32(block);

	/* Absolute index of the final entry in this block */
	last_idx = IB_CCT_ENTRIES * (block + 1) - 1;
	out->ccti_limit = cpu_to_be16(last_idx);

	src = &ppd->ccti_entries_shadow->entries[IB_CCT_ENTRIES * block];

	/* Reduce to an index within the block and copy up to it */
	last_idx %= IB_CCT_ENTRIES;
	for (i = 0; i <= last_idx; i++)
		out->ccti_entries[i].entry = cpu_to_be16(src[i].entry);

	spin_unlock(&ppd->cc_shadow_lock);

	return reply((struct ib_smp *) ccp);
}
/*
 * Handle a Set(CACongestionSetting).
 *
 * The control map and the per-entry settings are converted to host byte
 * order and stored in the port's working copies (cc_sl_control_map and
 * congestion_entries).  The shadow copy is not touched here.
 */
static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
				struct ib_device *ibdev, u8 port)
{
	struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
	struct ib_cc_congestion_setting_attr *in =
		(struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
	int sl;

	ppd->cc_sl_control_map = be16_to_cpu(in->control_map);

	for (sl = 0; sl < IB_CC_CCS_ENTRIES; sl++) {
		struct ib_cc_congestion_entry *src = &in->entries[sl];

		ppd->congestion_entries[sl].ccti_increase =
			src->ccti_increase;
		ppd->congestion_entries[sl].ccti_timer =
			be16_to_cpu(src->ccti_timer);
		ppd->congestion_entries[sl].trigger_threshold =
			src->trigger_threshold;
		ppd->congestion_entries[sl].ccti_min = src->ccti_min;
	}

	return reply((struct ib_smp *) ccp);
}
/*
 * Handle a Set(CongestionControlTable) for the 64-entry block selected
 * by the attribute modifier.
 *
 * The entries are stored in host byte order in ppd->ccti_entries, then
 * the table and the previously received congestion settings are
 * published to the shadow copies under cc_shadow_lock.
 *
 * The running entry count (ppd->total_cct_entry) is only updated once
 * the request has been validated, so a rejected MAD leaves the port's
 * bookkeeping exactly as it was.
 *
 * Returns the result of reply() on success, or of reply_failure() when
 * the block index or the resulting entry count is out of range.
 */
static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
				struct ib_device *ibdev, u8 port)
{
	struct ib_cc_table_attr *p =
		(struct ib_cc_table_attr *)ccp->mgmt_data;
	struct qib_ibport *ibp = to_iport(ibdev, port);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
	u32 ccti_limit = be16_to_cpu(p->ccti_limit);
	u32 cct_entry;
	u32 total;
	struct ib_cc_table_entry_shadow *entries;
	int i;

	/* Is the table index more than what is supported? */
	if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
		goto bail;

	/*
	 * If this packet is the first in the sequence then the total
	 * table entry count restarts from zero.
	 */
	total = (ccti_limit < IB_CCT_ENTRIES) ? 0 : ppd->total_cct_entry;

	/* cct_entry is the index of the last entry in this block, 0 to 63 */
	cct_entry = ccti_limit % IB_CCT_ENTRIES;
	total += cct_entry + 1;

	if (total > ppd->cc_supported_table_entries)
		goto bail;

	/* Request accepted: commit the new count and limit */
	ppd->total_cct_entry = total;
	ppd->ccti_limit = ccti_limit;

	entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);

	for (i = 0; i <= cct_entry; i++)
		entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);

	spin_lock(&ppd->cc_shadow_lock);

	ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
	memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
		(ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));

	ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
	ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
	memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
		IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));

	spin_unlock(&ppd->cc_shadow_lock);

	return reply((struct ib_smp *) ccp);

bail:
	return reply_failure((struct ib_smp *) ccp);
}
/*
 * Congestion control key check.
 *
 * Currently a stub: no argument is examined and 0 is always returned,
 * which process_cc() treats as "continue processing".
 */
static int check_cc_key(struct qib_ibport *ibp,
			struct ib_cc_mad *ccp, int mad_flags)
{
	return 0;
}
/*
 * Dispatch a congestion control class MAD.
 *
 * The incoming MAD is copied to @out_mad and handled in place.  Only
 * class version 2 is accepted.  Get and Set are routed by attribute ID
 * to the cc_get_*() and cc_set_*() handlers; GetResp is passed back to
 * the caller untouched; any other method, or an attribute that has no
 * handler, is answered with the matching "unsupported" status.
 */
static int process_cc(struct ib_device *ibdev, int mad_flags,
			u8 port, struct ib_mad *in_mad,
			struct ib_mad *out_mad)
{
	struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
	struct qib_ibport *ibp = to_iport(ibdev, port);
	int ret;

	*out_mad = *in_mad;

	if (ccp->class_version != 2) {
		ccp->status |= IB_SMP_UNSUP_VERSION;
		return reply((struct ib_smp *)ccp);
	}

	ret = check_cc_key(ibp, ccp, mad_flags);
	if (ret)
		return ret;

	switch (ccp->method) {
	case IB_MGMT_METHOD_GET:
		switch (ccp->attr_id) {
		case IB_CC_ATTR_CLASSPORTINFO:
			return cc_get_classportinfo(ccp, ibdev);

		case IB_CC_ATTR_CONGESTION_INFO:
			return cc_get_congestion_info(ccp, ibdev, port);

		case IB_CC_ATTR_CA_CONGESTION_SETTING:
			return cc_get_congestion_setting(ccp, ibdev, port);

		case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
			return cc_get_congestion_control_table(ccp, ibdev,
							       port);

		default:
			break;
		}
		break;

	case IB_MGMT_METHOD_SET:
		switch (ccp->attr_id) {
		case IB_CC_ATTR_CA_CONGESTION_SETTING:
			return cc_set_congestion_setting(ccp, ibdev, port);

		case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
			return cc_set_congestion_control_table(ccp, ibdev,
							       port);

		default:
			break;
		}
		break;

	case IB_MGMT_METHOD_GET_RESP:
		/*
		 * The ib_mad module will call us to process responses
		 * before checking for other consumers.
		 * Just tell the caller to process it normally.
		 */
		return IB_MAD_RESULT_SUCCESS;

	case IB_MGMT_METHOD_TRAP:
	default:
		ccp->status |= IB_SMP_UNSUP_METHOD;
		return reply((struct ib_smp *) ccp);
	}

	/* Get or Set of an attribute this agent does not implement */
	ccp->status |= IB_SMP_UNSUP_METH_ATTR;
	return reply((struct ib_smp *) ccp);
}
/** /**
* qib_process_mad - process an incoming MAD packet * qib_process_mad - process an incoming MAD packet
* @ibdev: the infiniband device this packet came in on * @ibdev: the infiniband device this packet came in on
@ -2071,6 +2375,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
struct ib_mad *in_mad, struct ib_mad *out_mad) struct ib_mad *in_mad, struct ib_mad *out_mad)
{ {
int ret; int ret;
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
switch (in_mad->mad_hdr.mgmt_class) { switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
@ -2082,6 +2388,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
ret = process_perf(ibdev, port, in_mad, out_mad); ret = process_perf(ibdev, port, in_mad, out_mad);
goto bail; goto bail;
case IB_MGMT_CLASS_CONG_MGMT:
if (!ppd->congestion_entries_shadow ||
!qib_cc_table_size) {
ret = IB_MAD_RESULT_SUCCESS;
goto bail;
}
ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
goto bail;
default: default:
ret = IB_MAD_RESULT_SUCCESS; ret = IB_MAD_RESULT_SUCCESS;
} }

View file

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * Copyright (c) 2012 Intel Corporation. All rights reserved.
* All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
@ -31,6 +31,8 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE. * SOFTWARE.
*/ */
#ifndef _QIB_MAD_H
#define _QIB_MAD_H
#include <rdma/ib_pma.h> #include <rdma/ib_pma.h>
@ -222,6 +224,198 @@ struct ib_pma_portcounters_cong {
#define IB_PMA_SEL_CONG_XMIT 0x04 #define IB_PMA_SEL_CONG_XMIT 0x04
#define IB_PMA_SEL_CONG_ROUTING 0x08 #define IB_PMA_SEL_CONG_ROUTING 0x08
/*
* Congestion control class attributes
*/
#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001)
#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002)
#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011)
#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012)
#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013)
#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014)
#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015)
#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016)
#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017)
#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018)
/* generalizations for threshold values */
#define IB_CC_THRESHOLD_NONE 0x0
#define IB_CC_THRESHOLD_MIN 0x1
#define IB_CC_THRESHOLD_MAX 0xf
/* CCA MAD header constants */
#define IB_CC_MAD_LOGDATA_LEN 32
#define IB_CC_MAD_MGMTDATA_LEN 192
/*
 * Congestion control class MAD as accessed by this driver.
 *
 * The structure is packed and its multi-byte header fields are declared
 * big-endian.  process_cc() overlays it on the outgoing struct ib_mad,
 * and the cc_get_*() and cc_set_*() handlers cast mgmt_data to the
 * attribute structure that matches attr_id.
 */
struct ib_cc_mad {
u8 base_version;
u8 mgmt_class;
u8 class_version;
u8 method;
__be16 status;
__be16 class_specific;
__be64 tid;
__be16 attr_id;
__be16 resv;
__be32 attr_mod;
__be64 cckey;
/* For CongestionLog attribute only */
u8 log_data[IB_CC_MAD_LOGDATA_LEN];
/* Attribute payload */
u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
} __packed;
/*
* Congestion Control class portinfo capability mask bits
*/
#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0)
#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1)
#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2)
#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8)
/*
 * ClassPortInfo attribute payload for the congestion control class.
 *
 * cc_get_classportinfo() zeroes the payload and then sets only
 * base_version, class_version, cap_mask and resp_time_value; the
 * redirect_* and trap_* fields are left as zero by that handler.
 */
struct ib_cc_classportinfo_attr {
u8 base_version;
u8 class_version;
__be16 cap_mask;
u8 reserved[3];
u8 resp_time_value; /* only lower 5 bits */
union ib_gid redirect_gid;
__be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
__be16 redirect_lid;
__be16 redirect_pkey;
__be32 redirect_qp; /* only lower 24 bits */
__be32 redirect_qkey;
union ib_gid trap_gid;
__be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
__be16 trap_lid;
__be16 trap_pkey;
__be32 trap_hl_qp; /* 8, 24 bits respectively */
__be32 trap_qkey;
} __packed;
/* Congestion control traps */
#define IB_CC_TRAP_KEY_VIOLATION 0x0000
/*
 * Packed, big-endian layout for the congestion control key violation
 * trap (IB_CC_TRAP_KEY_VIOLATION).
 *
 * NOTE(review): nothing in the visible part of this change builds or
 * sends this trap; the layout is declared only.
 */
struct ib_cc_trap_key_violation_attr {
__be16 source_lid;
u8 method;
u8 reserved1;
__be16 attrib_id;
__be32 attrib_mod;
__be32 qp;
__be64 cckey;
u8 sgid[16];
u8 padding[24];
} __packed;
/* Congestion info flags */
#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
#define IB_CC_TABLE_CAP_DEFAULT 31
/*
 * CongestionInfo attribute payload.
 *
 * cc_get_congestion_info() reports congestion_info as 0 and fills
 * control_table_cap from the port's cc_max_table_entries.
 */
struct ib_cc_info_attr {
__be16 congestion_info;
u8 control_table_cap; /* Multiple of 64 entry unit CCTs */
} __packed;
/*
 * Packed, big-endian layout for the CongestionKeyInfo attribute.
 *
 * NOTE(review): process_cc() has no Get or Set case for
 * IB_CC_ATTR_CONGESTION_KEY_INFO, so this layout is not used by the
 * handlers visible in this change.
 */
struct ib_cc_key_info_attr {
__be64 cckey;
u8 protect;
__be16 lease_period;
__be16 violations;
} __packed;
#define IB_CC_CL_CA_LOGEVENTS_LEN 208
/*
 * Packed, big-endian layout for the CongestionLog attribute; log_events
 * is a raw byte area of IB_CC_CL_CA_LOGEVENTS_LEN bytes.
 *
 * NOTE(review): process_cc() has no case for IB_CC_ATTR_CONGESTION_LOG,
 * so this layout is not used by the handlers visible in this change.
 */
struct ib_cc_log_attr {
u8 log_type;
u8 congestion_flags;
__be16 threshold_event_counter;
__be16 threshold_congestion_event_map;
__be16 current_time_stamp;
u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
} __packed;
#define IB_CC_CLEC_SERVICETYPE_RC 0x0
#define IB_CC_CLEC_SERVICETYPE_UC 0x1
#define IB_CC_CLEC_SERVICETYPE_RD 0x2
#define IB_CC_CLEC_SERVICETYPE_UD 0x3
/*
 * Packed layout of a single congestion log event.
 *
 * NOTE(review): sl_cn_entry and service_type_cn_entry are C bit-fields
 * sharing one byte, so their bit positions are compiler/endianness
 * dependent -- confirm against the wire format before relying on them.
 * Not referenced by the handlers visible in this change.
 */
struct ib_cc_log_event {
u8 local_qp_cn_entry;
u8 remote_qp_number_cn_entry[3];
u8 sl_cn_entry:4;
u8 service_type_cn_entry:4;
__be32 remote_lid_cn_entry;
__be32 timestamp_cn_entry;
} __packed;
/* Sixteen congestion entries */
#define IB_CC_CCS_ENTRIES 16
/* Port control flags */
#define IB_CC_CCS_PC_SL_BASED 0x01
/*
 * One congestion setting entry as carried in the MAD (ccti_timer is
 * big-endian).  IB_CC_CCS_ENTRIES of these make up the entries[] array
 * of struct ib_cc_congestion_setting_attr.
 */
struct ib_cc_congestion_entry {
u8 ccti_increase;
__be16 ccti_timer;
u8 trigger_threshold;
u8 ccti_min; /* min CCTI for cc table */
} __packed;
/*
 * Host byte order counterpart of struct ib_cc_congestion_entry, with the
 * same packed field layout.  The Get/Set handlers convert ccti_timer
 * with cpu_to_be16()/be16_to_cpu() when moving between the two forms.
 */
struct ib_cc_congestion_entry_shadow {
u8 ccti_increase;
u16 ccti_timer;
u8 trigger_threshold;
u8 ccti_min; /* min CCTI for cc table */
} __packed;
/*
 * CACongestionSetting attribute payload as carried in the MAD:
 * big-endian port control and control map followed by
 * IB_CC_CCS_ENTRIES congestion entries.
 */
struct ib_cc_congestion_setting_attr {
__be16 port_control;
__be16 control_map;
struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
} __packed;
/*
 * Host byte order copy of the congestion settings.  This is the type of
 * ppd->congestion_entries_shadow: it is written by
 * cc_set_congestion_control_table() and read by
 * cc_get_congestion_setting() and read_cc_setting_bin(), all under
 * cc_shadow_lock.
 */
struct ib_cc_congestion_setting_attr_shadow {
u16 port_control;
u16 control_map;
struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
} __packed;
#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
/* 64 Congestion Control table entries in a single MAD */
#define IB_CCT_ENTRIES 64
#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
/* One congestion control table entry as carried in the MAD (big-endian) */
struct ib_cc_table_entry {
__be16 entry; /* shift:2, multiplier:14 */
};
/*
 * One congestion control table entry in host byte order; element type of
 * ppd->ccti_entries and of the entries[] array in struct cc_table_shadow.
 */
struct ib_cc_table_entry_shadow {
u16 entry; /* shift:2, multiplier:14 */
};
/*
 * CongestionControlTable attribute payload: one block of IB_CCT_ENTRIES
 * big-endian entries preceded by ccti_limit.  The block being addressed
 * is selected by the MAD's attribute modifier, not by this structure.
 */
struct ib_cc_table_attr {
__be16 ccti_limit; /* max CCTI for cc table */
struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
} __packed;
/*
 * Host byte order counterpart of struct ib_cc_table_attr.
 *
 * NOTE(review): not referenced by any code visible in this change.
 */
struct ib_cc_table_attr_shadow {
u16 ccti_limit; /* max CCTI for cc table */
struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
} __packed;
#define CC_TABLE_SHADOW_MAX \
(IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
/*
 * Shadow copy of the whole congestion control table in host byte order;
 * type of ppd->ccti_entries_shadow.
 *
 * ccti_last_entry is set to (total entries - 1) by
 * cc_set_congestion_control_table().  read_cc_table_bin() copies this
 * structure out to user space starting from its first byte, so the
 * field order here is what sysfs readers see.
 */
struct cc_table_shadow {
u16 ccti_last_entry;
struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
} __packed;
#endif /* _QIB_MAD_H */
/* /*
* The PortSamplesControl.CounterMasks field is an array of 3 bit fields * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
* which specify the N'th counter's capabilities. See ch. 16.1.3.2. * which specify the N'th counter's capabilities. See ch. 16.1.3.2.

View file

@ -1,5 +1,6 @@
/* /*
* Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2012 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
@ -33,6 +34,7 @@
#include <linux/ctype.h> #include <linux/ctype.h>
#include "qib.h" #include "qib.h"
#include "qib_mad.h"
/** /**
* qib_parse_ushort - parse an unsigned short value in an arbitrary base * qib_parse_ushort - parse an unsigned short value in an arbitrary base
@ -231,6 +233,98 @@ static struct attribute *port_default_attributes[] = {
NULL NULL
}; };
/*
* Start of per-port congestion control structures and support code
*/
/*
* Congestion control table size followed by table entries
*/
/*
 * sysfs binary read of the shadow congestion control table.
 *
 * The data exposed is the start of struct cc_table_shadow: the 16-bit
 * ccti_last_entry followed by ppd->total_cct_entry table entries, all in
 * host byte order.
 *
 * @pos is honoured, so a reader that fetches the file in several chunks
 * gets consecutive pieces of the table rather than the first bytes again.
 *
 * Returns the number of bytes copied (0 when @pos is at the end of the
 * data), or -EINVAL when the congestion control agent is disabled or
 * @pos lies beyond the available data.
 */
static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
		struct bin_attribute *bin_attr,
		char *buf, loff_t pos, size_t count)
{
	int ret;
	struct qib_pportdata *ppd =
		container_of(kobj, struct qib_pportdata, pport_cc_kobj);

	if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
		return -EINVAL;

	/* Valid bytes: the leading u16 plus the entries received so far */
	ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
		+ sizeof(__be16);

	if (pos > ret)
		return -EINVAL;

	if (count > ret - pos)
		count = ret - pos;

	if (!count)
		return count;

	spin_lock(&ppd->cc_shadow_lock);
	memcpy(buf, (const char *)ppd->ccti_entries_shadow + pos, count);
	spin_unlock(&ppd->cc_shadow_lock);

	return count;
}
/*
 * kobject release callback for the per-port kobjects.  Intentionally
 * empty: the memory is freed by qib_free_devdata().
 */
static void qib_port_release(struct kobject *kobj)
{
}
/*
 * kobject type of the per-port congestion control directory
 * (ppd->pport_cc_kobj).  It only supplies the release callback.
 */
static struct kobj_type qib_port_cc_ktype = {
	.release	= qib_port_release,
};
/* Read-only binary sysfs file exposing the congestion control table */
static struct bin_attribute cc_table_bin_attr = {
	.attr = {
		.name = "cc_table_bin",
		.mode = 0444,
	},
	.read = read_cc_table_bin,
	.size = PAGE_SIZE,
};
/*
* Congestion settings: port control, control map and an array of 16
* entries for the congestion entries - increase, timer, event log
* trigger threshold and the minimum injection rate delay.
*/
/*
 * sysfs binary read of the shadow congestion settings.
 *
 * The data exposed is one struct ib_cc_congestion_setting_attr_shadow
 * in host byte order: port control, control map and the array of
 * congestion entries.
 *
 * @pos is honoured, so a reader that fetches the file in several chunks
 * gets consecutive pieces of the structure rather than its first bytes
 * again.
 *
 * Returns the number of bytes copied (0 when @pos is at the end of the
 * data), or -EINVAL when the congestion control agent is disabled or
 * @pos lies beyond the structure.
 */
static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
		struct bin_attribute *bin_attr,
		char *buf, loff_t pos, size_t count)
{
	int ret;
	struct qib_pportdata *ppd =
		container_of(kobj, struct qib_pportdata, pport_cc_kobj);

	if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
		return -EINVAL;

	ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);

	if (pos > ret)
		return -EINVAL;
	if (count > ret - pos)
		count = ret - pos;

	if (!count)
		return count;

	spin_lock(&ppd->cc_shadow_lock);
	memcpy(buf, (const char *)ppd->congestion_entries_shadow + pos, count);
	spin_unlock(&ppd->cc_shadow_lock);

	return count;
}
/* Read-only binary sysfs file exposing the congestion settings */
static struct bin_attribute cc_setting_bin_attr = {
	.attr = {
		.name = "cc_settings_bin",
		.mode = 0444,
	},
	.read = read_cc_setting_bin,
	.size = PAGE_SIZE,
};
static ssize_t qib_portattr_show(struct kobject *kobj, static ssize_t qib_portattr_show(struct kobject *kobj,
struct attribute *attr, char *buf) struct attribute *attr, char *buf)
{ {
@ -253,10 +347,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
return pattr->store(ppd, buf, len); return pattr->store(ppd, buf, len);
} }
static void qib_port_release(struct kobject *kobj)
{
/* nothing to do since memory is freed by qib_free_devdata() */
}
static const struct sysfs_ops qib_port_ops = { static const struct sysfs_ops qib_port_ops = {
.show = qib_portattr_show, .show = qib_portattr_show,
@ -670,7 +760,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
if (ret) { if (ret) {
qib_dev_err(dd, "Skipping sl2vl sysfs info, " qib_dev_err(dd, "Skipping sl2vl sysfs info, "
"(err %d) port %u\n", ret, port_num); "(err %d) port %u\n", ret, port_num);
goto bail_sl; goto bail_link;
} }
kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
@ -679,15 +769,57 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
if (ret) { if (ret) {
qib_dev_err(dd, "Skipping diag_counters sysfs info, " qib_dev_err(dd, "Skipping diag_counters sysfs info, "
"(err %d) port %u\n", ret, port_num); "(err %d) port %u\n", ret, port_num);
goto bail_diagc; goto bail_sl;
} }
kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
return 0;
ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
kobj, "CCMgtA");
if (ret) {
qib_dev_err(dd,
"Skipping Congestion Control sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_diagc;
}
kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
&cc_setting_bin_attr);
if (ret) {
qib_dev_err(dd,
"Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_cc;
}
ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
&cc_table_bin_attr);
if (ret) {
qib_dev_err(dd,
"Skipping Congestion Control table sysfs info, (err %d) port %u\n",
ret, port_num);
goto bail_cc_entry_bin;
}
qib_devinfo(dd->pcidev,
"IB%u: Congestion Control Agent enabled for port %d\n",
dd->unit, port_num);
return 0; return 0;
bail_cc_entry_bin:
sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
bail_cc:
kobject_put(&ppd->pport_cc_kobj);
bail_diagc: bail_diagc:
kobject_put(&ppd->sl2vl_kobj); kobject_put(&ppd->diagc_kobj);
bail_sl: bail_sl:
kobject_put(&ppd->sl2vl_kobj);
bail_link:
kobject_put(&ppd->pport_kobj); kobject_put(&ppd->pport_kobj);
bail: bail:
return ret; return ret;
@ -720,7 +852,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
for (i = 0; i < dd->num_pports; i++) { for (i = 0; i < dd->num_pports; i++) {
ppd = &dd->pport[i]; ppd = &dd->pport[i];
kobject_put(&ppd->pport_kobj); if (qib_cc_table_size &&
ppd->congestion_entries_shadow) {
sysfs_remove_bin_file(&ppd->pport_cc_kobj,
&cc_setting_bin_attr);
sysfs_remove_bin_file(&ppd->pport_cc_kobj,
&cc_table_bin_attr);
kobject_put(&ppd->pport_cc_kobj);
}
kobject_put(&ppd->sl2vl_kobj); kobject_put(&ppd->sl2vl_kobj);
kobject_put(&ppd->pport_kobj);
} }
} }