Merge branch 'for-6.3/cxl-events' into cxl/next

Add the CXL event and interrupt support for the v6.3 update.
This commit is contained in:
Dan Williams 2023-02-07 11:14:06 -08:00
commit dbe9f7d1e1
8 changed files with 1495 additions and 3 deletions

View File

@ -3,11 +3,13 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/security.h>
#include <linux/debugfs.h>
#include <linux/ktime.h>
#include <linux/mutex.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
#include "trace.h"
static bool cxl_raw_allow_all;
@ -737,6 +739,203 @@ out:
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
/*
* General Media Event Record
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
*
* cxl_event_trace_record() compares this UUID with the record header id to
* route matching records to trace_cxl_general_media().
*/
static const uuid_t gen_media_event_uuid =
UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
/*
* DRAM Event Record
* CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
*
* cxl_event_trace_record() compares this UUID with the record header id to
* route matching records to trace_cxl_dram().
*/
static const uuid_t dram_event_uuid =
UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);
/*
* Memory Module Event Record
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
*
* cxl_event_trace_record() compares this UUID with the record header id to
* route matching records to trace_cxl_memory_module().
*/
static const uuid_t mem_mod_event_uuid =
UUID_INIT(0xfe927475, 0xdd59, 0x4339,
0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
/*
 * Emit the tracepoint that matches an event record.
 *
 * The UUID in the common record header selects the General Media, DRAM or
 * Memory Module tracepoint. A record with any other UUID is handed to the
 * generic event tracepoint.
 */
static void cxl_event_trace_record(const struct device *dev,
				   enum cxl_event_log_type type,
				   struct cxl_event_record_raw *record)
{
	uuid_t *uuid = &record->hdr.id;

	if (uuid_equal(uuid, &gen_media_event_uuid)) {
		trace_cxl_general_media(dev, type,
					(struct cxl_event_gen_media *)record);
		return;
	}

	if (uuid_equal(uuid, &dram_event_uuid)) {
		trace_cxl_dram(dev, type, (struct cxl_event_dram *)record);
		return;
	}

	if (uuid_equal(uuid, &mem_mod_event_uuid)) {
		trace_cxl_memory_module(dev, type,
					(struct cxl_event_mem_module *)record);
		return;
	}

	/* Unrecognized UUID: fall back to the generic event tracepoint */
	trace_cxl_generic_event(dev, type, record);
}
static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
enum cxl_event_log_type log,
struct cxl_get_event_payload *get_pl)
{
struct cxl_mbox_clear_event_payload *payload;
u16 total = le16_to_cpu(get_pl->record_count);
u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
size_t pl_size = struct_size(payload, handles, max_handles);
struct cxl_mbox_cmd mbox_cmd;
u16 cnt;
int rc = 0;
int i;
/* Payload size may limit the max handles */
if (pl_size > cxlds->payload_size) {
max_handles = (cxlds->payload_size - sizeof(*payload)) /
sizeof(__le16);
pl_size = struct_size(payload, handles, max_handles);
}
payload = kvzalloc(pl_size, GFP_KERNEL);
if (!payload)
return -ENOMEM;
*payload = (struct cxl_mbox_clear_event_payload) {
.event_log = log,
};
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD,
.payload_in = payload,
.size_in = pl_size,
};
/*
* Clear Event Records uses u8 for the handle cnt while Get Event
* Record can return up to 0xffff records.
*/
i = 0;
for (cnt = 0; cnt < total; cnt++) {
payload->handles[i++] = get_pl->records[cnt].hdr.handle;
dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n",
log, le16_to_cpu(payload->handles[i]));
if (i == max_handles) {
payload->nr_recs = i;
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
goto free_pl;
i = 0;
}
}
/* Clear what is left if any */
if (i) {
payload->nr_recs = i;
mbox_cmd.size_in = struct_size(payload, handles, i);
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
goto free_pl;
}
free_pl:
kvfree(payload);
return rc;
}
static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
enum cxl_event_log_type type)
{
struct cxl_get_event_payload *payload;
struct cxl_mbox_cmd mbox_cmd;
u8 log_type = type;
u16 nr_rec;
mutex_lock(&cxlds->event.log_lock);
payload = cxlds->event.buf;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
.size_out = cxlds->payload_size,
.min_out = struct_size(payload, records, 0),
};
do {
int rc, i;
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc) {
dev_err_ratelimited(cxlds->dev,
"Event log '%d': Failed to query event records : %d",
type, rc);
break;
}
nr_rec = le16_to_cpu(payload->record_count);
if (!nr_rec)
break;
for (i = 0; i < nr_rec; i++)
cxl_event_trace_record(cxlds->dev, type,
&payload->records[i]);
if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
trace_cxl_overflow(cxlds->dev, type, payload);
rc = cxl_clear_event_record(cxlds, type, payload);
if (rc) {
dev_err_ratelimited(cxlds->dev,
"Event log '%d': Failed to clear events : %d",
type, rc);
break;
}
} while (nr_rec);
mutex_unlock(&cxlds->event.log_lock);
}
/**
 * cxl_mem_get_event_records - Get Event Records from the device
 * @cxlds: The device data for the operation
 * @status: Event Status register value identifying which events are available.
 *
 * For each log flagged in @status, read the available event records, report
 * them as trace events, and clear them. Logs are visited from most to least
 * severe: Fatal, Failure, Warning, Informational.
 *
 * See CXL rev 3.0 @8.2.9.2.2 Get Event Records
 * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
 */
void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status)
{
	/* Status bit -> log type, in the order the logs are drained */
	static const struct {
		u32 status_bit;
		enum cxl_event_log_type log;
	} logs[] = {
		{ CXLDEV_EVENT_STATUS_FATAL, CXL_EVENT_TYPE_FATAL },
		{ CXLDEV_EVENT_STATUS_FAIL, CXL_EVENT_TYPE_FAIL },
		{ CXLDEV_EVENT_STATUS_WARN, CXL_EVENT_TYPE_WARN },
		{ CXLDEV_EVENT_STATUS_INFO, CXL_EVENT_TYPE_INFO },
	};
	unsigned int idx;

	dev_dbg(cxlds->dev, "Reading event logs: %x\n", status);

	for (idx = 0; idx < sizeof(logs) / sizeof(logs[0]); idx++) {
		if (status & logs[idx].status_bit)
			cxl_mem_get_records_log(cxlds, logs[idx].log);
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
/**
* cxl_mem_get_partition_info - Get partition info
* @cxlds: The device data for the operation
@ -877,6 +1076,32 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
int cxl_set_timestamp(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_set_timestamp_in pi;
int rc;
pi.timestamp = cpu_to_le64(ktime_get_real_ns());
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_SET_TIMESTAMP,
.size_in = sizeof(pi),
.payload_in = &pi,
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
/*
* Command is optional. Devices may have another way of providing
* a timestamp, or may return all 0s in timestamp fields.
* Don't report an error if this command isn't supported
*/
if (rc && (mbox_cmd.return_code != CXL_MBOX_CMD_RC_UNSUPPORTED))
return rc;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
{
struct cxl_dev_state *cxlds;
@ -888,6 +1113,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
}
mutex_init(&cxlds->mbox_mutex);
mutex_init(&cxlds->event.log_lock);
cxlds->dev = dev;
return cxlds;

View File

@ -6,8 +6,11 @@
#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _CXL_EVENTS_H
#include <cxl.h>
#include <linux/tracepoint.h>
#include <asm-generic/unaligned.h>
#include <cxl.h>
#include <cxlmem.h>
#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
@ -103,6 +106,481 @@ TRACE_EVENT(cxl_aer_correctable_error,
)
);
/* Map an enum cxl_event_log_type value to a log name for trace output */
#define cxl_event_log_type_str(type) \
__print_symbolic(type, \
{ CXL_EVENT_TYPE_INFO, "Informational" }, \
{ CXL_EVENT_TYPE_WARN, "Warning" }, \
{ CXL_EVENT_TYPE_FAIL, "Failure" }, \
{ CXL_EVENT_TYPE_FATAL, "Fatal" })
/*
* Tracepoint reporting the overflow fields of a Get Event Records payload:
* the overflow error count and the first and last overflow timestamps, each
* converted from little endian before being stored.
*/
TRACE_EVENT(cxl_overflow,
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
struct cxl_get_event_payload *payload),
TP_ARGS(dev, log, payload),
TP_STRUCT__entry(
__string(dev_name, dev_name(dev))
__field(int, log)
__field(u64, first_ts)
__field(u64, last_ts)
__field(u16, count)
),
TP_fast_assign(
__assign_str(dev_name, dev_name(dev));
__entry->log = log;
__entry->count = le16_to_cpu(payload->overflow_err_count);
__entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp);
__entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp);
),
TP_printk("%s: log=%s : %u records from %llu to %llu",
__get_str(dev_name), cxl_event_log_type_str(__entry->log),
__entry->count, __entry->first_ts, __entry->last_ts)
);
/*
* Common Event Record Format
* CXL 3.0 section 8.2.9.2.1; Table 8-42
*
* Bits of the 3-byte flags field in the common event record header, and the
* helper that prints the set bits by name.
*/
#define CXL_EVENT_RECORD_FLAG_PERMANENT BIT(2)
#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3)
#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4)
#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5)
#define show_hdr_flags(flags) __print_flags(flags, " | ", \
{ CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \
{ CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \
{ CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \
{ CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \
)
/*
* Define macros for the common header of each CXL event.
*
* Tracepoints using these macros must do 3 things:
*
* 1) Add CXL_EVT_TP_entry to TP_STRUCT__entry
* 2) Use CXL_EVT_TP_fast_assign within TP_fast_assign;
* pass the dev, log, and CXL event header
* 3) Use CXL_EVT_TP_printk() instead of TP_printk()
*
* See the cxl_generic_event tracepoint as an example.
*/
/* Trace entry fields holding the device name, log and common record header */
#define CXL_EVT_TP_entry \
__string(dev_name, dev_name(dev)) \
__field(int, log) \
__field_struct(uuid_t, hdr_uuid) \
__field(u32, hdr_flags) \
__field(u16, hdr_handle) \
__field(u16, hdr_related_handle) \
__field(u64, hdr_timestamp) \
__field(u8, hdr_length) \
__field(u8, hdr_maint_op_class)
/*
* Fill the common fields from the record header; multi-byte header values
* are converted from little endian. The expansion ends without a semicolon,
* so the user supplies it.
*/
#define CXL_EVT_TP_fast_assign(dev, l, hdr) \
__assign_str(dev_name, dev_name(dev)); \
__entry->log = (l); \
memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \
__entry->hdr_length = (hdr).length; \
__entry->hdr_flags = get_unaligned_le24((hdr).flags); \
__entry->hdr_handle = le16_to_cpu((hdr).handle); \
__entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \
__entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \
__entry->hdr_maint_op_class = (hdr).maint_op_class
/* Print the common header fields, then the event-specific fmt and arguments */
#define CXL_EVT_TP_printk(fmt, ...) \
TP_printk("%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \
"handle=%x related_handle=%x maint_op_class=%u" \
" : " fmt, \
__get_str(dev_name), cxl_event_log_type_str(__entry->log), \
__entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\
show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \
__entry->hdr_related_handle, __entry->hdr_maint_op_class, \
##__VA_ARGS__)
/*
* Tracepoint for event records in raw form: the common header fields plus
* the CXL_EVENT_RECORD_DATA_LENGTH bytes of record data, printed as hex.
*/
TRACE_EVENT(cxl_generic_event,
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
struct cxl_event_record_raw *rec),
TP_ARGS(dev, log, rec),
TP_STRUCT__entry(
CXL_EVT_TP_entry
__array(u8, data, CXL_EVENT_RECORD_DATA_LENGTH)
),
TP_fast_assign(
CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH);
),
CXL_EVT_TP_printk("%s",
__print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH))
);
/*
* Physical Address field masks
*
* General Media Event Record
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
*
* DRAM Event Record
* CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
*
* The low six bits of the physical address field (CXL_DPA_FLAGS_MASK) carry
* flags; CXL_DPA_MASK selects the remaining bits, which the tracepoints
* report as the dpa.
*/
#define CXL_DPA_FLAGS_MASK 0x3F
#define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK)
#define CXL_DPA_VOLATILE BIT(0)
#define CXL_DPA_NOT_REPAIRABLE BIT(1)
#define show_dpa_flags(flags) __print_flags(flags, "|", \
{ CXL_DPA_VOLATILE, "VOLATILE" }, \
{ CXL_DPA_NOT_REPAIRABLE, "NOT_REPAIRABLE" } \
)
/*
* General Media Event Record - GMER
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
*/
/* Bits of the memory event descriptor field */
#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0)
#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1)
#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2)
#define show_event_desc_flags(flags) __print_flags(flags, "|", \
{ CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "UNCORRECTABLE_EVENT" }, \
{ CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "THRESHOLD_EVENT" }, \
{ CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "POISON_LIST_OVERFLOW" } \
)
/* Values of the memory event type field */
#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00
#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01
#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02
#define show_mem_event_type(type) __print_symbolic(type, \
{ CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \
{ CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \
{ CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \
)
/* Values of the transaction type field */
#define CXL_GMER_TRANS_UNKNOWN 0x00
#define CXL_GMER_TRANS_HOST_READ 0x01
#define CXL_GMER_TRANS_HOST_WRITE 0x02
#define CXL_GMER_TRANS_HOST_SCAN_MEDIA 0x03
#define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04
#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05
#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06
#define show_trans_type(type) __print_symbolic(type, \
{ CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \
{ CXL_GMER_TRANS_HOST_READ, "Host Read" }, \
{ CXL_GMER_TRANS_HOST_WRITE, "Host Write" }, \
{ CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \
{ CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \
{ CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \
{ CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
)
/* Bits of the validity flags field of the General Media Event Record */
#define CXL_GMER_VALID_CHANNEL BIT(0)
#define CXL_GMER_VALID_RANK BIT(1)
#define CXL_GMER_VALID_DEVICE BIT(2)
#define CXL_GMER_VALID_COMPONENT BIT(3)
#define show_valid_flags(flags) __print_flags(flags, "|", \
{ CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
{ CXL_GMER_VALID_RANK, "RANK" }, \
{ CXL_GMER_VALID_DEVICE, "DEVICE" }, \
{ CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
)
/*
* Tracepoint for General Media Event Records. The physical address field is
* split into dpa_flags (bits under CXL_DPA_FLAGS_MASK) and dpa (the remaining
* bits); the 3-byte device and 2-byte validity flags fields are read as
* unaligned little-endian values.
*/
TRACE_EVENT(cxl_general_media,
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
struct cxl_event_gen_media *rec),
TP_ARGS(dev, log, rec),
TP_STRUCT__entry(
CXL_EVT_TP_entry
/* General Media */
__field(u64, dpa)
__field(u8, descriptor)
__field(u8, type)
__field(u8, transaction_type)
__field(u8, channel)
__field(u32, device)
__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
__field(u16, validity_flags)
/* Following are out of order to pack trace record */
__field(u8, rank)
__field(u8, dpa_flags)
),
TP_fast_assign(
CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
/* General Media */
__entry->dpa = le64_to_cpu(rec->phys_addr);
__entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
/* Mask after flags have been parsed */
__entry->dpa &= CXL_DPA_MASK;
__entry->descriptor = rec->descriptor;
__entry->type = rec->type;
__entry->transaction_type = rec->transaction_type;
__entry->channel = rec->channel;
__entry->rank = rec->rank;
__entry->device = get_unaligned_le24(rec->device);
memcpy(__entry->comp_id, &rec->component_id,
CXL_EVENT_GEN_MED_COMP_ID_SIZE);
__entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
),
CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
"descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
"device=%x comp_id=%s validity_flags='%s'",
__entry->dpa, show_dpa_flags(__entry->dpa_flags),
show_event_desc_flags(__entry->descriptor),
show_mem_event_type(__entry->type),
show_trans_type(__entry->transaction_type),
__entry->channel, __entry->rank, __entry->device,
__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
show_valid_flags(__entry->validity_flags)
)
);
/*
* DRAM Event Record - DER
*
* CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
*/
/*
* DRAM Event Record defines many fields the same as the General Media Event
* Record. Reuse those definitions as appropriate.
*/
/* Bits of the validity flags field of the DRAM Event Record */
#define CXL_DER_VALID_CHANNEL BIT(0)
#define CXL_DER_VALID_RANK BIT(1)
#define CXL_DER_VALID_NIBBLE BIT(2)
#define CXL_DER_VALID_BANK_GROUP BIT(3)
#define CXL_DER_VALID_BANK BIT(4)
#define CXL_DER_VALID_ROW BIT(5)
#define CXL_DER_VALID_COLUMN BIT(6)
#define CXL_DER_VALID_CORRECTION_MASK BIT(7)
#define show_dram_valid_flags(flags) __print_flags(flags, "|", \
{ CXL_DER_VALID_CHANNEL, "CHANNEL" }, \
{ CXL_DER_VALID_RANK, "RANK" }, \
{ CXL_DER_VALID_NIBBLE, "NIBBLE" }, \
{ CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \
{ CXL_DER_VALID_BANK, "BANK" }, \
{ CXL_DER_VALID_ROW, "ROW" }, \
{ CXL_DER_VALID_COLUMN, "COLUMN" }, \
{ CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \
)
/*
* Tracepoint for DRAM Event Records. The physical address field is split
* into dpa_flags and dpa as in cxl_general_media; the descriptor, type and
* transaction type are printed with the General Media helpers. Byte-array
* fields of the record are read as unaligned little-endian values.
*/
TRACE_EVENT(cxl_dram,
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
struct cxl_event_dram *rec),
TP_ARGS(dev, log, rec),
TP_STRUCT__entry(
CXL_EVT_TP_entry
/* DRAM */
__field(u64, dpa)
__field(u8, descriptor)
__field(u8, type)
__field(u8, transaction_type)
__field(u8, channel)
__field(u16, validity_flags)
__field(u16, column) /* Out of order to pack trace record */
__field(u32, nibble_mask)
__field(u32, row)
__array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
__field(u8, rank) /* Out of order to pack trace record */
__field(u8, bank_group) /* Out of order to pack trace record */
__field(u8, bank) /* Out of order to pack trace record */
__field(u8, dpa_flags) /* Out of order to pack trace record */
),
TP_fast_assign(
CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
/* DRAM */
__entry->dpa = le64_to_cpu(rec->phys_addr);
__entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
__entry->dpa &= CXL_DPA_MASK;
__entry->descriptor = rec->descriptor;
__entry->type = rec->type;
__entry->transaction_type = rec->transaction_type;
__entry->validity_flags = get_unaligned_le16(rec->validity_flags);
__entry->channel = rec->channel;
__entry->rank = rec->rank;
__entry->nibble_mask = get_unaligned_le24(rec->nibble_mask);
__entry->bank_group = rec->bank_group;
__entry->bank = rec->bank;
__entry->row = get_unaligned_le24(rec->row);
__entry->column = get_unaligned_le16(rec->column);
memcpy(__entry->cor_mask, &rec->correction_mask,
CXL_EVENT_DER_CORRECTION_MASK_SIZE);
),
CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \
"transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
"bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
"validity_flags='%s'",
__entry->dpa, show_dpa_flags(__entry->dpa_flags),
show_event_desc_flags(__entry->descriptor),
show_mem_event_type(__entry->type),
show_trans_type(__entry->transaction_type),
__entry->channel, __entry->rank, __entry->nibble_mask,
__entry->bank_group, __entry->bank,
__entry->row, __entry->column,
__print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
show_dram_valid_flags(__entry->validity_flags)
)
);
/*
* Memory Module Event Record - MMER
*
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
*
* Values of the device event type field and their names for trace output.
*/
#define CXL_MMER_HEALTH_STATUS_CHANGE 0x00
#define CXL_MMER_MEDIA_STATUS_CHANGE 0x01
#define CXL_MMER_LIFE_USED_CHANGE 0x02
#define CXL_MMER_TEMP_CHANGE 0x03
#define CXL_MMER_DATA_PATH_ERROR 0x04
#define CXL_MMER_LSA_ERROR 0x05
#define show_dev_evt_type(type) __print_symbolic(type, \
{ CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \
{ CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \
{ CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \
{ CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \
{ CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \
{ CXL_MMER_LSA_ERROR, "LSA Error" } \
)
/*
* Device Health Information - DHI
*
* CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
*/
/* Bits of the health status field */
#define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0)
#define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1)
#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2)
#define show_health_status_flags(flags) __print_flags(flags, "|", \
{ CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \
{ CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \
{ CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \
)
/* Values of the media status field */
#define CXL_DHI_MS_NORMAL 0x00
#define CXL_DHI_MS_NOT_READY 0x01
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST 0x02
#define CXL_DHI_MS_ALL_DATA_LOST 0x03
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS 0x04
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN 0x05
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT 0x06
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS 0x07
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN 0x08
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT 0x09
#define show_media_status(ms) __print_symbolic(ms, \
{ CXL_DHI_MS_NORMAL, \
"Normal" }, \
{ CXL_DHI_MS_NOT_READY, \
"Not Ready" }, \
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOST, \
"Write Persistency Lost" }, \
{ CXL_DHI_MS_ALL_DATA_LOST, \
"All Data Lost" }, \
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS, \
"Write Persistency Loss in the Event of Power Loss" }, \
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN, \
"Write Persistency Loss in Event of Shutdown" }, \
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT, \
"Write Persistency Loss Imminent" }, \
{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS, \
"All Data Loss in Event of Power Loss" }, \
{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN, \
"All Data loss in the Event of Shutdown" }, \
{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT, \
"All Data Loss Imminent" } \
)
/*
* Values of the sub-fields of the additional status byte. The two-bit fields
* can take all three values; the one-bit fields only Normal or Warning.
*/
#define CXL_DHI_AS_NORMAL 0x0
#define CXL_DHI_AS_WARNING 0x1
#define CXL_DHI_AS_CRITICAL 0x2
#define show_two_bit_status(as) __print_symbolic(as, \
{ CXL_DHI_AS_NORMAL, "Normal" }, \
{ CXL_DHI_AS_WARNING, "Warning" }, \
{ CXL_DHI_AS_CRITICAL, "Critical" } \
)
#define show_one_bit_status(as) __print_symbolic(as, \
{ CXL_DHI_AS_NORMAL, "Normal" }, \
{ CXL_DHI_AS_WARNING, "Warning" } \
)
/*
 * Extract the sub-fields of the Device Health Information additional status
 * byte: life used in bits 1:0, device temperature in bits 3:2, corrected
 * volatile error count in bit 4 and corrected persistent error count in
 * bit 5. The argument is parenthesized so that an expression argument
 * (e.g. "a | b") is masked as a whole.
 */
#define CXL_DHI_AS_LIFE_USED(as)		((as) & 0x3)
#define CXL_DHI_AS_DEV_TEMP(as)			(((as) & 0xC) >> 2)
#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)		(((as) & 0x10) >> 4)
#define CXL_DHI_AS_COR_PER_ERR_CNT(as)		(((as) & 0x20) >> 5)
/*
* Tracepoint for Memory Module Event Records. Records the event type and
* the embedded device health information; the counters and temperature are
* read as unaligned little-endian values. The additional status byte is
* stored whole and split into its sub-fields only when printed.
*/
TRACE_EVENT(cxl_memory_module,
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
struct cxl_event_mem_module *rec),
TP_ARGS(dev, log, rec),
TP_STRUCT__entry(
CXL_EVT_TP_entry
/* Memory Module Event */
__field(u8, event_type)
/* Device Health Info */
__field(u8, health_status)
__field(u8, media_status)
__field(u8, life_used)
__field(u32, dirty_shutdown_cnt)
__field(u32, cor_vol_err_cnt)
__field(u32, cor_per_err_cnt)
__field(s16, device_temp)
__field(u8, add_status)
),
TP_fast_assign(
CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
/* Memory Module Event */
__entry->event_type = rec->event_type;
/* Device Health Info */
__entry->health_status = rec->info.health_status;
__entry->media_status = rec->info.media_status;
__entry->life_used = rec->info.life_used;
__entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
__entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
__entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
__entry->device_temp = get_unaligned_le16(rec->info.device_temp);
__entry->add_status = rec->info.add_status;
),
CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
"as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
"as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
"dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
show_dev_evt_type(__entry->event_type),
show_health_status_flags(__entry->health_status),
show_media_status(__entry->media_status),
show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
__entry->life_used, __entry->device_temp,
__entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
__entry->cor_per_err_cnt
)
);
#endif /* _CXL_EVENTS_H */
#define TRACE_INCLUDE_FILE trace

View File

@ -156,6 +156,22 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
#define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
#define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000
/*
* CXL 3.0 8.2.8.3.1 Event Status Register
*
* One status bit per event log; CXLDEV_EVENT_STATUS_ALL is the set of logs
* the driver knows about.
*/
#define CXLDEV_DEV_EVENT_STATUS_OFFSET 0x00
#define CXLDEV_EVENT_STATUS_INFO BIT(0)
#define CXLDEV_EVENT_STATUS_WARN BIT(1)
#define CXLDEV_EVENT_STATUS_FAIL BIT(2)
#define CXLDEV_EVENT_STATUS_FATAL BIT(3)
#define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO | \
CXLDEV_EVENT_STATUS_WARN | \
CXLDEV_EVENT_STATUS_FAIL | \
CXLDEV_EVENT_STATUS_FATAL)
/*
* CXL rev 3.0 section 8.2.9.2.4; Table 8-52
*
* Fields of a per-log interrupt settings byte: the interrupt mode in bits
* 1:0 and the interrupt message number in bits 7:4.
*/
#define CXLDEV_EVENT_INT_MODE_MASK GENMASK(1, 0)
#define CXLDEV_EVENT_INT_MSGNUM_MASK GENMASK(7, 4)
/* CXL 2.0 8.2.8.4 Mailbox Registers */
#define CXLDEV_MBOX_CAPS_OFFSET 0x00
#define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)

View File

@ -4,6 +4,7 @@
#define __CXL_MEM_H__
#include <uapi/linux/cxl_mem.h>
#include <linux/cdev.h>
#include <linux/uuid.h>
#include "cxl.h"
/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@ -193,6 +194,34 @@ struct cxl_endpoint_dvsec_info {
struct range dvsec_range[2];
};
/*
* Event Interrupt Policy
*
* CXL rev 3.0 section 8.2.9.2.4; Table 8-52
*
* Interrupt mode values carried in the CXLDEV_EVENT_INT_MODE_MASK field of a
* per-log interrupt settings byte.
*/
enum cxl_event_int_mode {
CXL_INT_NONE = 0x00,
CXL_INT_MSI_MSIX = 0x01,
CXL_INT_FW = 0x02
};
/*
* Payload of the Get/Set Event Interrupt Policy mailbox commands: one
* interrupt settings byte per event log.
*/
struct cxl_event_interrupt_policy {
u8 info_settings;
u8 warn_settings;
u8 failure_settings;
u8 fatal_settings;
} __packed;
/**
* struct cxl_event_state - Event log driver state
*
* @buf: Buffer to receive event data
* @log_lock: Serialize use of @buf and of the event logs
*/
struct cxl_event_state {
struct cxl_get_event_payload *buf;
struct mutex log_lock;
};
/**
* struct cxl_dev_state - The driver device state
*
@ -266,14 +295,21 @@ struct cxl_dev_state {
struct xarray doe_mbs;
struct cxl_event_state event;
int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};
enum cxl_opcode {
CXL_MBOX_OP_INVALID = 0x0000,
CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100,
CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101,
CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102,
CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103,
CXL_MBOX_OP_GET_FW_INFO = 0x0200,
CXL_MBOX_OP_ACTIVATE_FW = 0x0202,
CXL_MBOX_OP_SET_TIMESTAMP = 0x0301,
CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
CXL_MBOX_OP_GET_LOG = 0x0401,
CXL_MBOX_OP_IDENTIFY = 0x4000,
@ -347,6 +383,136 @@ struct cxl_mbox_identify {
u8 qos_telemetry_caps;
} __packed;
/*
* Common Event Record Format
* CXL rev 3.0 section 8.2.9.2.1; Table 8-42
*
* Header shared by every event record. @id is the UUID used to tell record
* types apart; @flags is a 3-byte little-endian field.
*/
struct cxl_event_record_hdr {
uuid_t id;
u8 length;
u8 flags[3];
__le16 handle;
__le16 related_handle;
__le64 timestamp;
u8 maint_op_class;
u8 reserved[15];
} __packed;
/* Number of record-specific data bytes that follow the common header */
#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
/* An event record in raw form: common header plus uninterpreted data */
struct cxl_event_record_raw {
struct cxl_event_record_hdr hdr;
u8 data[CXL_EVENT_RECORD_DATA_LENGTH];
} __packed;
/*
* Get Event Records output payload
* CXL rev 3.0 section 8.2.9.2.2; Table 8-50
*
* Fixed header followed by record_count event records. The
* CXL_GET_EVENT_FLAG_* bits apply to the flags field.
*/
#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0)
#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1)
struct cxl_get_event_payload {
u8 flags;
u8 reserved1;
__le16 overflow_err_count;
__le64 first_overflow_timestamp;
__le64 last_overflow_timestamp;
__le16 record_count;
u8 reserved2[10];
struct cxl_event_record_raw records[];
} __packed;
/*
* CXL rev 3.0 section 8.2.9.2.2; Table 8-49
*
* Event log identifiers, as sent in the Get/Clear Event Records input
* payloads. CXL_EVENT_TYPE_MAX is the number of logs, not a log itself.
*/
enum cxl_event_log_type {
CXL_EVENT_TYPE_INFO = 0x00,
CXL_EVENT_TYPE_WARN,
CXL_EVENT_TYPE_FAIL,
CXL_EVENT_TYPE_FATAL,
CXL_EVENT_TYPE_MAX
};
/*
* Clear Event Records input payload
* CXL rev 3.0 section 8.2.9.2.3; Table 8-51
*
* nr_recs is a single byte, so one command carries at most
* CXL_CLEAR_EVENT_MAX_HANDLES handles.
*/
struct cxl_mbox_clear_event_payload {
u8 event_log; /* enum cxl_event_log_type */
u8 clear_flags;
u8 nr_recs;
u8 reserved[3];
__le16 handles[];
} __packed;
#define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX
/*
* General Media Event Record
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
*
* Multi-byte fields that are not naturally aligned are declared as byte
* arrays and read with the get_unaligned_le*() helpers.
*/
#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
struct cxl_event_gen_media {
struct cxl_event_record_hdr hdr;
__le64 phys_addr;
u8 descriptor;
u8 type;
u8 transaction_type;
u8 validity_flags[2];
u8 channel;
u8 rank;
u8 device[3];
u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
u8 reserved[46];
} __packed;
/*
* DRAM Event Record - DER
* CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
*
* Multi-byte fields that are not naturally aligned are declared as byte
* arrays and read with the get_unaligned_le*() helpers.
*/
#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20
struct cxl_event_dram {
struct cxl_event_record_hdr hdr;
__le64 phys_addr;
u8 descriptor;
u8 type;
u8 transaction_type;
u8 validity_flags[2];
u8 channel;
u8 rank;
u8 nibble_mask[3];
u8 bank_group;
u8 bank;
u8 row[3];
u8 column[2];
u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE];
u8 reserved[0x17];
} __packed;
/*
* Get Health Info Record
* CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
*
* Embedded in the Memory Module Event Record. The temperature and counters
* are little-endian byte arrays; add_status packs several sub-fields that
* the CXL_DHI_AS_*() macros extract.
*/
struct cxl_get_health_info {
u8 health_status;
u8 media_status;
u8 add_status;
u8 life_used;
u8 device_temp[2];
u8 dirty_shutdown_cnt[4];
u8 cor_vol_err_cnt[4];
u8 cor_per_err_cnt[4];
} __packed;
/*
* Memory Module Event Record
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
*
* Common header, the device event type, then the device health information.
*/
struct cxl_event_mem_module {
struct cxl_event_record_hdr hdr;
u8 event_type;
struct cxl_get_health_info info;
u8 reserved[0x3d];
} __packed;
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
@ -372,6 +538,12 @@ struct cxl_mbox_set_partition_info {
#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)
/*
* Set Timestamp CXL 3.0 Spec 8.2.9.4.2
*
* Input payload: a little-endian 64-bit timestamp; cxl_set_timestamp() fills
* it from ktime_get_real_ns().
*/
struct cxl_mbox_set_timestamp_in {
__le64 timestamp;
} __packed;
/**
* struct cxl_mem_command - Driver representation of a memory device command
* @info: Command information as it exists for the UAPI
@ -441,6 +613,9 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
int cxl_set_timestamp(struct cxl_dev_state *cxlds);
#ifdef CONFIG_CXL_SUSPEND
void cxl_mem_active_inc(void);
void cxl_mem_active_dec(void);

View File

@ -53,6 +53,12 @@
#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8)
#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16)
/*
* NOTE: Currently all the functions which are enabled for CXL require their
* vectors to be in the first 16. Use this as the default max.
*
* Passed as the maximum vector count when the driver allocates its MSI/MSI-X
* vectors.
*/
#define CXL_PCI_DEFAULT_MAX_VECTORS 16
/* Register Block Identifier (RBI) */
enum cxl_regloc_type {
CXL_REGLOC_RBI_EMPTY = 0,

View File

@ -417,8 +417,234 @@ static void disable_aer(void *pdev)
pci_disable_pcie_error_reporting(pdev);
}
/* devm action registered by cxl_mem_alloc_event_buf(): free the event buffer */
static void free_event_buf(void *buf)
{
kvfree(buf);
}
/*
* There is a single buffer for reading event logs from the mailbox. All logs
* share this buffer protected by the cxlds->event_log_lock.
*/
static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
{
struct cxl_get_event_payload *buf;
buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
cxlds->event.buf = buf;
return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf);
}
/*
 * Allocate between 1 and CXL_PCI_DEFAULT_MAX_VECTORS MSI-X or MSI vectors.
 *
 * Returns 0 when at least one vector was allocated, -ENXIO otherwise.
 */
static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
{
	int nr_allocated;

	/*
	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
	 * not generate INTx messages if that function participates in
	 * CXL.cache or CXL.mem.
	 *
	 * Additionally pci_alloc_irq_vectors() handles calling
	 * pci_free_irq_vectors() automatically despite not being called
	 * pcim_*. See pci_setup_msi_context().
	 */
	nr_allocated = pci_alloc_irq_vectors(pdev, 1,
					     CXL_PCI_DEFAULT_MAX_VECTORS,
					     PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (nr_allocated >= 1)
		return 0;

	dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nr_allocated);
	return -ENXIO;
}
/*
* Cookie passed as dev_id when requesting the event interrupt; it carries
* the device state to cxl_event_thread().
*/
struct cxl_dev_id {
struct cxl_dev_state *cxlds;
};
/*
 * Threaded interrupt handler for event log interrupts.
 *
 * Re-reads the Event Status register and drains the flagged logs until no
 * log known to the driver reports pending events. Always returns
 * IRQ_HANDLED.
 */
static irqreturn_t cxl_event_thread(int irq, void *id)
{
	struct cxl_dev_id *cookie = id;
	struct cxl_dev_state *cxlds = cookie->cxlds;
	u32 pending;

	for (;;) {
		/*
		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
		 * ignore the reserved upper 32 bits
		 */
		pending = readl(cxlds->regs.status +
				CXLDEV_DEV_EVENT_STATUS_OFFSET);
		/* Ignore logs unknown to the driver */
		pending &= CXLDEV_EVENT_STATUS_ALL;
		if (!pending)
			break;

		cxl_mem_get_event_records(cxlds, pending);
		cond_resched();
	}

	return IRQ_HANDLED;
}
/*
 * Request the threaded interrupt for one event log.
 *
 * @cxlds: device state
 * @setting: the log's interrupt settings byte (mode and message number)
 *
 * Returns -ENXIO unless the mode in @setting is MSI/MSI-X, -ENOMEM if the
 * dev_id cookie cannot be allocated, a negative value from pci_irq_vector(),
 * or the result of devm_request_threaded_irq().
 */
static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
{
	struct device *dev = cxlds->dev;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct cxl_dev_id *cookie;
	int vector;

	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
		return -ENXIO;

	/* dev_id must be globally unique and must contain the cxlds */
	cookie = devm_kzalloc(dev, sizeof(*cookie), GFP_KERNEL);
	if (!cookie)
		return -ENOMEM;
	cookie->cxlds = cxlds;

	/* The message number in the settings byte selects the vector */
	vector = pci_irq_vector(pdev,
				FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
	if (vector < 0)
		return vector;

	return devm_request_threaded_irq(dev, vector, NULL, cxl_event_thread,
					 IRQF_SHARED | IRQF_ONESHOT, NULL,
					 cookie);
}
/*
 * Read the device's current event interrupt policy into @policy with the
 * Get Event Interrupt Policy mailbox command.
 *
 * Return: the result of cxl_internal_send_cmd(); a negative result is
 * logged before being handed back to the caller.
 */
static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
		.payload_out = policy,
		.size_out = sizeof(*policy),
	};
	int rc;

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc < 0)
		/* Newline-terminate so the message is a complete log line */
		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d\n",
			rc);

	return rc;
}
/*
 * Request MSI/MSI-X delivery for all four event logs (info, warn, failure,
 * fatal) and then read the policy back so that @policy holds the settings
 * the device actually ended up with.
 *
 * Return: 0 on success or the error from the set/get mailbox commands.
 */
static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mbox_cmd mbox_cmd;
	int rc;

	*policy = (struct cxl_event_interrupt_policy) {
		.info_settings = CXL_INT_MSI_MSIX,
		.warn_settings = CXL_INT_MSI_MSIX,
		.failure_settings = CXL_INT_MSI_MSIX,
		.fatal_settings = CXL_INT_MSI_MSIX,
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
		.payload_in = policy,
		.size_in = sizeof(*policy),
	};

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc < 0) {
		/* Newline-terminate so the message is a complete log line */
		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d\n",
			rc);
		return rc;
	}

	/* Retrieve final interrupt settings */
	return cxl_event_get_int_policy(cxlds, policy);
}
static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
{
struct cxl_event_interrupt_policy policy;
int rc;
rc = cxl_event_config_msgnums(cxlds, &policy);
if (rc)
return rc;
rc = cxl_event_req_irq(cxlds, policy.info_settings);
if (rc) {
dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
return rc;
}
rc = cxl_event_req_irq(cxlds, policy.warn_settings);
if (rc) {
dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
return rc;
}
rc = cxl_event_req_irq(cxlds, policy.failure_settings);
if (rc) {
dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
return rc;
}
rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
if (rc) {
dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
return rc;
}
return 0;
}
/* Return true when the interrupt mode field of @setting is CXL_INT_FW. */
static bool cxl_event_int_is_fw(u8 setting)
{
	return FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) == CXL_INT_FW;
}
static int cxl_event_config(struct pci_host_bridge *host_bridge,
struct cxl_dev_state *cxlds)
{
struct cxl_event_interrupt_policy policy;
int rc;
/*
* When BIOS maintains CXL error reporting control, it will process
* event records. Only one agent can do so.
*/
if (!host_bridge->native_cxl_error)
return 0;
rc = cxl_mem_alloc_event_buf(cxlds);
if (rc)
return rc;
rc = cxl_event_get_int_policy(cxlds, &policy);
if (rc)
return rc;
if (cxl_event_int_is_fw(policy.info_settings) ||
cxl_event_int_is_fw(policy.warn_settings) ||
cxl_event_int_is_fw(policy.failure_settings) ||
cxl_event_int_is_fw(policy.fatal_settings)) {
dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n");
return -EBUSY;
}
rc = cxl_event_irqsetup(cxlds);
if (rc)
return rc;
cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
return 0;
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
@ -434,6 +660,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rc = pcim_enable_device(pdev);
if (rc)
return rc;
pci_set_master(pdev);
cxlds = cxl_dev_state_create(&pdev->dev);
if (IS_ERR(cxlds))
@ -482,6 +709,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
rc = cxl_set_timestamp(cxlds);
if (rc)
return rc;
rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;
@ -490,10 +721,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
rc = cxl_alloc_irq_vectors(pdev);
if (rc)
return rc;
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
rc = cxl_event_config(host_bridge, cxlds);
if (rc)
return rc;
if (cxlds->regs.ras) {
pci_enable_pcie_error_reporting(pdev);
rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);

View File

@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y := -I$(srctree)/drivers/cxl/
ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core
obj-m += cxl_test.o
obj-m += cxl_mock.o

View File

@ -9,6 +9,8 @@
#include <linux/bits.h>
#include <cxlmem.h>
#include "trace.h"
#define LSA_SIZE SZ_128K
#define DEV_SIZE SZ_2G
#define EFFECT(x) (1U << x)
@ -67,6 +69,26 @@ static struct {
#define PASS_TRY_LIMIT 3
#define CXL_TEST_EVENT_CNT_MAX 15
/* Set a number of events to return at a time for simulation. */
#define CXL_TEST_EVENT_CNT 3
/*
 * State of a single mock event log; struct mock_event_store holds one of
 * these per log type.
 */
struct mock_event_log {
/* Index of the next event the host is expected to clear */
u16 clear_idx;
/* Index of the next event mock_get_event() will hand out */
u16 cur_idx;
/* Number of valid entries in events[] */
u16 nr_events;
/* Events dropped because the log was full; zeroed by mock_clear_event() */
u16 nr_overflow;
/* Value event_reset_log() restores nr_overflow to */
u16 overflow_reset;
/* Pointers to the mock records, filled in by mes_add_event() */
struct cxl_event_record_raw *events[CXL_TEST_EVENT_CNT_MAX];
};
/* All mock event logs of one mock memory device. */
struct mock_event_store {
/* Device state passed to cxl_mem_get_event_records() on a trigger */
struct cxl_dev_state *cxlds;
/* One log per event log type, indexed by the log type value */
struct mock_event_log mock_logs[CXL_EVENT_TYPE_MAX];
/* CXLDEV_EVENT_STATUS_* bits of the logs populated with mock events */
u32 ev_status;
};
struct cxl_mockmem_data {
void *lsa;
u32 security_state;
@ -74,9 +96,317 @@ struct cxl_mockmem_data {
u8 master_pass[NVDIMM_PASSPHRASE_LEN];
int user_limit;
int master_limit;
struct mock_event_store mes;
u8 event_buf[SZ_4K];
};
/*
 * Look up the mock log for @log_type in the driver data of @dev.
 *
 * Return: the log, or NULL when @log_type is CXL_EVENT_TYPE_MAX or larger.
 */
static struct mock_event_log *event_find_log(struct device *dev, int log_type)
{
	struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);

	return log_type < CXL_EVENT_TYPE_MAX ?
		&mdata->mes.mock_logs[log_type] : NULL;
}
static struct cxl_event_record_raw *event_get_current(struct mock_event_log *log)
{
return log->events[log->cur_idx];
}
/*
 * Rewind @log so that its events are reported again: both the read and the
 * clear position go back to 0 and the overflow count is restored to the
 * value recorded while the log was populated.
 */
static void event_reset_log(struct mock_event_log *log)
{
	log->nr_overflow = log->overflow_reset;
	log->clear_idx = log->cur_idx = 0;
}
/*
 * Handle of the next event expected to be cleared. A handle can never be
 * 0, so handles are the 1-based form of the log index.
 */
static u16 event_get_clear_handle(struct mock_event_log *log)
{
	u16 handle = log->clear_idx;

	handle++;
	return handle;
}
/* Handle can never be 0 use 1 based indexing for handle */
static __le16 event_get_cur_event_handle(struct mock_event_log *log)
{
u16 cur_handle = log->cur_idx + 1;
return cpu_to_le16(cur_handle);
}
/* True once the read position has reached the number of stored events. */
static bool event_log_empty(struct mock_event_log *log)
{
	return log->nr_events == log->cur_idx;
}
/*
 * Append @event to the mock log selected by @log_type.
 *
 * When the log already holds CXL_TEST_EVENT_CNT_MAX events the event is
 * dropped and counted as an overflow instead; the count is also saved as
 * the value event_reset_log() restores. An out-of-range @log_type triggers
 * a WARN and is ignored.
 */
static void mes_add_event(struct mock_event_store *mes,
			  enum cxl_event_log_type log_type,
			  struct cxl_event_record_raw *event)
{
	struct mock_event_log *log;

	if (WARN_ON(log_type >= CXL_EVENT_TYPE_MAX))
		return;

	log = &mes->mock_logs[log_type];

	if (log->nr_events >= CXL_TEST_EVENT_CNT_MAX) {
		/* Log is full: account for the lost event */
		log->overflow_reset = ++log->nr_overflow;
		return;
	}

	log->events[log->nr_events++] = event;
}
/*
 * Mock implementation of the Get Event Records mailbox command.
 *
 * The input payload is a single byte selecting the log. Up to
 * CXL_TEST_EVENT_CNT records are copied from the log's current read
 * position into the output payload, each stamped with its 1-based handle.
 * The "more records" flag is set while events remain, and while the log
 * has a non-zero overflow count the overflow flag, count and timestamps
 * are reported as well.
 *
 * Return: 0 on success (including an empty log, which yields a zeroed
 * payload), -EINVAL for a malformed input size, an output buffer too
 * small for CXL_TEST_EVENT_CNT records, or an unknown log type.
 */
static int mock_get_event(struct cxl_dev_state *cxlds,
			  struct cxl_mbox_cmd *cmd)
{
	struct cxl_get_event_payload *pl;
	struct mock_event_log *log;
	u8 log_type;
	int i;

	if (cmd->size_in != sizeof(log_type))
		return -EINVAL;

	if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
		return -EINVAL;

	log_type = *((u8 *)cmd->payload_in);
	if (log_type >= CXL_EVENT_TYPE_MAX)
		return -EINVAL;

	memset(cmd->payload_out, 0, cmd->size_out);

	log = event_find_log(cxlds->dev, log_type);
	if (!log || event_log_empty(log))
		return 0;

	pl = cmd->payload_out;

	for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
		memcpy(&pl->records[i], event_get_current(log),
		       sizeof(pl->records[i]));
		pl->records[i].hdr.handle = event_get_cur_event_handle(log);
		log->cur_idx++;
	}

	pl->record_count = cpu_to_le16(i);
	if (!event_log_empty(log))
		pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;

	if (log->nr_overflow) {
		u64 ns;

		pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
		/* Report the overflow count tracked in the log itself */
		pl->overflow_err_count = cpu_to_le16(log->nr_overflow);
		ns = ktime_get_real_ns();
		ns -= 5000000000; /* 5s ago */
		pl->first_overflow_timestamp = cpu_to_le64(ns);
		ns = ktime_get_real_ns();
		ns -= 1000000000; /* 1s ago */
		pl->last_overflow_timestamp = cpu_to_le64(ns);
	}

	return 0;
}
/*
 * Mock implementation of the Clear Event Records mailbox command.
 *
 * Accepts the request only if it clears no more events than have been
 * returned so far and lists the handles in order starting at the next
 * handle to be cleared. On success the overflow count is zeroed and the
 * clear position advances by the number of records cleared.
 *
 * Return: 0 on success, -EINVAL for an unknown log type, for clearing more
 * events than were returned, or for out-of-order handles.
 */
static int mock_clear_event(struct cxl_dev_state *cxlds,
			    struct cxl_mbox_cmd *cmd)
{
	struct cxl_mbox_clear_event_payload *pl = cmd->payload_in;
	struct mock_event_log *log;
	u8 log_type = pl->event_log;
	u16 first;
	int idx;

	if (log_type >= CXL_EVENT_TYPE_MAX)
		return -EINVAL;

	log = event_find_log(cxlds->dev, log_type);
	if (!log)
		return 0; /* No mock data in this log */

	/*
	 * As far as can be told the specification does not forbid this (a
	 * host could 'guess' handles and clear them in order), but it is
	 * not good host behavior, so the mock tests for it.
	 */
	if (log->clear_idx + pl->nr_recs > log->cur_idx) {
		dev_err(cxlds->dev,
			"Attempting to clear more events than returned!\n");
		return -EINVAL;
	}

	/* Check handle order prior to clearing events */
	first = event_get_clear_handle(log);
	for (idx = 0; idx < pl->nr_recs; idx++) {
		u16 expected = first + idx;

		if (le16_to_cpu(pl->handles[idx]) != expected) {
			dev_err(cxlds->dev, "Clearing events out of order\n");
			return -EINVAL;
		}
	}

	log->nr_overflow = 0;

	/* Clear events */
	log->clear_idx += pl->nr_recs;

	return 0;
}
/*
 * Replay the mock events of @dev: rewind every mock log and then ask the
 * core to fetch the event records of the logs flagged in ev_status.
 */
static void cxl_mock_event_trigger(struct device *dev)
{
	struct cxl_mockmem_data *mdata = dev_get_drvdata(dev);
	struct mock_event_store *mes = &mdata->mes;
	int type;

	for (type = CXL_EVENT_TYPE_INFO; type < CXL_EVENT_TYPE_MAX; type++)
		event_reset_log(&mes->mock_logs[type]);

	cxl_mem_get_event_records(mes->cxlds, mes->ev_status);
}
/*
 * Mock raw event record carrying the "maintenance needed" flag. Its UUID is
 * not one of the general media, DRAM or memory module record UUIDs used by
 * the typed mock records in this file. hdr.handle is left unset here and is
 * filled in by mock_get_event() when the record is returned.
 */
struct cxl_event_record_raw maint_needed = {
.hdr = {
.id = UUID_INIT(0xBA5EBA11, 0xABCD, 0xEFEB,
0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5),
.length = sizeof(struct cxl_event_record_raw),
.flags[0] = CXL_EVENT_RECORD_FLAG_MAINT_NEEDED,
/* .handle = Set dynamically */
.related_handle = cpu_to_le16(0xa5b6),
},
.data = { 0xDE, 0xAD, 0xBE, 0xEF },
};
/*
 * Mock raw event record carrying the "hardware replace" flag. Its UUID is
 * not one of the general media, DRAM or memory module record UUIDs used by
 * the typed mock records in this file. hdr.handle is left unset here and is
 * filled in by mock_get_event() when the record is returned.
 */
struct cxl_event_record_raw hardware_replace = {
.hdr = {
.id = UUID_INIT(0xABCDEFEB, 0xBA11, 0xBA5E,
0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0xa5, 0x5a, 0xa5),
.length = sizeof(struct cxl_event_record_raw),
.flags[0] = CXL_EVENT_RECORD_FLAG_HW_REPLACE,
/* .handle = Set dynamically */
.related_handle = cpu_to_le16(0xb6a5),
},
.data = { 0xDE, 0xAD, 0xBE, 0xEF },
};
/*
 * Mock general media event record. validity_flags is not initialized here;
 * cxl_mock_add_event_logs() marks the channel and rank fields valid at run
 * time. hdr.handle is filled in by mock_get_event() when the record is
 * returned.
 */
struct cxl_event_gen_media gen_media = {
.hdr = {
.id = UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
.length = sizeof(struct cxl_event_gen_media),
.flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT,
/* .handle = Set dynamically */
.related_handle = cpu_to_le16(0),
},
.phys_addr = cpu_to_le64(0x2000),
.descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT,
.type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,
.transaction_type = CXL_GMER_TRANS_HOST_WRITE,
/* .validity_flags = <set below> */
.channel = 1,
.rank = 30
};
/*
 * Mock DRAM event record. validity_flags is not initialized here;
 * cxl_mock_add_event_logs() marks the channel, bank group, bank and column
 * fields valid at run time. hdr.handle is filled in by mock_get_event()
 * when the record is returned.
 */
struct cxl_event_dram dram = {
.hdr = {
.id = UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
.length = sizeof(struct cxl_event_dram),
.flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED,
/* .handle = Set dynamically */
.related_handle = cpu_to_le16(0),
},
.phys_addr = cpu_to_le64(0x8000),
.descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT,
.type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR,
.transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB,
/* .validity_flags = <set below> */
.channel = 1,
.bank_group = 5,
.bank = 2,
.column = {0xDE, 0xAD},
};
/*
 * Mock memory module event record reporting a temperature change event
 * together with a block of device health information. hdr.handle is filled
 * in by mock_get_event() when the record is returned.
 */
struct cxl_event_mem_module mem_module = {
.hdr = {
.id = UUID_INIT(0xfe927475, 0xdd59, 0x4339,
0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
.length = sizeof(struct cxl_event_mem_module),
/* .handle = Set dynamically */
.related_handle = cpu_to_le16(0),
},
.event_type = CXL_MMER_TEMP_CHANGE,
.info = {
.health_status = CXL_DHI_HS_PERFORMANCE_DEGRADED,
.media_status = CXL_DHI_MS_ALL_DATA_LOST,
/*
 * NOTE(review): the shifts presumably place each value in its sub-field
 * of the additional status byte - confirm against the device health
 * information layout.
 */
.add_status = (CXL_DHI_AS_CRITICAL << 2) |
(CXL_DHI_AS_WARNING << 4) |
(CXL_DHI_AS_WARNING << 5),
.device_temp = { 0xDE, 0xAD},
.dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef },
.cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
.cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
}
};
/*
 * Populate the mock event logs of @mes.
 *
 * Fills in the validity flags of the general media and DRAM records, then
 * loads three events into the Info log, deliberately more events than fit
 * into the Fail log, and two events into the Fatal log. The status bit of
 * each populated log is recorded in mes->ev_status.
 */
static void cxl_mock_add_event_logs(struct mock_event_store *mes)
{
	struct cxl_event_record_raw *gm_rec =
		(struct cxl_event_record_raw *)&gen_media;
	struct cxl_event_record_raw *dram_rec =
		(struct cxl_event_record_raw *)&dram;
	struct cxl_event_record_raw *mm_rec =
		(struct cxl_event_record_raw *)&mem_module;
	int i;

	put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK,
			   &gen_media.validity_flags);

	put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP |
			   CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN,
			   &dram.validity_flags);

	/* Info log */
	mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed);
	mes_add_event(mes, CXL_EVENT_TYPE_INFO, gm_rec);
	mes_add_event(mes, CXL_EVENT_TYPE_INFO, mm_rec);
	mes->ev_status |= CXLDEV_EVENT_STATUS_INFO;

	/* Fail log */
	mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &maint_needed);
	mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
	mes_add_event(mes, CXL_EVENT_TYPE_FAIL, dram_rec);
	mes_add_event(mes, CXL_EVENT_TYPE_FAIL, gm_rec);
	mes_add_event(mes, CXL_EVENT_TYPE_FAIL, mm_rec);
	mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
	mes_add_event(mes, CXL_EVENT_TYPE_FAIL, dram_rec);
	/* Overflow this log */
	for (i = 0; i < 10; i++)
		mes_add_event(mes, CXL_EVENT_TYPE_FAIL, &hardware_replace);
	mes->ev_status |= CXLDEV_EVENT_STATUS_FAIL;

	/* Fatal log */
	mes_add_event(mes, CXL_EVENT_TYPE_FATAL, &hardware_replace);
	mes_add_event(mes, CXL_EVENT_TYPE_FATAL, dram_rec);
	mes->ev_status |= CXLDEV_EVENT_STATUS_FATAL;
}
static int mock_gsl(struct cxl_mbox_cmd *cmd)
{
if (cmd->size_out < sizeof(mock_gsl_payload))
@ -582,6 +912,12 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
case CXL_MBOX_OP_GET_PARTITION_INFO:
rc = mock_partition_info(cxlds, cmd);
break;
case CXL_MBOX_OP_GET_EVENT_RECORD:
rc = mock_get_event(cxlds, cmd);
break;
case CXL_MBOX_OP_CLEAR_EVENT_RECORD:
rc = mock_clear_event(cxlds, cmd);
break;
case CXL_MBOX_OP_SET_LSA:
rc = mock_set_lsa(cxlds, cmd);
break;
@ -628,6 +964,15 @@ static bool is_rcd(struct platform_device *pdev)
return !!id->driver_data;
}
/*
 * Store handler of the write-only "event_trigger" device attribute.
 *
 * Any write replays the mock events through cxl_mock_event_trigger(); the
 * written data itself is ignored. Returns @count so the whole write is
 * reported as consumed.
 */
static ssize_t event_trigger_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
cxl_mock_event_trigger(dev);
return count;
}
static DEVICE_ATTR_WO(event_trigger);
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@ -655,6 +1000,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
cxlds->serial = pdev->id;
cxlds->mbox_send = cxl_mock_mbox_send;
cxlds->payload_size = SZ_4K;
cxlds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
if (is_rcd(pdev)) {
cxlds->rcd = true;
cxlds->component_reg_phys = CXL_RESOURCE_NONE;
@ -672,10 +1018,15 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
mdata->mes.cxlds = cxlds;
cxl_mock_add_event_logs(&mdata->mes);
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
return 0;
}
@ -714,6 +1065,7 @@ static DEVICE_ATTR_RW(security_lock);
/*
 * NULL-terminated list of the mock memory device's attributes:
 * security_lock and the write-only event_trigger.
 */
static struct attribute *cxl_mock_mem_attrs[] = {
&dev_attr_security_lock.attr,
&dev_attr_event_trigger.attr,
NULL
};
ATTRIBUTE_GROUPS(cxl_mock_mem);