cxl fixes for 6.8-rc6

 - Fix NUMA initialization from ACPI CEDT.CFMWS
 
 - Fix region assembly failures due to async init order
 
 - Fix / simplify export of qos_class information
 
 - Fix cxl_acpi initialization vs single-window-init failures
 
 - Fix handling of repeated 'pci_channel_io_frozen' notifications
 
 - Work around platforms that violate host-physical-address ==
   system-physical-address assumptions
 
 - Defer CXL CPER notification handling to v6.9
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQSbo+XnGs+rwLz9XGXfioYZHlFsZwUCZdpH9gAKCRDfioYZHlFs
 ZwZlAQDE+PxTJnjCXDVnDylVF4yeJF2G/wSkH1CFVFVxa0OjhAD/ZFScS/nz/76l
 1IYYiiLqmVO5DdmJtfKtq16m7e1cZwc=
 =PuPF
 -----END PGP SIGNATURE-----

Merge tag 'cxl-fixes-6.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull cxl fixes from Dan Williams:
 "A collection of significant fixes for the CXL subsystem.

  The largest change in this set, one that bordered on "new development",
   is the fix for the fact that the location of the new qos_class
   attribute did not match the Documentation. The fix ends up deleting
   more code than it adds, and it comes with a new unit test to backstop
   basic errors in this interface going forward. So the "red-diff" and the
   unit test spared it the "rip it out and try again" response.

  In contrast, the new notification path for firmware-reported CXL
   errors (CXL CPER notifications) has a locking context bug that cannot
   be fixed with a red-diff. Given where the release cycle stands, there
   is no comfortable way to squeeze that fix into these waning days, so
   it receives the "back it out and try again later" treatment.

  There is a regression fix in the code that establishes memory NUMA
  nodes for platform CXL regions. That has an ack from x86 folks. There
  are a couple more fixups for Linux to understand (reassemble) CXL
  regions instantiated by platform firmware. The policy around platforms
  that do not match host-physical-address with system-physical-address
  (i.e. systems that have an address translation mechanism between the
  address range reported in the ACPI CEDT.CFMWS and endpoint decoders)
   has been softened to abort driver load rather than tear down the memory
   range (which can cause system hangs). Lastly, there is a robustness /
  regression fix for cases where the driver would previously continue in
  the face of error, and a fixup for PCI error notification handling.

  Summary:

   - Fix NUMA initialization from ACPI CEDT.CFMWS

   - Fix region assembly failures due to async init order

   - Fix / simplify export of qos_class information

   - Fix cxl_acpi initialization vs single-window-init failures

   - Fix handling of repeated 'pci_channel_io_frozen' notifications

   - Work around platforms that violate host-physical-address ==
     system-physical-address assumptions

   - Defer CXL CPER notification handling to v6.9"

* tag 'cxl-fixes-6.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl/acpi: Fix load failures due to single window creation failure
  acpi/ghes: Remove CXL CPER notifications
  cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window
  cxl/test: Add support for qos_class checking
  cxl: Fix sysfs export of qos_class for memdev
  cxl: Remove unnecessary type cast in cxl_qos_class_verify()
  cxl: Change 'struct cxl_memdev_state' *_perf_list to single 'struct cxl_dpa_perf'
  cxl/region: Allow out of order assembly of autodiscovered regions
  cxl/region: Handle endpoint decoders in cxl_region_find_decoder()
  x86/numa: Fix the sort compare func used in numa_fill_memblks()
  x86/numa: Fix the address overlap check in numa_fill_memblks()
  cxl/pci: Skip to handle RAS errors if CXL.mem device is detached
commit ac389bc0ca (Linus Torvalds, 2024-02-24 15:53:40 -08:00)
19 changed files with 289 additions and 334 deletions

View File

@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b)
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
const struct numa_memblk *mb = *(const struct numa_memblk **)b;
return ma->start - mb->start;
return (ma->start > mb->start) - (ma->start < mb->start);
}
static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
* @start: address to begin fill
* @end: address to end fill
*
* Find and extend numa_meminfo memblks to cover the @start-@end
* physical address range, such that the first memblk includes
* @start, the last memblk includes @end, and any gaps in between
* are filled.
* Find and extend numa_meminfo memblks to cover the physical
* address range @start-@end
*
* RETURNS:
* 0 : Success
* NUMA_NO_MEMBLK : No memblk exists in @start-@end range
* NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
*/
int __init numa_fill_memblks(u64 start, u64 end)
@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end)
/*
* Create a list of pointers to numa_meminfo memblks that
* overlap start, end. Exclude (start == bi->end) since
* end addresses in both a CFMWS range and a memblk range
* are exclusive.
*
* This list of pointers is used to make in-place changes
* that fill out the numa_meminfo memblks.
* overlap start, end. The list is used to make in-place
* changes that fill out the numa_meminfo memblks.
*/
for (int i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
if (start < bi->end && end >= bi->start) {
if (memblock_addrs_overlap(start, end - start, bi->start,
bi->end - bi->start)) {
blk[count] = &mi->blk[i];
count++;
}
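
The two x86/numa hunks above come down to two details: a sort comparator must not let a u64 subtraction truncate to its int return value, and the overlap check treats both the CFMWS range end and the memblk end as exclusive. A minimal standalone sketch of both points (userspace C, names invented for illustration, not the kernel code itself):

#include <stdint.h>
#include <stdio.h>

/*
 * Broken pattern: subtracting two u64 start addresses and truncating the
 * result to int reports "equal" (or the wrong sign) for blocks whose
 * starts differ by an exact multiple of 4 GiB.
 */
static int cmp_start_broken(uint64_t a, uint64_t b)
{
	return (int)(a - b);
}

/* Safe three-way compare, the same shape as the fixed cmp_memblk() */
static int cmp_start_fixed(uint64_t a, uint64_t b)
{
	return (a > b) - (a < b);	/* always -1, 0, or 1 */
}

/* Overlap test for [base, base + size) ranges with exclusive ends */
static int addrs_overlap(uint64_t base1, uint64_t size1,
			 uint64_t base2, uint64_t size2)
{
	return base1 < base2 + size2 && base2 < base1 + size1;
}

int main(void)
{
	uint64_t lo = 0x100000000ULL;	/* 4 GiB */
	uint64_t hi = 0x200000000ULL;	/* 8 GiB */

	/* broken comparator calls the two starts equal, fixed says hi > lo */
	printf("broken: %d fixed: %d\n",
	       cmp_start_broken(hi, lo), cmp_start_fixed(hi, lo));
	/* adjacent ranges only touch at an exclusive end: no overlap */
	printf("overlap: %d\n",
	       addrs_overlap(lo, hi - lo, hi, hi - lo));
	return 0;
}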

View File

@ -26,7 +26,6 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
@ -674,52 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
/*
* Only a single callback can be registered for CXL CPER events.
*/
static DECLARE_RWSEM(cxl_cper_rw_sem);
static cxl_cper_callback cper_callback;
static void cxl_cper_post_event(enum cxl_event_type event_type,
struct cxl_cper_event_rec *rec)
{
if (rec->hdr.length <= sizeof(rec->hdr) ||
rec->hdr.length > sizeof(*rec)) {
pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
rec->hdr.length);
return;
}
if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
pr_err(FW_WARN "CXL CPER invalid event\n");
return;
}
guard(rwsem_read)(&cxl_cper_rw_sem);
if (cper_callback)
cper_callback(event_type, rec);
}
int cxl_cper_register_callback(cxl_cper_callback callback)
{
guard(rwsem_write)(&cxl_cper_rw_sem);
if (cper_callback)
return -EINVAL;
cper_callback = callback;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);
int cxl_cper_unregister_callback(cxl_cper_callback callback)
{
guard(rwsem_write)(&cxl_cper_rw_sem);
if (callback != cper_callback)
return -EINVAL;
cper_callback = NULL;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);
static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@ -754,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes,
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);
cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);
cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
} else if (guid_equal(sec_type,
&CPER_SEC_CXL_MEM_MODULE_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);
cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
void *err = acpi_hest_get_payload(gdata);

View File

@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = {
.qos_class = cxl_acpi_qos_class,
};
static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
struct cxl_cfmws_context *ctx)
{
int target_map[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_cfmws_context *ctx = arg;
struct cxl_port *root_port = ctx->root_port;
struct resource *cxl_res = ctx->cxl_res;
struct cxl_cxims_context cxims_ctx;
struct cxl_root_decoder *cxlrd;
struct device *dev = ctx->dev;
struct acpi_cedt_cfmws *cfmws;
cxl_calc_hb_fn cxl_calc_hb;
struct cxl_decoder *cxld;
unsigned int ways, i, ig;
struct resource *res;
int rc;
cfmws = (struct acpi_cedt_cfmws *) header;
rc = cxl_acpi_cfmws_verify(dev, cfmws);
if (rc) {
dev_err(dev, "CFMWS range %#llx-%#llx not registered\n",
cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1);
return 0;
return rc;
}
rc = eiw_to_ways(cfmws->interleave_ways, &ways);
@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
if (IS_ERR(cxlrd))
return 0;
return PTR_ERR(cxlrd);
cxld = &cxlrd->cxlsd.cxld;
cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
@ -420,16 +416,7 @@ err_xormap:
put_device(&cxld->dev);
else
rc = cxl_decoder_autoremove(dev, cxld);
if (rc) {
dev_err(dev, "Failed to add decode range: %pr", res);
return rc;
}
dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
dev_name(&cxld->dev),
phys_to_target_node(cxld->hpa_range.start),
cxld->hpa_range.start, cxld->hpa_range.end);
return 0;
return rc;
err_insert:
kfree(res->name);
@ -438,6 +425,29 @@ err_name:
return -ENOMEM;
}
static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header;
struct cxl_cfmws_context *ctx = arg;
struct device *dev = ctx->dev;
int rc;
rc = __cxl_parse_cfmws(cfmws, ctx);
if (rc)
dev_err(dev,
"Failed to add decode range: [%#llx - %#llx] (%d)\n",
cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1, rc);
else
dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n",
phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1);
/* never fail cxl_acpi load for a single window failure */
return 0;
}
__mock struct acpi_device *to_cxl_host_bridge(struct device *host,
struct device *dev)
{

View File

@ -210,19 +210,12 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
return 0;
}
static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct list_head *list)
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct cxl_dpa_perf *dpa_perf)
{
struct cxl_dpa_perf *dpa_perf;
dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL);
if (!dpa_perf)
return;
dpa_perf->dpa_range = dent->dpa_range;
dpa_perf->coord = dent->coord;
dpa_perf->qos_class = dent->qos_class;
list_add_tail(&dpa_perf->list, list);
dev_dbg(dev,
"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
dent->dpa_range.start, dpa_perf->qos_class,
@ -230,20 +223,6 @@ static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
dent->coord.read_latency, dent->coord.write_latency);
}
static void free_perf_ents(void *data)
{
struct cxl_memdev_state *mds = data;
struct cxl_dpa_perf *dpa_perf, *n;
LIST_HEAD(discard);
list_splice_tail_init(&mds->ram_perf_list, &discard);
list_splice_tail_init(&mds->pmem_perf_list, &discard);
list_for_each_entry_safe(dpa_perf, n, &discard, list) {
list_del(&dpa_perf->list);
kfree(dpa_perf);
}
}
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
struct xarray *dsmas_xa)
{
@ -263,16 +242,14 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
xa_for_each(dsmas_xa, index, dent) {
if (resource_size(&cxlds->ram_res) &&
range_contains(&ram_range, &dent->dpa_range))
add_perf_entry(dev, dent, &mds->ram_perf_list);
update_perf_entry(dev, dent, &mds->ram_perf);
else if (resource_size(&cxlds->pmem_res) &&
range_contains(&pmem_range, &dent->dpa_range))
add_perf_entry(dev, dent, &mds->pmem_perf_list);
update_perf_entry(dev, dent, &mds->pmem_perf);
else
dev_dbg(dev, "no partition for dsmas dpa: %#llx\n",
dent->dpa_range.start);
}
devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds);
}
static int match_cxlrd_qos_class(struct device *dev, void *data)
@ -293,24 +270,24 @@ static int match_cxlrd_qos_class(struct device *dev, void *data)
return 0;
}
static void cxl_qos_match(struct cxl_port *root_port,
struct list_head *work_list,
struct list_head *discard_list)
static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf)
{
struct cxl_dpa_perf *dpa_perf, *n;
*dpa_perf = (struct cxl_dpa_perf) {
.qos_class = CXL_QOS_CLASS_INVALID,
};
}
list_for_each_entry_safe(dpa_perf, n, work_list, list) {
int rc;
static bool cxl_qos_match(struct cxl_port *root_port,
struct cxl_dpa_perf *dpa_perf)
{
if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
return false;
if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
return;
if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class,
match_cxlrd_qos_class))
return false;
rc = device_for_each_child(&root_port->dev,
(void *)&dpa_perf->qos_class,
match_cxlrd_qos_class);
if (!rc)
list_move_tail(&dpa_perf->list, discard_list);
}
return true;
}
static int match_cxlrd_hb(struct device *dev, void *data)
@ -334,23 +311,10 @@ static int match_cxlrd_hb(struct device *dev, void *data)
return 0;
}
static void discard_dpa_perf(struct list_head *list)
{
struct cxl_dpa_perf *dpa_perf, *n;
list_for_each_entry_safe(dpa_perf, n, list, list) {
list_del(&dpa_perf->list);
kfree(dpa_perf);
}
}
DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T))
static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
LIST_HEAD(__discard);
struct list_head *discard __free(dpa_perf) = &__discard;
struct cxl_port *root_port;
int rc;
@ -363,16 +327,17 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
root_port = &cxl_root->port;
/* Check that the QTG IDs are all sane between end device and root decoders */
cxl_qos_match(root_port, &mds->ram_perf_list, discard);
cxl_qos_match(root_port, &mds->pmem_perf_list, discard);
if (!cxl_qos_match(root_port, &mds->ram_perf))
reset_dpa_perf(&mds->ram_perf);
if (!cxl_qos_match(root_port, &mds->pmem_perf))
reset_dpa_perf(&mds->pmem_perf);
/* Check to make sure that the device's host bridge is under a root decoder */
rc = device_for_each_child(&root_port->dev,
(void *)cxlmd->endpoint->host_bridge,
match_cxlrd_hb);
cxlmd->endpoint->host_bridge, match_cxlrd_hb);
if (!rc) {
list_splice_tail_init(&mds->ram_perf_list, discard);
list_splice_tail_init(&mds->pmem_perf_list, discard);
reset_dpa_perf(&mds->ram_perf);
reset_dpa_perf(&mds->pmem_perf);
}
return rc;
@ -417,6 +382,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port)
cxl_memdev_set_qos_class(cxlds, dsmas_xa);
cxl_qos_class_verify(cxlmd);
cxl_memdev_update_perf(cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);

View File

@ -1391,8 +1391,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mds->cxlds.reg_map.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
INIT_LIST_HEAD(&mds->ram_perf_list);
INIT_LIST_HEAD(&mds->pmem_perf_list);
mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
return mds;
}

View File

@ -447,13 +447,41 @@ static struct attribute *cxl_memdev_attributes[] = {
NULL,
};
static ssize_t pmem_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class);
}
static struct device_attribute dev_attr_pmem_qos_class =
__ATTR(qos_class, 0444, pmem_qos_class_show, NULL);
static struct attribute *cxl_memdev_pmem_attributes[] = {
&dev_attr_pmem_size.attr,
&dev_attr_pmem_qos_class.attr,
NULL,
};
static ssize_t ram_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class);
}
static struct device_attribute dev_attr_ram_qos_class =
__ATTR(qos_class, 0444, ram_qos_class_show, NULL);
static struct attribute *cxl_memdev_ram_attributes[] = {
&dev_attr_ram_size.attr,
&dev_attr_ram_qos_class.attr,
NULL,
};
@ -477,14 +505,42 @@ static struct attribute_group cxl_memdev_attribute_group = {
.is_visible = cxl_memdev_visible,
};
static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
if (a == &dev_attr_ram_qos_class.attr)
if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
return 0;
return a->mode;
}
static struct attribute_group cxl_memdev_ram_attribute_group = {
.name = "ram",
.attrs = cxl_memdev_ram_attributes,
.is_visible = cxl_ram_visible,
};
static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
if (a == &dev_attr_pmem_qos_class.attr)
if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID)
return 0;
return a->mode;
}
static struct attribute_group cxl_memdev_pmem_attribute_group = {
.name = "pmem",
.attrs = cxl_memdev_pmem_attributes,
.is_visible = cxl_pmem_visible,
};
static umode_t cxl_memdev_security_visible(struct kobject *kobj,
@ -519,6 +575,13 @@ static const struct attribute_group *cxl_memdev_attribute_groups[] = {
NULL,
};
void cxl_memdev_update_perf(struct cxl_memdev *cxlmd)
{
sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_ram_attribute_group);
sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_pmem_attribute_group);
}
EXPORT_SYMBOL_NS_GPL(cxl_memdev_update_perf, CXL);
static const struct device_type cxl_memdev_type = {
.name = "cxl_memdev",
.release = cxl_memdev_release,
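
With the named "ram" and "pmem" groups above, the attribute surfaces under the memdev's partition directories (following the group names, something like /sys/bus/cxl/devices/memN/ram/qos_class and .../pmem/qos_class), and the is_visible() callbacks hide it while the class is CXL_QOS_CLASS_INVALID. A minimal userspace sketch of reading it; the memdev name is a placeholder:

#include <stdio.h>

/* Print the qos_class of one partition of a CXL memdev, if exported. */
static void print_qos_class(const char *memdev, const char *partition)
{
	char path[256], buf[32];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/bus/cxl/devices/%s/%s/qos_class", memdev, partition);
	f = fopen(path, "r");
	if (!f) {
		/* the attribute is hidden when the qos_class is invalid */
		printf("%s/%s: not exported\n", memdev, partition);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s/%s qos_class: %s", memdev, partition, buf);
	fclose(f);
}

int main(void)
{
	/* "mem0" is a placeholder; enumerate /sys/bus/cxl/devices in practice */
	print_qos_class("mem0", "ram");
	print_qos_class("mem0", "pmem");
	return 0;
}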

View File

@ -477,9 +477,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
allowed++;
}
if (!allowed) {
cxl_set_mem_enable(cxlds, 0);
info->mem_enabled = 0;
if (!allowed && info->mem_enabled) {
dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
return -ENXIO;
}
/*
@ -932,11 +932,21 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
void cxl_cor_error_detected(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
struct device *dev = &cxlds->cxlmd->dev;
if (cxlds->rcd)
cxl_handle_rdport_errors(cxlds);
scoped_guard(device, dev) {
if (!dev->driver) {
dev_warn(&pdev->dev,
"%s: memdev disabled, abort error handling\n",
dev_name(dev));
return;
}
cxl_handle_endpoint_cor_ras(cxlds);
if (cxlds->rcd)
cxl_handle_rdport_errors(cxlds);
cxl_handle_endpoint_cor_ras(cxlds);
}
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
@ -948,16 +958,25 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
struct device *dev = &cxlmd->dev;
bool ue;
if (cxlds->rcd)
cxl_handle_rdport_errors(cxlds);
scoped_guard(device, dev) {
if (!dev->driver) {
dev_warn(&pdev->dev,
"%s: memdev disabled, abort error handling\n",
dev_name(dev));
return PCI_ERS_RESULT_DISCONNECT;
}
if (cxlds->rcd)
cxl_handle_rdport_errors(cxlds);
/*
* A frozen channel indicates an impending reset which is fatal to
* CXL.mem operation, and will likely crash the system. On the off
* chance the situation is recoverable dump the status of the RAS
* capability registers and bounce the active state of the memdev.
*/
ue = cxl_handle_endpoint_ras(cxlds);
}
/*
* A frozen channel indicates an impending reset which is fatal to
* CXL.mem operation, and will likely crash the system. On the off
* chance the situation is recoverable dump the status of the RAS
* capability registers and bounce the active state of the memdev.
*/
ue = cxl_handle_endpoint_ras(cxlds);
switch (state) {
case pci_channel_io_normal:
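
The reworked handlers above take the memdev's device lock with scoped_guard(device, dev) so the dev->driver check and the RAS handling cannot race with the memdev being unbound. A rough userspace analogue of the scoped-guard idea, built on the GCC/Clang cleanup attribute (macro, lock, and flag names invented for illustration):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;
static int driver_bound = 1;	/* stand-in for dev->driver */

static void unlock_cb(pthread_mutex_t **l)
{
	pthread_mutex_unlock(*l);
}

/* Run the body exactly once with @lock held; unlock on any exit path. */
#define scoped_guard_mutex(lock)					\
	for (pthread_mutex_t *__g __attribute__((cleanup(unlock_cb))) =\
		(pthread_mutex_lock(lock), (lock));			\
	     __g; __g = NULL)

static void handle_error(void)
{
	scoped_guard_mutex(&dev_lock) {
		if (!driver_bound) {
			/* early return still releases the lock */
			printf("memdev disabled, abort error handling\n");
			return;
		}
		printf("handling RAS error with device locked\n");
	}
}

int main(void)
{
	handle_error();
	driver_bound = 0;
	handle_error();
	return 0;
}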

View File

@ -730,12 +730,17 @@ static int match_auto_decoder(struct device *dev, void *data)
return 0;
}
static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
struct cxl_region *cxlr)
static struct cxl_decoder *
cxl_region_find_decoder(struct cxl_port *port,
struct cxl_endpoint_decoder *cxled,
struct cxl_region *cxlr)
{
struct device *dev;
int id = 0;
if (port == cxled_to_port(cxled))
return &cxled->cxld;
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
dev = device_find_child(&port->dev, &cxlr->params,
match_auto_decoder);
@ -753,8 +758,31 @@ static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
return to_cxl_decoder(dev);
}
static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
struct cxl_region *cxlr)
static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
struct cxl_decoder *cxld)
{
struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
struct cxl_decoder *cxld_iter = rr->decoder;
/*
* Allow the out of order assembly of auto-discovered regions.
* Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
* in HPA order. Confirm that the decoder with the lesser HPA
* starting address has the lesser id.
*/
dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
dev_name(&cxld->dev), cxld->id,
dev_name(&cxld_iter->dev), cxld_iter->id);
if (cxld_iter->id > cxld->id)
return true;
return false;
}
static struct cxl_region_ref *
alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_region_ref *cxl_rr, *iter;
@ -764,16 +792,21 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
xa_for_each(&port->regions, index, iter) {
struct cxl_region_params *ip = &iter->region->params;
if (!ip->res)
if (!ip->res || ip->res->start < p->res->start)
continue;
if (ip->res->start > p->res->start) {
dev_dbg(&cxlr->dev,
"%s: HPA order violation %s:%pr vs %pr\n",
dev_name(&port->dev),
dev_name(&iter->region->dev), ip->res, p->res);
return ERR_PTR(-EBUSY);
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
struct cxl_decoder *cxld;
cxld = cxl_region_find_decoder(port, cxled, cxlr);
if (auto_order_ok(port, iter->region, cxld))
continue;
}
dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
dev_name(&port->dev),
dev_name(&iter->region->dev), ip->res, p->res);
return ERR_PTR(-EBUSY);
}
cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
@ -853,10 +886,7 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
{
struct cxl_decoder *cxld;
if (port == cxled_to_port(cxled))
cxld = &cxled->cxld;
else
cxld = cxl_region_find_decoder(port, cxlr);
cxld = cxl_region_find_decoder(port, cxled, cxlr);
if (!cxld) {
dev_dbg(&cxlr->dev, "%s: no decoder available\n",
dev_name(&port->dev));
@ -953,7 +983,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
nr_targets_inc = true;
}
} else {
cxl_rr = alloc_region_ref(port, cxlr);
cxl_rr = alloc_region_ref(port, cxlr, cxled);
if (IS_ERR(cxl_rr)) {
dev_dbg(&cxlr->dev,
"%s: failed to allocate region reference\n",

View File

@ -880,6 +880,8 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord);
void cxl_memdev_update_perf(struct cxl_memdev *cxlmd);
/*
* Unit test builds overrides this to __weak, find the 'strong' version
* of these symbols in tools/testing/cxl/.

View File

@ -395,13 +395,11 @@ enum cxl_devtype {
/**
* struct cxl_dpa_perf - DPA performance property entry
* @list - list entry
* @dpa_range - range for DPA address
* @coord - QoS performance data (i.e. latency, bandwidth)
* @qos_class - QoS Class cookies
*/
struct cxl_dpa_perf {
struct list_head list;
struct range dpa_range;
struct access_coordinate coord;
int qos_class;
@ -471,8 +469,8 @@ struct cxl_dev_state {
* @security: security driver state info
* @fw: firmware upload / activation state
* @mbox_send: @dev specific transport for transmitting mailbox commands
* @ram_perf_list: performance data entries matched to RAM
* @pmem_perf_list: performance data entries matched to PMEM
* @ram_perf: performance data entry matched to RAM partition
* @pmem_perf: performance data entry matched to PMEM partition
*
* See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
* details on capacity parameters.
@ -494,8 +492,8 @@ struct cxl_memdev_state {
u64 next_volatile_bytes;
u64 next_persistent_bytes;
struct list_head ram_perf_list;
struct list_head pmem_perf_list;
struct cxl_dpa_perf ram_perf;
struct cxl_dpa_perf pmem_perf;
struct cxl_event_state event;
struct cxl_poison_state poison;

View File

@ -215,52 +215,6 @@ static ssize_t trigger_poison_list_store(struct device *dev,
}
static DEVICE_ATTR_WO(trigger_poison_list);
static ssize_t ram_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct cxl_dpa_perf *dpa_perf;
if (!dev->driver)
return -ENOENT;
if (list_empty(&mds->ram_perf_list))
return -ENOENT;
dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf,
list);
return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
}
static struct device_attribute dev_attr_ram_qos_class =
__ATTR(qos_class, 0444, ram_qos_class_show, NULL);
static ssize_t pmem_qos_class_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct cxl_dpa_perf *dpa_perf;
if (!dev->driver)
return -ENOENT;
if (list_empty(&mds->pmem_perf_list))
return -ENOENT;
dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf,
list);
return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
}
static struct device_attribute dev_attr_pmem_qos_class =
__ATTR(qos_class, 0444, pmem_qos_class_show, NULL);
static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
@ -272,21 +226,11 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
mds->poison.enabled_cmds))
return 0;
if (a == &dev_attr_pmem_qos_class.attr)
if (list_empty(&mds->pmem_perf_list))
return 0;
if (a == &dev_attr_ram_qos_class.attr)
if (list_empty(&mds->ram_perf_list))
return 0;
return a->mode;
}
static struct attribute *cxl_mem_attrs[] = {
&dev_attr_trigger_poison_list.attr,
&dev_attr_ram_qos_class.attr,
&dev_attr_pmem_qos_class.attr,
NULL
};

View File

@ -974,61 +974,6 @@ static struct pci_driver cxl_pci_driver = {
},
};
#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
static void cxl_cper_event_call(enum cxl_event_type ev_type,
struct cxl_cper_event_rec *rec)
{
struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
struct pci_dev *pdev __free(pci_dev_put) = NULL;
enum cxl_event_log_type log_type;
struct cxl_dev_state *cxlds;
unsigned int devfn;
u32 hdr_flags;
devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
device_id->bus_num, devfn);
if (!pdev)
return;
guard(pci_dev)(pdev);
if (pdev->driver != &cxl_pci_driver)
return;
cxlds = pci_get_drvdata(pdev);
if (!cxlds)
return;
/* Fabricate a log type */
hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
&uuid_null, &rec->event);
}
static int __init cxl_pci_driver_init(void)
{
int rc;
rc = cxl_cper_register_callback(cxl_cper_event_call);
if (rc)
return rc;
rc = pci_register_driver(&cxl_pci_driver);
if (rc)
cxl_cper_unregister_callback(cxl_cper_event_call);
return rc;
}
static void __exit cxl_pci_driver_exit(void)
{
pci_unregister_driver(&cxl_pci_driver);
cxl_cper_unregister_callback(cxl_cper_event_call);
}
module_init(cxl_pci_driver_init);
module_exit(cxl_pci_driver_exit);
module_pci_driver(cxl_pci_driver);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);

View File

@ -140,22 +140,4 @@ struct cxl_cper_event_rec {
union cxl_event event;
} __packed;
typedef void (*cxl_cper_callback)(enum cxl_event_type type,
struct cxl_cper_event_rec *rec);
#ifdef CONFIG_ACPI_APEI_GHES
int cxl_cper_register_callback(cxl_cper_callback callback);
int cxl_cper_unregister_callback(cxl_cper_callback callback);
#else
static inline int cxl_cper_register_callback(cxl_cper_callback callback)
{
return 0;
}
static inline int cxl_cper_unregister_callback(cxl_cper_callback callback)
{
return 0;
}
#endif
#endif /* _LINUX_CXL_EVENT_H */

View File

@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
#endif
void memblock_trim_memory(phys_addr_t align);
unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
phys_addr_t base2, phys_addr_t size2);
bool memblock_overlaps_region(struct memblock_type *type,
phys_addr_t base, phys_addr_t size);
bool memblock_validate_numa_coverage(unsigned long threshold_bytes);

View File

@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
/*
* Address comparison utilities
*/
static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
phys_addr_t base2, phys_addr_t size2)
unsigned long __init_memblock
memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
phys_addr_t size2)
{
return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

View File

@ -13,6 +13,7 @@ ldflags-y += --wrap=cxl_hdm_decode_init
ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl

View File

@ -15,6 +15,8 @@
static int interleave_arithmetic;
#define FAKE_QTG_ID 42
#define NR_CXL_HOST_BRIDGES 2
#define NR_CXL_SINGLE_HOST 1
#define NR_CXL_RCH 1
@ -209,7 +211,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = 0,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
},
.target = { 0 },
@ -224,7 +226,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = 1,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, 1, },
@ -239,7 +241,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 2,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
},
.target = { 0 },
@ -254,7 +256,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 3,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, 1, },
@ -269,7 +271,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 4,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 4UL,
},
.target = { 2 },
@ -284,7 +286,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
.qtg_id = 5,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M,
},
.target = { 3 },
@ -301,7 +303,7 @@ static struct {
.granularity = 4,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 0,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, },
@ -317,7 +319,7 @@ static struct {
.granularity = 0,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 1,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 8UL,
},
.target = { 0, 1, },
@ -333,7 +335,7 @@ static struct {
.granularity = 0,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = 0,
.qtg_id = FAKE_QTG_ID,
.window_size = SZ_256M * 16UL,
},
.target = { 0, 1, 0, 1, },
@ -976,6 +978,48 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
return 0;
}
/*
* Faking the cxl_dpa_perf for the memdev when appropriate.
*/
static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
struct cxl_dpa_perf *dpa_perf)
{
dpa_perf->qos_class = FAKE_QTG_ID;
dpa_perf->dpa_range = *range;
dpa_perf->coord.read_latency = 500;
dpa_perf->coord.write_latency = 500;
dpa_perf->coord.read_bandwidth = 1000;
dpa_perf->coord.write_bandwidth = 1000;
}
static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
{
struct cxl_root *cxl_root __free(put_cxl_root) =
find_cxl_root(port);
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct range pmem_range = {
.start = cxlds->pmem_res.start,
.end = cxlds->pmem_res.end,
};
struct range ram_range = {
.start = cxlds->ram_res.start,
.end = cxlds->ram_res.end,
};
if (!cxl_root)
return;
if (range_len(&ram_range))
dpa_perf_setup(port, &ram_range, &mds->ram_perf);
if (range_len(&pmem_range))
dpa_perf_setup(port, &pmem_range, &mds->pmem_perf);
cxl_memdev_update_perf(cxlmd);
}
static struct cxl_mock_ops cxl_mock_ops = {
.is_mock_adev = is_mock_adev,
.is_mock_bridge = is_mock_bridge,
@ -989,6 +1033,7 @@ static struct cxl_mock_ops cxl_mock_ops = {
.devm_cxl_setup_hdm = mock_cxl_setup_hdm,
.devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
.devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
};

View File

@ -285,6 +285,20 @@ resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL);
void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
{
int index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
if (ops && ops->is_mock_dev(cxlmd->dev.parent))
ops->cxl_endpoint_parse_cdat(port);
else
cxl_endpoint_parse_cdat(port);
put_cxl_mock_ops(index);
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
MODULE_IMPORT_NS(CXL);

View File

@ -25,6 +25,7 @@ struct cxl_mock_ops {
int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
int (*devm_cxl_enumerate_decoders)(
struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info);
void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
};
void register_cxl_mock_ops(struct cxl_mock_ops *ops);