From abe8f12b44250d02937665033a8b750c1bfeb26e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:20 +0000 Subject: [PATCH 01/12] x86/resctrl: Remove unused struct mbm_state::chunks_bw Nothing reads struct mbm_states's chunks_bw value, its a copy of chunks. Remove it. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-2-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/internal.h | 2 -- arch/x86/kernel/cpu/resctrl/monitor.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 5ffa32256b3b..72bb21068466 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -283,7 +283,6 @@ struct rftype { * struct mbm_state - status for each MBM counter in each domain * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes) * @prev_msr Value of IA32_QM_CTR for this RMID last time we read it - * @chunks_bw Total local data moved. Used for bandwidth calculation * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting * @prev_bw The most recent bandwidth in MBps * @delta_bw Difference between the current and previous bandwidth @@ -292,7 +291,6 @@ struct rftype { struct mbm_state { u64 chunks; u64 prev_msr; - u64 chunks_bw; u64 prev_bw_msr; u32 prev_bw; u32 delta_bw; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 837d7d012b7b..d6b92d7487a7 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -279,8 +279,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) return; chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); - m->chunks_bw += chunks; - m->chunks = m->chunks_bw; + m->chunks += chunks; cur_bw = (chunks * r->mon_scale) >> 20; if (m->delta_comp) From e89f85b9171665c917dca59920884f3d4fe0b1ef Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:21 +0000 Subject: [PATCH 02/12] x86/resctrl: Remove struct rdt_membw::max_delay max_delay is used by x86's __get_mem_config_intel() as a local variable. Remove it, replacing it with a local variable. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-3-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 8 ++++---- arch/x86/kernel/cpu/resctrl/internal.h | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 6a9df71c1b9e..9225ee5cca6f 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -254,16 +254,16 @@ static bool __get_mem_config_intel(struct rdt_resource *r) { union cpuid_0x10_3_eax eax; union cpuid_0x10_x_edx edx; - u32 ebx, ecx; + u32 ebx, ecx, max_delay; cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); r->num_closid = edx.split.cos_max + 1; - r->membw.max_delay = eax.split.max_delay + 1; + max_delay = eax.split.max_delay + 1; r->default_ctrl = MAX_MBA_BW; if (ecx & MBA_IS_LINEAR) { r->membw.delay_linear = true; - r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay; - r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay; + r->membw.min_bw = MAX_MBA_BW - max_delay; + r->membw.bw_gran = MAX_MBA_BW - max_delay; } else { if (!rdt_get_mb_table(r)) return false; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 72bb21068466..1eb39bd24990 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -369,8 +369,6 @@ struct rdt_cache { /** * struct rdt_membw - Memory bandwidth allocation related data - * @max_delay: Max throttle delay. Delay is the hardware - * representation for memory bandwidth. * @min_bw: Minimum memory bandwidth percentage user can request * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale @@ -378,7 +376,6 @@ struct rdt_cache { * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ struct rdt_membw { - u32 max_delay; u32 min_bw; u32 bw_gran; u32 delay_linear; From ae0fbedd2a18cd82a2c0c5605a0a60865bc54576 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:22 +0000 Subject: [PATCH 03/12] x86/resctrl: Fix stale comment The comment in rdtgroup_init() refers to the non existent function rdt_mount(), which has now been renamed rdt_get_tree(). Fix the comment. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-4-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 3f844f14fc0a..b04461763256 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -3196,7 +3196,7 @@ int __init rdtgroup_init(void) * It may also be ok since that would enable debugging of RDT before * resctrl is mounted. * The reason why the debugfs directory is created here and not in - * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and + * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and * during the debugfs directory creation also &sb->s_type->i_mutex_key * (the lockdep class of inode->i_rwsem). Other filesystem * interactions (eg. SyS_getdents) have the lock ordering: From f995801ba3a0660cb352c8beb794379c82781ca3 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:23 +0000 Subject: [PATCH 04/12] x86/resctrl: Use container_of() in delayed_work handlers mbm_handle_overflow() and cqm_handle_limbo() are both provided with the domain's work_struct when called, but use get_domain_from_cpu() to find the domain, along with the appropriate error handling. container_of() saves some list walking and bitmap testing, use that instead. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-5-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index d6b92d7487a7..54dffe574e67 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -477,19 +477,13 @@ void cqm_handle_limbo(struct work_struct *work) mutex_lock(&rdtgroup_mutex); r = &rdt_resources_all[RDT_RESOURCE_L3]; - d = get_domain_from_cpu(cpu, r); - - if (!d) { - pr_warn_once("Failure to get domain for limbo worker\n"); - goto out_unlock; - } + d = container_of(work, struct rdt_domain, cqm_limbo.work); __check_limbo(d, false); if (has_busy_rmid(r, d)) schedule_delayed_work_on(cpu, &d->cqm_limbo, delay); -out_unlock: mutex_unlock(&rdtgroup_mutex); } @@ -519,10 +513,7 @@ void mbm_handle_overflow(struct work_struct *work) goto out_unlock; r = &rdt_resources_all[RDT_RESOURCE_L3]; - - d = get_domain_from_cpu(cpu, r); - if (!d) - goto out_unlock; + d = container_of(work, struct rdt_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { mbm_update(r, d, prgrp->mon.rmid); From a21a4391f20c0ab45db452e22bc3e8afe8b36e46 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:24 +0000 Subject: [PATCH 05/12] x86/resctrl: Include pid.h We are about to disturb the header soup. This header uses struct pid and struct pid_namespace. Include their header. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-6-james.morse@arm.com --- include/linux/resctrl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index daf5cf64c6a6..9b05af9b3e28 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -2,6 +2,8 @@ #ifndef _RESCTRL_H #define _RESCTRL_H +#include + #ifdef CONFIG_PROC_CPU_RESCTRL int proc_resctrl_show(struct seq_file *m, From e6b2fac36fcc0b73cbef063d700a9841850e37a0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:25 +0000 Subject: [PATCH 06/12] x86/resctrl: Use is_closid_match() in more places rdtgroup_tasks_assigned() and show_rdt_tasks() loop over threads testing for a CTRL/MON group match by closid/rmid with the provided rdtgrp. Further down the file are helpers to do this, move these further up and make use of them here. These helpers additionally check for alloc/mon capable. This is harmless as rdtgroup_mkdir() tests these capable flags before allowing the config directories to be created. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-7-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 30 ++++++++++++-------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index b04461763256..78f3be10e215 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -592,6 +592,18 @@ static int __rdtgroup_move_task(struct task_struct *tsk, return ret; } +static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) +{ + return (rdt_alloc_capable && + (r->type == RDTCTRL_GROUP) && (t->closid == r->closid)); +} + +static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) +{ + return (rdt_mon_capable && + (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid)); +} + /** * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group * @r: Resource group @@ -607,8 +619,7 @@ int rdtgroup_tasks_assigned(struct rdtgroup *r) rcu_read_lock(); for_each_process_thread(p, t) { - if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) || - (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) { + if (is_closid_match(t, r) || is_rmid_match(t, r)) { ret = 1; break; } @@ -706,8 +717,7 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) rcu_read_lock(); for_each_process_thread(p, t) { - if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) || - (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) + if (is_closid_match(t, r) || is_rmid_match(t, r)) seq_printf(s, "%d\n", t->pid); } rcu_read_unlock(); @@ -2245,18 +2255,6 @@ static int reset_all_ctrls(struct rdt_resource *r) return 0; } -static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (rdt_alloc_capable && - (r->type == RDTCTRL_GROUP) && (t->closid == r->closid)); -} - -static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r) -{ - return (rdt_mon_capable && - (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid)); -} - /* * Move tasks from one to the other group. If @from is NULL, then all tasks * in the systems are moved unconditionally (used for teardown). From 41215b7947f1b1b86fd77a7bebd2320599aea7bd Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:26 +0000 Subject: [PATCH 07/12] x86/resctrl: Add struct rdt_membw::arch_needs_linear to explain AMD/Intel MBA difference The configuration values user-space provides to the resctrl filesystem are ABI. To make this work on another architecture, all the ABI bits should be moved out of /arch/x86 and under /fs. To do this, the differences between AMD and Intel CPUs needs to be explained to resctrl via resource properties, instead of function pointers that let the arch code accept subtly different values on different platforms/architectures. For MBA, Intel CPUs reject configuration attempts for non-linear resources, whereas AMD ignore this field as its MBA resource is never linear. To merge the parse/validate functions, this difference needs to be explained. Add struct rdt_membw::arch_needs_linear to indicate the arch code needs the linear property to be true to configure this resource. AMD can set this and delay_linear to false. Intel can set arch_needs_linear to true to keep the existing "No support for non-linear MB domains" error message for affected platforms. [ bp: convert "we" etc to passive voice. ] Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Link: https://lkml.kernel.org/r/20200708163929.2783-8-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 3 +++ arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 8 +++++++- arch/x86/kernel/cpu/resctrl/internal.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 9225ee5cca6f..52b8991de8a3 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -260,6 +260,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r) r->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; r->default_ctrl = MAX_MBA_BW; + r->membw.arch_needs_linear = true; if (ecx & MBA_IS_LINEAR) { r->membw.delay_linear = true; r->membw.min_bw = MAX_MBA_BW - max_delay; @@ -267,6 +268,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r) } else { if (!rdt_get_mb_table(r)) return false; + r->membw.arch_needs_linear = false; } r->data_width = 3; @@ -288,6 +290,7 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) /* AMD does not use delay */ r->membw.delay_linear = false; + r->membw.arch_needs_linear = false; r->membw.min_bw = 0; r->membw.bw_gran = 1; diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 934c8fb8a64a..e3bcd77add2b 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -33,6 +33,12 @@ static bool bw_validate_amd(char *buf, unsigned long *data, unsigned long bw; int ret; + /* temporary: always false on AMD */ + if (!r->membw.delay_linear && r->membw.arch_needs_linear) { + rdt_last_cmd_puts("No support for non-linear MB domains\n"); + return false; + } + ret = kstrtoul(buf, 10, &bw); if (ret) { rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); @@ -82,7 +88,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) /* * Only linear delay values is supported for current Intel SKUs. */ - if (!r->membw.delay_linear) { + if (!r->membw.delay_linear && r->membw.arch_needs_linear) { rdt_last_cmd_puts("No support for non-linear MB domains\n"); return false; } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 1eb39bd24990..7b0072397a11 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -372,6 +372,7 @@ struct rdt_cache { * @min_bw: Minimum memory bandwidth percentage user can request * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale + * @arch_needs_linear: True if we can't configure non-linear resources * @mba_sc: True if MBA software controller(mba_sc) is enabled * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ @@ -379,6 +380,7 @@ struct rdt_membw { u32 min_bw; u32 bw_gran; u32 delay_linear; + bool arch_needs_linear; bool mba_sc; u32 *mb_map; }; From 5df3ca9334d5603e4afbb95953d0affb37dcf86b Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:27 +0000 Subject: [PATCH 08/12] x86/resctrl: Merge AMD/Intel parse_bw() calls Now after arch_needs_linear has been added, the parse_bw() calls are almost the same between AMD and Intel. The difference is '!is_mba_sc()', which is not checked on AMD. This will always be true on AMD CPUs as mba_sc cannot be enabled as is_mba_linear() is false. Removing this duplication means user-space visible behaviour and error messages are not validated or generated in different places. Reviewed-by : Babu Moger Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-9-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 3 +- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 57 +---------------------- arch/x86/kernel/cpu/resctrl/internal.h | 6 +-- 3 files changed, 5 insertions(+), 61 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 52b8991de8a3..10a52d173e4f 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = { .name = "MB", .domains = domain_init(RDT_RESOURCE_MBA), .cache_level = 3, + .parse_ctrlval = parse_bw, .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, @@ -926,7 +927,6 @@ static __init void rdt_init_res_defs_intel(void) else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_THRTL_BASE; r->msr_update = mba_wrmsr_intel; - r->parse_ctrlval = parse_bw_intel; } } } @@ -946,7 +946,6 @@ static __init void rdt_init_res_defs_amd(void) else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_BW_BASE; r->msr_update = mba_wrmsr_amd; - r->parse_ctrlval = parse_bw_amd; } } } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index e3bcd77add2b..b0e24cb6f85c 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -21,59 +21,6 @@ #include #include "internal.h" -/* - * Check whether MBA bandwidth percentage value is correct. The value is - * checked against the minimum and maximum bandwidth values specified by - * the hardware. The allocated bandwidth percentage is rounded to the next - * control step available on the hardware. - */ -static bool bw_validate_amd(char *buf, unsigned long *data, - struct rdt_resource *r) -{ - unsigned long bw; - int ret; - - /* temporary: always false on AMD */ - if (!r->membw.delay_linear && r->membw.arch_needs_linear) { - rdt_last_cmd_puts("No support for non-linear MB domains\n"); - return false; - } - - ret = kstrtoul(buf, 10, &bw); - if (ret) { - rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); - return false; - } - - if (bw < r->membw.min_bw || bw > r->default_ctrl) { - rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, - r->membw.min_bw, r->default_ctrl); - return false; - } - - *data = roundup(bw, (unsigned long)r->membw.bw_gran); - return true; -} - -int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d) -{ - unsigned long bw_val; - - if (d->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->id); - return -EINVAL; - } - - if (!bw_validate_amd(data->buf, &bw_val, r)) - return -EINVAL; - - d->new_ctrl = bw_val; - d->have_new_ctrl = true; - - return 0; -} - /* * Check whether MBA bandwidth percentage value is correct. The value is * checked against the minimum and max bandwidth values specified by the @@ -110,8 +57,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return true; } -int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d) +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { unsigned long bw_val; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 7b0072397a11..21f43994b56d 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -471,10 +471,8 @@ struct rdt_resource { int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, struct rdt_domain *d); -int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d); -int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d); +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); extern struct mutex rdtgroup_mutex; From 316e7f901f5aedb415c72d1eedd7de0846238dd0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:28 +0000 Subject: [PATCH 09/12] x86/resctrl: Add struct rdt_cache::arch_has_{sparse, empty}_bitmaps Intel CPUs expect the cache bitmap provided by user-space to have on a single span of 1s, whereas AMD can support bitmaps like 0xf00f. Arm's MPAM support also allows sparse bitmaps. Similarly, Intel CPUs check at least one bit set, whereas AMD CPUs are quite happy with an empty bitmap. Arm's MPAM allows an empty bitmap. To move resctrl out to /fs/, platform differences like this need to be explained. Add two resource properties arch_has_{empty,sparse}_bitmaps. Test these around the relevant parts of cbm_validate(). Merging the validate calls causes AMD to gain the min_cbm_bits test needed for Haswell, but as it always sets this value to 1, it will never match. [ bp: Massage commit message. ] Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Babu Moger Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-10-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 14 ++++---- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 39 ++++++----------------- arch/x86/kernel/cpu/resctrl/internal.h | 8 ++--- 3 files changed, 22 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 10a52d173e4f..cbbd751bcc38 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -922,9 +922,10 @@ static __init void rdt_init_res_defs_intel(void) r->rid == RDT_RESOURCE_L3CODE || r->rid == RDT_RESOURCE_L2 || r->rid == RDT_RESOURCE_L2DATA || - r->rid == RDT_RESOURCE_L2CODE) - r->cbm_validate = cbm_validate_intel; - else if (r->rid == RDT_RESOURCE_MBA) { + r->rid == RDT_RESOURCE_L2CODE) { + r->cache.arch_has_sparse_bitmaps = false; + r->cache.arch_has_empty_bitmaps = false; + } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_THRTL_BASE; r->msr_update = mba_wrmsr_intel; } @@ -941,9 +942,10 @@ static __init void rdt_init_res_defs_amd(void) r->rid == RDT_RESOURCE_L3CODE || r->rid == RDT_RESOURCE_L2 || r->rid == RDT_RESOURCE_L2DATA || - r->rid == RDT_RESOURCE_L2CODE) - r->cbm_validate = cbm_validate_amd; - else if (r->rid == RDT_RESOURCE_MBA) { + r->rid == RDT_RESOURCE_L2CODE) { + r->cache.arch_has_sparse_bitmaps = true; + r->cache.arch_has_empty_bitmaps = true; + } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_BW_BASE; r->msr_update = mba_wrmsr_amd; } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b0e24cb6f85c..c877642e8a14 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -76,12 +76,14 @@ int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, } /* - * Check whether a cache bit mask is valid. The SDM says: + * Check whether a cache bit mask is valid. + * For Intel the SDM says: * Please note that all (and only) contiguous '1' combinations * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.). * Additionally Haswell requires at least two bits set. + * AMD allows non-contiguous bitmasks. */ -bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r) +static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) { unsigned long first_bit, zero_bit, val; unsigned int cbm_len = r->cache.cbm_len; @@ -93,7 +95,8 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r) return false; } - if (val == 0 || val > r->default_ctrl) { + if ((!r->cache.arch_has_empty_bitmaps && val == 0) || + val > r->default_ctrl) { rdt_last_cmd_puts("Mask out of range\n"); return false; } @@ -101,7 +104,9 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r) first_bit = find_first_bit(&val, cbm_len); zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) { + /* Are non-contiguous bitmaps allowed? */ + if (!r->cache.arch_has_sparse_bitmaps && + (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) { rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val); return false; } @@ -116,30 +121,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r) return true; } -/* - * Check whether a cache bit mask is valid. AMD allows non-contiguous - * bitmasks - */ -bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r) -{ - unsigned long val; - int ret; - - ret = kstrtoul(buf, 16, &val); - if (ret) { - rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); - return false; - } - - if (val > r->default_ctrl) { - rdt_last_cmd_puts("Mask out of range\n"); - return false; - } - - *data = val; - return true; -} - /* * Read one cache bit mask (hex). Check that it is valid for the current * resource type. @@ -165,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, return -EINVAL; } - if (!r->cbm_validate(data->buf, &cbm_val, r)) + if (!cbm_validate(data->buf, &cbm_val, r)) return -EINVAL; if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 21f43994b56d..0b4829484c9e 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -358,6 +358,8 @@ struct msr_param { * in a cache bit mask * @shareable_bits: Bitmask of shareable resource with other * executing entities + * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. + * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. */ struct rdt_cache { unsigned int cbm_len; @@ -365,6 +367,8 @@ struct rdt_cache { unsigned int cbm_idx_mult; unsigned int cbm_idx_offset; unsigned int shareable_bits; + bool arch_has_sparse_bitmaps; + bool arch_has_empty_bitmaps; }; /** @@ -434,7 +438,6 @@ struct rdt_parse_data { * @cache: Cache allocation related data * @format_str: Per resource format string to show domain value * @parse_ctrlval: Per resource function pointer to parse control values - * @cbm_validate Cache bitmask validate function * @evt_list: List of monitoring events * @num_rmid: Number of RMIDs available * @mon_scale: cqm counter * mon_scale = occupancy in bytes @@ -461,7 +464,6 @@ struct rdt_resource { int (*parse_ctrlval)(struct rdt_parse_data *data, struct rdt_resource *r, struct rdt_domain *d); - bool (*cbm_validate)(char *buf, u32 *data, struct rdt_resource *r); struct list_head evt_list; int num_rmid; unsigned int mon_scale; @@ -604,8 +606,6 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); -bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); -bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ From 709c4362725abb5fa1e36fd94893a9b0d049df82 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 8 Jul 2020 16:39:29 +0000 Subject: [PATCH 10/12] cacheinfo: Move resctrl's get_cache_id() to the cacheinfo header file resctrl/core.c defines get_cache_id() for use in its cpu-hotplug callbacks. This gets the id attribute of the cache at the corresponding level of a CPU. Later rework means this private function needs to be shared. Move it to the header file. The name conflicts with a different definition in intel_cacheinfo.c, name it get_cpu_cacheinfo_id() to show its relation with get_cpu_cacheinfo(). Now this is visible on other architectures, check the id attribute has actually been set. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Babu Moger Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/20200708163929.2783-11-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 17 ++--------------- include/linux/cacheinfo.h | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index cbbd751bcc38..1c00f2f0bf0c 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -350,19 +350,6 @@ static void rdt_get_cdp_l2_config(void) rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE); } -static int get_cache_id(int cpu, int level) -{ - struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); - int i; - - for (i = 0; i < ci->num_leaves; i++) { - if (ci->info_list[i].level == level) - return ci->info_list[i].id; - } - - return -1; -} - static void mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) { @@ -560,7 +547,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) */ static void domain_add_cpu(int cpu, struct rdt_resource *r) { - int id = get_cache_id(cpu, r->cache_level); + int id = get_cpu_cacheinfo_id(cpu, r->cache_level); struct list_head *add_pos = NULL; struct rdt_domain *d; @@ -606,7 +593,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) static void domain_remove_cpu(int cpu, struct rdt_resource *r) { - int id = get_cache_id(cpu, r->cache_level); + int id = get_cpu_cacheinfo_id(cpu, r->cache_level); struct rdt_domain *d; d = rdt_find_domain(r, id, NULL); diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 46b92cd61d0c..4f72b47973c3 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEINFO_H #include +#include #include #include @@ -119,4 +120,24 @@ int acpi_find_last_cache_level(unsigned int cpu); const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); +/* + * Get the id of the cache associated with @cpu at level @level. + * cpuhp lock must be held. + */ +static inline int get_cpu_cacheinfo_id(int cpu, int level) +{ + struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); + int i; + + for (i = 0; i < ci->num_leaves; i++) { + if (ci->info_list[i].level == level) { + if (ci->info_list[i].attributes & CACHE_ID) + return ci->info_list[i].id; + return -1; + } + } + + return -1; +} + #endif /* _LINUX_CACHEINFO_H */ From e48cb1a3fb9165009fe318e1db2fde10d303c1d3 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 24 Aug 2020 12:11:20 -0700 Subject: [PATCH 11/12] x86/resctrl: Enumerate per-thread MBA controls Some systems support per-thread Memory Bandwidth Allocation (MBA) which applies a throttling delay value to each hardware thread instead of to a core. Per-thread MBA is enumerated by CPUID. No feature flag is shown in /proc/cpuinfo. User applications need to check a resctrl throttling mode info file to know if the feature is supported. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Babu Moger Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/1598296281-127595-2-git-send-email-fenghua.yu@intel.com --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/cpuid-deps.c | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2901d5df4366..4a7473ae55ac 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -288,6 +288,7 @@ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 3cbe24ca80ab..3e30b26c50ef 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, + { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA }, {} }; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 62b137c3c97a..bccfc9ff3cc1 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -35,6 +35,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, + { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 }, { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, From 29b6bd41ee24f69a85666b9f68d500b382d408fd Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 24 Aug 2020 12:11:21 -0700 Subject: [PATCH 12/12] x86/resctrl: Enable user to view thread or core throttling mode Early Intel hardware implementations of Memory Bandwidth Allocation (MBA) could only control bandwidth at the processor core level. This meant that when two processes with different bandwidth allocations ran simultaneously on the same core the hardware had to resolve this difference. It did so by applying the higher throttling value (lower bandwidth) to both processes. Newer implementations can apply different throttling values to each thread on a core. Introduce a new resctrl file, "thread_throttle_mode", on Intel systems that shows to the user how throttling values are allocated, per-core or per-thread. On systems that support per-core throttling, the file will display "max". On newer systems that support per-thread throttling, the file will display "per-thread". AMD confirmed in [1] that AMD bandwidth allocation is already at thread level but that the AMD implementation does not use a memory delay throttle mode. So to avoid confusion the thread throttling mode would be UNDEFINED on AMD systems and the "thread_throttle_mode" file will not be visible. Originally-by: Reinette Chatre Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Link: https://lkml.kernel.org/r/1598296281-127595-3-git-send-email-fenghua.yu@intel.com Link: [1] https://lore.kernel.org/lkml/18d277fd-6523-319c-d560-66b63ff606b8@amd.com --- Documentation/x86/resctrl_ui.rst | 18 ++++++++- arch/x86/kernel/cpu/resctrl/core.c | 11 ++++++ arch/x86/kernel/cpu/resctrl/internal.h | 30 ++++++++++++--- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 53 +++++++++++++++++++++++++- 4 files changed, 103 insertions(+), 9 deletions(-) diff --git a/Documentation/x86/resctrl_ui.rst b/Documentation/x86/resctrl_ui.rst index 5368cedfb530..e59b7b93a9b4 100644 --- a/Documentation/x86/resctrl_ui.rst +++ b/Documentation/x86/resctrl_ui.rst @@ -138,6 +138,18 @@ with respect to allocation: non-linear. This field is purely informational only. +"thread_throttle_mode": + Indicator on Intel systems of how tasks running on threads + of a physical core are throttled in cases where they + request different memory bandwidth percentages: + + "max": + the smallest percentage is applied + to all threads + "per-thread": + bandwidth percentages are directly applied to + the threads running on the core + If RDT monitoring is available there will be an "L3_MON" directory with the following files: @@ -364,8 +376,10 @@ to the next control step available on the hardware. The bandwidth throttling is a core specific mechanism on some of Intel SKUs. Using a high bandwidth and a low bandwidth setting on two threads -sharing a core will result in both threads being throttled to use the -low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core +sharing a core may result in both threads being throttled to use the +low bandwidth (see "thread_throttle_mode"). + +The fact that Memory bandwidth allocation(MBA) may be a core specific mechanism where as memory bandwidth monitoring(MBM) is done at the package level may lead to confusion when users try to apply control via the MBA and then monitor the bandwidth to see if the controls are diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 1c00f2f0bf0c..9e1712e8aef7 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -273,6 +273,12 @@ static bool __get_mem_config_intel(struct rdt_resource *r) } r->data_width = 3; + if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) + r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; + else + r->membw.throttle_mode = THREAD_THROTTLE_MAX; + thread_throttle_mode_init(); + r->alloc_capable = true; r->alloc_enabled = true; @@ -293,6 +299,11 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) r->membw.delay_linear = false; r->membw.arch_needs_linear = false; + /* + * AMD does not use memory delay throttle model to control + * the allocation like Intel does. + */ + r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = 0; r->membw.bw_gran = 1; /* Max value is 2048, Data width should be 4 in decimal */ diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 0b4829484c9e..80fa997fae60 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -371,22 +371,39 @@ struct rdt_cache { bool arch_has_empty_bitmaps; }; +/** + * enum membw_throttle_mode - System's memory bandwidth throttling mode + * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system + * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core + * always using smallest bandwidth percentage + * assigned to threads, aka "max throttling" + * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread + */ +enum membw_throttle_mode { + THREAD_THROTTLE_UNDEFINED = 0, + THREAD_THROTTLE_MAX, + THREAD_THROTTLE_PER_THREAD, +}; + /** * struct rdt_membw - Memory bandwidth allocation related data * @min_bw: Minimum memory bandwidth percentage user can request * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale * @arch_needs_linear: True if we can't configure non-linear resources + * @throttle_mode: Bandwidth throttling mode when threads request + * different memory bandwidths * @mba_sc: True if MBA software controller(mba_sc) is enabled * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ struct rdt_membw { - u32 min_bw; - u32 bw_gran; - u32 delay_linear; - bool arch_needs_linear; - bool mba_sc; - u32 *mb_map; + u32 min_bw; + u32 bw_gran; + u32 delay_linear; + bool arch_needs_linear; + enum membw_throttle_mode throttle_mode; + bool mba_sc; + u32 *mb_map; }; static inline bool is_llc_occupancy_enabled(void) @@ -607,5 +624,6 @@ void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); +void __init thread_throttle_mode_init(void); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 78f3be10e215..b494187632b2 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1027,6 +1027,19 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, return 0; } +static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct rdt_resource *r = of->kn->parent->priv; + + if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) + seq_puts(seq, "per-thread\n"); + else + seq_puts(seq, "max\n"); + + return 0; +} + static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -1523,6 +1536,17 @@ static struct rftype res_common_files[] = { .seq_show = rdt_delay_linear_show, .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, }, + /* + * Platform specific which (if any) capabilities are provided by + * thread_throttle_mode. Defer "fflags" initialization to platform + * discovery. + */ + { + .name = "thread_throttle_mode", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_thread_throttle_mode_show, + }, { .name = "max_threshold_occupancy", .mode = 0644, @@ -1593,7 +1617,7 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) lockdep_assert_held(&rdtgroup_mutex); for (rft = rfts; rft < rfts + len; rft++) { - if ((fflags & rft->fflags) == rft->fflags) { + if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) { ret = rdtgroup_add_file(kn, rft); if (ret) goto error; @@ -1610,6 +1634,33 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) return ret; } +static struct rftype *rdtgroup_get_rftype_by_name(const char *name) +{ + struct rftype *rfts, *rft; + int len; + + rfts = res_common_files; + len = ARRAY_SIZE(res_common_files); + + for (rft = rfts; rft < rfts + len; rft++) { + if (!strcmp(rft->name, name)) + return rft; + } + + return NULL; +} + +void __init thread_throttle_mode_init(void) +{ + struct rftype *rft; + + rft = rdtgroup_get_rftype_by_name("thread_throttle_mode"); + if (!rft) + return; + + rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB; +} + /** * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file * @r: The resource group with which the file is associated.