From c7ec4f2d684e17d69bbdd7c4324db0ef5daac26a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 8 Jan 2024 07:41:39 +0100 Subject: [PATCH 001/768] xen-netback: don't produce zero-size SKB frags While frontends may submit zero-size requests (wasting a precious slot), core networking code as of at least 3ece782693c4b ("sock: skb_copy_ubufs support for compound pages") can't deal with SKBs when they have all zero-size fragments. Respond to empty requests right when populating fragments; all further processing is fragment based and hence won't encounter these empty requests anymore. In a way this should have been that way from the beginning: When no data is to be transferred for a particular request, there's not even a point in validating the respective grant ref. That's no different from e.g. passing NULL into memcpy() when at the same time the size is 0. This is XSA-448 / CVE-2023-46838. Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Reviewed-by: Paul Durrant --- drivers/net/xen-netback/netback.c | 44 ++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 88f760a7cbc3..d7503aef599f 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -463,12 +463,25 @@ static void xenvif_get_requests(struct xenvif_queue *queue, } for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; - shinfo->nr_frags++, gop++, nr_slots--) { + nr_slots--) { + if (unlikely(!txp->size)) { + unsigned long flags; + + spin_lock_irqsave(&queue->response_lock, flags); + make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); + push_tx_responses(queue); + spin_unlock_irqrestore(&queue->response_lock, flags); + ++txp; + continue; + } + index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; xenvif_tx_create_map_op(queue, pending_idx, txp, txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + ++shinfo->nr_frags; + ++gop; if (txp == first) txp = txfrags; @@ -481,20 +494,39 @@ static void xenvif_get_requests(struct xenvif_queue *queue, shinfo = skb_shinfo(nskb); frags = shinfo->frags; - for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { + if (unlikely(!txp->size)) { + unsigned long flags; + + spin_lock_irqsave(&queue->response_lock, flags); + make_tx_response(queue, txp, 0, + XEN_NETIF_RSP_OKAY); + push_tx_responses(queue); + spin_unlock_irqrestore(&queue->response_lock, + flags); + continue; + } + index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + ++shinfo->nr_frags; + ++gop; } - skb_shinfo(skb)->frag_list = nskb; - } else if (nskb) { + if (shinfo->nr_frags) { + skb_shinfo(skb)->frag_list = nskb; + nskb = NULL; + } + } + + if (nskb) { /* A frag_list skb was allocated but it is no longer needed - * because enough slots were converted to copy ops above. + * because enough slots were converted to copy ops above or some + * were empty. */ kfree_skb(nskb); } From 024b32db43a359e0ded3fcc6cd86247cbbed4224 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 21 Dec 2023 13:55:48 -0800 Subject: [PATCH 002/768] drm/bridge: parade-ps8640: Wait for HPD when doing an AUX transfer Unlike what is claimed in commit f5aa7d46b0ee ("drm/bridge: parade-ps8640: Provide wait_hpd_asserted() in struct drm_dp_aux"), if someone manually tries to do an AUX transfer (like via `i2cdump ${bus} 0x50 i`) while the panel is off we don't just get a simple transfer error. Instead, the whole ps8640 gets thrown for a loop and goes into a bad state. Let's put the function to wait for the HPD (and the magical 50 ms after first reset) back in when we're doing an AUX transfer. This shouldn't actually make things much slower (assuming the panel is on) because we should immediately poll and see the HPD high. Mostly this is just an extra i2c transfer to the bridge. Fixes: f5aa7d46b0ee ("drm/bridge: parade-ps8640: Provide wait_hpd_asserted() in struct drm_dp_aux") Tested-by: Pin-yen Lin Reviewed-by: Pin-yen Lin Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231221135548.1.I10f326a9305d57ad32cee7f8d9c60518c8be20fb@changeid --- drivers/gpu/drm/bridge/parade-ps8640.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 541e4f5afc4c..fb5e9ae9ad81 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -346,6 +346,11 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, int ret; pm_runtime_get_sync(dev); + ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); + if (ret) { + pm_runtime_put_sync_suspend(dev); + return ret; + } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); From 571c7ed0baa928447bac29ef79a90bd6525d1ebc Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 13 Dec 2023 16:42:01 -0600 Subject: [PATCH 003/768] dt-bindings: display: samsung,exynos-mixer: Fix 'regs' typo The correct property name is 'reg' not 'regs'. Fixes: 68e89bb36d58 ("dt-bindings: display: samsung,exynos-mixer: convert to dtschema") Signed-off-by: Rob Herring Signed-off-by: Inki Dae --- .../bindings/display/samsung/samsung,exynos-mixer.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml index 25d53fde92e1..597c9cc6a312 100644 --- a/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml +++ b/Documentation/devicetree/bindings/display/samsung/samsung,exynos-mixer.yaml @@ -85,7 +85,7 @@ allOf: clocks: minItems: 6 maxItems: 6 - regs: + reg: minItems: 2 maxItems: 2 @@ -99,7 +99,7 @@ allOf: clocks: minItems: 4 maxItems: 4 - regs: + reg: minItems: 2 maxItems: 2 @@ -116,7 +116,7 @@ allOf: clocks: minItems: 3 maxItems: 3 - regs: + reg: minItems: 1 maxItems: 1 From 989cd9fd1ffe1a964429325f9092ea8f0db3f953 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 19 Dec 2023 18:25:16 +0200 Subject: [PATCH 004/768] wifi: p54: fix GCC format truncation warning with wiphy->fw_version GCC 13.2 warns: drivers/net/wireless/intersil/p54/fwio.c:128:34: warning: '%s' directive output may be truncated writing up to 39 bytes into a region of size 32 [-Wformat-truncation=] drivers/net/wireless/intersil/p54/fwio.c:128:33: note: directive argument in the range [0, 16777215] drivers/net/wireless/intersil/p54/fwio.c:128:33: note: directive argument in the range [0, 255] drivers/net/wireless/intersil/p54/fwio.c:127:17: note: 'snprintf' output between 7 and 52 bytes into a destination of size 32 The issue here is that wiphy->fw_version is 32 bytes and in theory the string we try to place there can be 39 bytes. wiphy->fw_version is used for providing the firmware version to user space via ethtool, so not really important. fw_version in theory can be 24 bytes but in practise it's shorter, so even if print only 19 bytes via ethtool there should not be any practical difference. I did consider removing fw_var from the string altogether or making the maximum length for fw_version 19 bytes, but chose this approach as it was the least intrusive. Compile tested only. Signed-off-by: Kalle Valo Acked-by: Christian Lamparter # Tested with Dell 1450 USB Signed-off-by: Kalle Valo Link: https://msgid.link/20231219162516.898205-1-kvalo@kernel.org --- drivers/net/wireless/intersil/p54/fwio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/p54/fwio.c b/drivers/net/wireless/intersil/p54/fwio.c index b52cce38115d..c4fe70e05b9b 100644 --- a/drivers/net/wireless/intersil/p54/fwio.c +++ b/drivers/net/wireless/intersil/p54/fwio.c @@ -125,7 +125,7 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw) "FW rev %s - Softmac protocol %x.%x\n", fw_version, priv->fw_var >> 8, priv->fw_var & 0xff); snprintf(dev->wiphy->fw_version, sizeof(dev->wiphy->fw_version), - "%s - %x.%x", fw_version, + "%.19s - %x.%x", fw_version, priv->fw_var >> 8, priv->fw_var & 0xff); } From 2ad62d16cd24b5e2f18318e97e1f06bef9f1ce7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Tue, 9 Jan 2024 11:28:24 -0300 Subject: [PATCH 005/768] drm/v3d: Free the job and assign it to NULL if initialization fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if `v3d_job_init()` fails (e.g. in the IGT test "bad-in-sync", where we submit an invalid in-sync to the IOCTL), then we end up with the following NULL pointer dereference: [ 34.146279] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000078 [ 34.146301] Mem abort info: [ 34.146306] ESR = 0x0000000096000005 [ 34.146315] EC = 0x25: DABT (current EL), IL = 32 bits [ 34.146322] SET = 0, FnV = 0 [ 34.146328] EA = 0, S1PTW = 0 [ 34.146334] FSC = 0x05: level 1 translation fault [ 34.146340] Data abort info: [ 34.146345] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 34.146351] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 34.146357] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 34.146366] user pgtable: 4k pages, 39-bit VAs, pgdp=00000001232e6000 [ 34.146375] [0000000000000078] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 34.146399] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 34.146406] Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device algif_hash aes_neon_bs aes_neon_blk algif_skcipher af_alg bnep hid_logitech_hidpp brcmfmac_wcc brcmfmac brcmutil hci_uart vc4 btbcm cfg80211 bluetooth bcm2835_v4l2(C) snd_soc_hdmi_codec binfmt_misc cec drm_display_helper hid_logitech_dj bcm2835_mmal_vchiq(C) drm_dma_helper drm_kms_helper videobuf2_v4l2 raspberrypi_hwmon ecdh_generic videobuf2_vmalloc videobuf2_memops ecc videobuf2_common rfkill videodev libaes snd_soc_core dwc2 i2c_brcmstb snd_pcm_dmaengine snd_bcm2835(C) i2c_bcm2835 pwm_bcm2835 snd_pcm mc v3d snd_timer snd gpu_sched drm_shmem_helper nvmem_rmem uio_pdrv_genirq uio i2c_dev drm fuse dm_mod drm_panel_orientation_quirks backlight configfs ip_tables x_tables ipv6 [ 34.146556] CPU: 1 PID: 1890 Comm: v3d_submit_csd Tainted: G C 6.7.0-rc3-g49ddab089611 #68 [ 34.146563] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT) [ 34.146569] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 34.146575] pc : drm_sched_job_cleanup+0x3c/0x190 [gpu_sched] [ 34.146611] lr : v3d_submit_csd_ioctl+0x1b4/0x460 [v3d] [ 34.146653] sp : ffffffc083cbbb80 [ 34.146658] x29: ffffffc083cbbb90 x28: ffffff81035afc00 x27: ffffffe77a641168 [ 34.146668] x26: ffffff81056a8000 x25: 0000000000000058 x24: 0000000000000000 [ 34.146677] x23: ffffff81065e2000 x22: ffffff81035afe00 x21: ffffffc083cbbcf0 [ 34.146686] x20: ffffff81035afe00 x19: 00000000ffffffea x18: 0000000000000000 [ 34.146694] x17: 0000000000000000 x16: ffffffe7989e34b0 x15: 0000000000000000 [ 34.146703] x14: 0000000004000004 x13: ffffff81035afe80 x12: ffffffc083cb8000 [ 34.146711] x11: cc57e05dfbe5ef00 x10: cc57e05dfbe5ef00 x9 : ffffffe77a64131c [ 34.146719] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f [ 34.146727] x5 : 0000000000000040 x4 : ffffff81fefb03f0 x3 : ffffffc083cbba40 [ 34.146736] x2 : ffffff81056a8000 x1 : ffffffe7989e35e8 x0 : 0000000000000000 [ 34.146745] Call trace: [ 34.146748] drm_sched_job_cleanup+0x3c/0x190 [gpu_sched] [ 34.146768] v3d_submit_csd_ioctl+0x1b4/0x460 [v3d] [ 34.146791] drm_ioctl_kernel+0xe0/0x120 [drm] [ 34.147029] drm_ioctl+0x264/0x408 [drm] [ 34.147135] __arm64_sys_ioctl+0x9c/0xe0 [ 34.147152] invoke_syscall+0x4c/0x118 [ 34.147162] el0_svc_common+0xb8/0xf0 [ 34.147168] do_el0_svc+0x28/0x40 [ 34.147174] el0_svc+0x38/0x88 [ 34.147184] el0t_64_sync_handler+0x84/0x100 [ 34.147191] el0t_64_sync+0x190/0x198 [ 34.147201] Code: aa0003f4 f90007e8 f9401008 aa0803e0 (b8478c09) [ 34.147210] ---[ end trace 0000000000000000 ]--- This happens because we are calling `drm_sched_job_cleanup()` twice: once at `v3d_job_init()` and again when we call `v3d_job_cleanup()`. To mitigate this issue, we can return to the same approach that we used to use before 464c61e76de8: deallocate the job after `v3d_job_init()` fails and assign it to NULL. Then, when we call `v3d_job_cleanup()`, job is NULL and the function returns. Fixes: 464c61e76de8 ("drm/v3d: Decouple job allocation from job initiation") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240109142857.1122704-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_submit.c | 35 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index fcff41dd2315..88f63d526b22 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -147,6 +147,13 @@ v3d_job_allocate(void **container, size_t size) return 0; } +static void +v3d_job_deallocate(void **container) +{ + kfree(*container); + *container = NULL; +} + static int v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, struct v3d_job *job, void (*free)(struct kref *ref), @@ -273,8 +280,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, ret = v3d_job_init(v3d, file_priv, &(*job)->base, v3d_job_free, args->in_sync, se, V3D_CSD); - if (ret) + if (ret) { + v3d_job_deallocate((void *)job); return ret; + } ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job)); if (ret) @@ -282,8 +291,10 @@ v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, ret = v3d_job_init(v3d, file_priv, *clean_job, v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)clean_job); return ret; + } (*job)->args = *args; @@ -860,8 +871,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &render->base, v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&render); goto fail; + } render->start = args->rcl_start; render->end = args->rcl_end; @@ -874,8 +887,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &bin->base, v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&bin); goto fail; + } bin->start = args->bcl_start; bin->end = args->bcl_end; @@ -892,8 +907,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&clean_job); goto fail; + } last_job = clean_job; } else { @@ -1015,8 +1032,10 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &job->base, v3d_job_free, args->in_sync, &se, V3D_TFU); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&job); goto fail; + } job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), sizeof(*job->base.bo), GFP_KERNEL); @@ -1233,8 +1252,10 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, ret = v3d_job_init(v3d, file_priv, &cpu_job->base, v3d_job_free, 0, &se, V3D_CPU); - if (ret) + if (ret) { + v3d_job_deallocate((void *)&cpu_job); goto fail; + } clean_job = cpu_job->indirect_csd.clean_job; csd_job = cpu_job->indirect_csd.job; From b271fee9a41ca1474d30639fd6cc912c9901d0f8 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 19 Dec 2023 01:02:28 +0900 Subject: [PATCH 006/768] btrfs: zoned: factor out prepare_allocation_zoned() Factor out prepare_allocation_zoned() for further extension. While at it, optimize the if-branch a bit. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f396aba92c57..d260b970bec7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4298,6 +4298,24 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, return 0; } +static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl) +{ + if (ffe_ctl->for_treelog) { + spin_lock(&fs_info->treelog_bg_lock); + if (fs_info->treelog_bg) + ffe_ctl->hint_byte = fs_info->treelog_bg; + spin_unlock(&fs_info->treelog_bg_lock); + } else if (ffe_ctl->for_data_reloc) { + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg) + ffe_ctl->hint_byte = fs_info->data_reloc_bg; + spin_unlock(&fs_info->relocation_bg_lock); + } + + return 0; +} + static int prepare_allocation(struct btrfs_fs_info *fs_info, struct find_free_extent_ctl *ffe_ctl, struct btrfs_space_info *space_info, @@ -4308,19 +4326,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, return prepare_allocation_clustered(fs_info, ffe_ctl, space_info, ins); case BTRFS_EXTENT_ALLOC_ZONED: - if (ffe_ctl->for_treelog) { - spin_lock(&fs_info->treelog_bg_lock); - if (fs_info->treelog_bg) - ffe_ctl->hint_byte = fs_info->treelog_bg; - spin_unlock(&fs_info->treelog_bg_lock); - } - if (ffe_ctl->for_data_reloc) { - spin_lock(&fs_info->relocation_bg_lock); - if (fs_info->data_reloc_bg) - ffe_ctl->hint_byte = fs_info->data_reloc_bg; - spin_unlock(&fs_info->relocation_bg_lock); - } - return 0; + return prepare_allocation_zoned(fs_info, ffe_ctl); default: BUG(); } From 02444f2ac26eae6385a65fcd66915084d15dffba Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 19 Dec 2023 01:02:29 +0900 Subject: [PATCH 007/768] btrfs: zoned: optimize hint byte for zoned allocator Writing sequentially to a huge file on btrfs on a SMR HDD revealed a decline of the performance (220 MiB/s to 30 MiB/s after 500 minutes). The performance goes down because of increased latency of the extent allocation, which is induced by a traversing of a lot of full block groups. So, this patch optimizes the ffe_ctl->hint_byte by choosing a block group with sufficient size from the active block group list, which does not contain full block groups. After applying the patch, the performance is maintained well. Fixes: 2eda57089ea3 ("btrfs: zoned: implement sequential extent allocation") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d260b970bec7..6d680031211a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4311,6 +4311,24 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info, if (fs_info->data_reloc_bg) ffe_ctl->hint_byte = fs_info->data_reloc_bg; spin_unlock(&fs_info->relocation_bg_lock); + } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) { + struct btrfs_block_group *block_group; + + spin_lock(&fs_info->zone_active_bgs_lock); + list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { + /* + * No lock is OK here because avail is monotinically + * decreasing, and this is just a hint. + */ + u64 avail = block_group->zone_capacity - block_group->alloc_offset; + + if (block_group_bits(block_group, ffe_ctl->flags) && + avail >= ffe_ctl->num_bytes) { + ffe_ctl->hint_byte = block_group->start; + break; + } + } + spin_unlock(&fs_info->zone_active_bgs_lock); } return 0; From 6ff09b6b8c2fb6b3edda4ffaa173153a40653067 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 21 Dec 2023 11:47:45 +0300 Subject: [PATCH 008/768] btrfs: fix kvcalloc() arguments order in btrfs_ioctl_send() When compiling with gcc version 14.0.0 20231220 (experimental) and W=1, I've noticed the following warning: fs/btrfs/send.c: In function 'btrfs_ioctl_send': fs/btrfs/send.c:8208:44: warning: 'kvcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Wcalloc-transposed-args] 8208 | sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), | ^ Since 'n' and 'size' arguments of 'kvcalloc()' are multiplied to calculate the final size, their actual order doesn't affect the result and so this is not a bug. But it's still worth to fix it. Signed-off-by: Dmitry Antipov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4e36550618e5..2d7519a6ce72 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8205,8 +8205,8 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) goto out; } - sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots), - arg->clone_sources_count + 1, + sctx->clone_roots = kvcalloc(arg->clone_sources_count + 1, + sizeof(*sctx->clone_roots), GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; From f03e274a8b29d1d1c1bbd7f764766cb5ca537ab7 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 3 Jan 2024 13:31:27 +0300 Subject: [PATCH 009/768] btrfs: ref-verify: free ref cache before clearing mount opt As clearing REF_VERIFY mount option indicates there were some errors in a ref-verify process, a ref cache is not relevant anymore and should be freed. btrfs_free_ref_cache() requires REF_VERIFY option being set so call it just before clearing the mount option. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Reported-by: syzbot+be14ed7728594dc8bd42@syzkaller.appspotmail.com Fixes: fd708b81d972 ("Btrfs: add a extent ref verify tool") CC: stable@vger.kernel.org # 5.4+ Closes: https://lore.kernel.org/lkml/000000000000e5a65c05ee832054@google.com/ Reported-by: syzbot+c563a3c79927971f950f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000007fe09705fdc6086c@google.com/ Reviewed-by: Anand Jain Signed-off-by: Fedor Pchelkin Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ref-verify.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 6486f0d7e993..8c4fc98ca9ce 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -889,8 +889,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, out_unlock: spin_unlock(&fs_info->ref_verify_lock); out: - if (ret) + if (ret) { + btrfs_free_ref_cache(fs_info); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); + } return ret; } @@ -1021,8 +1023,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) } } if (ret) { - btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); btrfs_free_ref_cache(fs_info); + btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); } btrfs_free_path(path); return ret; From d967c914a633ee797255261808720f791b658f24 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 22 Dec 2023 01:46:16 +0900 Subject: [PATCH 010/768] btrfs: fix unbalanced unlock of mapping_tree_lock The error path of btrfs_get_chunk_map() releases fs_info->mapping_tree_lock. But, it is taken and released in btrfs_find_chunk_map(). So, there is no need to do so. Fixes: 7dc66abb5a47 ("btrfs: use a dedicated data structure for chunk maps") Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4c32497311d2..d67785be2c77 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3087,7 +3087,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, map = btrfs_find_chunk_map(fs_info, logical, length); if (unlikely(!map)) { - read_unlock(&fs_info->mapping_tree_lock); btrfs_crit(fs_info, "unable to find chunk map for logical %llu length %llu", logical, length); @@ -3095,7 +3094,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, } if (unlikely(map->start > logical || map->start + map->chunk_len <= logical)) { - read_unlock(&fs_info->mapping_tree_lock); btrfs_crit(fs_info, "found a bad chunk map, wanted %llu-%llu, found %llu-%llu", logical, logical + length, map->start, From b18f3b60b35a8c01c9a2a0f0d6424c6d73971dc3 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 22 Dec 2023 13:56:34 +0900 Subject: [PATCH 011/768] btrfs: zoned: fix lock ordering in btrfs_zone_activate() The btrfs CI reported a lockdep warning as follows by running generic generic/129. WARNING: possible circular locking dependency detected 6.7.0-rc5+ #1 Not tainted ------------------------------------------------------ kworker/u5:5/793427 is trying to acquire lock: ffff88813256d028 (&cache->lock){+.+.}-{2:2}, at: btrfs_zone_finish_one_bg+0x5e/0x130 but task is already holding lock: ffff88810a23a318 (&fs_info->zone_active_bgs_lock){+.+.}-{2:2}, at: btrfs_zone_finish_one_bg+0x34/0x130 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&fs_info->zone_active_bgs_lock){+.+.}-{2:2}: ... -> #0 (&cache->lock){+.+.}-{2:2}: ... This is because we take fs_info->zone_active_bgs_lock after a block_group's lock in btrfs_zone_activate() while doing the opposite in other places. Fix the issue by expanding the fs_info->zone_active_bgs_lock's critical section and taking it before a block_group's lock. Fixes: a7e1ac7bdc5a ("btrfs: zoned: reserve zones for an active metadata/system block group") CC: stable@vger.kernel.org # 6.6 Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 12066afc235c..ac9bbe0c4ffe 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2072,6 +2072,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) map = block_group->physical_map; + spin_lock(&fs_info->zone_active_bgs_lock); spin_lock(&block_group->lock); if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { ret = true; @@ -2084,7 +2085,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } - spin_lock(&fs_info->zone_active_bgs_lock); for (i = 0; i < map->num_stripes; i++) { struct btrfs_zoned_device_info *zinfo; int reserved = 0; @@ -2104,20 +2104,17 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) */ if (atomic_read(&zinfo->active_zones_left) <= reserved) { ret = false; - spin_unlock(&fs_info->zone_active_bgs_lock); goto out_unlock; } if (!btrfs_dev_set_active_zone(device, physical)) { /* Cannot activate the zone */ ret = false; - spin_unlock(&fs_info->zone_active_bgs_lock); goto out_unlock; } if (!is_data) zinfo->reserved_active_zones--; } - spin_unlock(&fs_info->zone_active_bgs_lock); /* Successfully activated all the zones */ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); @@ -2125,8 +2122,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* For the active block group list */ btrfs_get_block_group(block_group); - - spin_lock(&fs_info->zone_active_bgs_lock); list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); spin_unlock(&fs_info->zone_active_bgs_lock); @@ -2134,6 +2129,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) out_unlock: spin_unlock(&block_group->lock); + spin_unlock(&fs_info->zone_active_bgs_lock); return ret; } From 7081929ab2572920e94d70be3d332e5c9f97095a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 Jan 2024 11:48:46 -0800 Subject: [PATCH 012/768] btrfs: don't abort filesystem when attempting to snapshot deleted subvolume If the source file descriptor to the snapshot ioctl refers to a deleted subvolume, we get the following abort: BTRFS: Transaction aborted (error -2) WARNING: CPU: 0 PID: 833 at fs/btrfs/transaction.c:1875 create_pending_snapshot+0x1040/0x1190 [btrfs] Modules linked in: pata_acpi btrfs ata_piix libata scsi_mod virtio_net blake2b_generic xor net_failover virtio_rng failover scsi_common rng_core raid6_pq libcrc32c CPU: 0 PID: 833 Comm: t_snapshot_dele Not tainted 6.7.0-rc6 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014 RIP: 0010:create_pending_snapshot+0x1040/0x1190 [btrfs] RSP: 0018:ffffa09c01337af8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff9982053e7c78 RCX: 0000000000000027 RDX: ffff99827dc20848 RSI: 0000000000000001 RDI: ffff99827dc20840 RBP: ffffa09c01337c00 R08: 0000000000000000 R09: ffffa09c01337998 R10: 0000000000000003 R11: ffffffffb96da248 R12: fffffffffffffffe R13: ffff99820535bb28 R14: ffff99820b7bd000 R15: ffff99820381ea80 FS: 00007fe20aadabc0(0000) GS:ffff99827dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000559a120b502f CR3: 00000000055b6000 CR4: 00000000000006f0 Call Trace: ? create_pending_snapshot+0x1040/0x1190 [btrfs] ? __warn+0x81/0x130 ? create_pending_snapshot+0x1040/0x1190 [btrfs] ? report_bug+0x171/0x1a0 ? handle_bug+0x3a/0x70 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? create_pending_snapshot+0x1040/0x1190 [btrfs] ? create_pending_snapshot+0x1040/0x1190 [btrfs] create_pending_snapshots+0x92/0xc0 [btrfs] btrfs_commit_transaction+0x66b/0xf40 [btrfs] btrfs_mksubvol+0x301/0x4d0 [btrfs] btrfs_mksnapshot+0x80/0xb0 [btrfs] __btrfs_ioctl_snap_create+0x1c2/0x1d0 [btrfs] btrfs_ioctl_snap_create_v2+0xc4/0x150 [btrfs] btrfs_ioctl+0x8a6/0x2650 [btrfs] ? kmem_cache_free+0x22/0x340 ? do_sys_openat2+0x97/0xe0 __x64_sys_ioctl+0x97/0xd0 do_syscall_64+0x46/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 RIP: 0033:0x7fe20abe83af RSP: 002b:00007ffe6eff1360 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007fe20abe83af RDX: 00007ffe6eff23c0 RSI: 0000000050009417 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0000000000000000 R09: 00007fe20ad16cd0 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe6eff13c0 R14: 00007fe20ad45000 R15: 0000559a120b6d58 ---[ end trace 0000000000000000 ]--- BTRFS: error (device vdc: state A) in create_pending_snapshot:1875: errno=-2 No such entry BTRFS info (device vdc: state EA): forced readonly BTRFS warning (device vdc: state EA): Skipping commit of aborted transaction. BTRFS: error (device vdc: state EA) in cleanup_transaction:2055: errno=-2 No such entry This happens because create_pending_snapshot() initializes the new root item as a copy of the source root item. This includes the refs field, which is 0 for a deleted subvolume. The call to btrfs_insert_root() therefore inserts a root with refs == 0. btrfs_get_new_fs_root() then finds the root and returns -ENOENT if refs == 0, which causes create_pending_snapshot() to abort. Fix it by checking the source root's refs before attempting the snapshot, but after locking subvol_sem to avoid racing with deletion. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Sweet Tea Dorminy Reviewed-by: Anand Jain Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4e50b62db2a8..fea5d37528b8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, return -EOPNOTSUPP; } + if (btrfs_root_refs(&root->root_item) == 0) + return -ENOENT; + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) return -EINVAL; From 3324d0547861b16cf436d54abba7052e0c8aa9de Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 Jan 2024 11:48:47 -0800 Subject: [PATCH 013/768] btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted Sweet Tea spotted a race between subvolume deletion and snapshotting that can result in the root item for the snapshot having the BTRFS_ROOT_SUBVOL_DEAD flag set. The race is: Thread 1 | Thread 2 ----------------------------------------------|---------- btrfs_delete_subvolume | btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)| |btrfs_mksubvol | down_read(subvol_sem) | create_snapshot | ... | create_pending_snapshot | copy root item from source down_write(subvol_sem) | This flag is only checked in send and swap activate, which this would cause to fail mysteriously. create_snapshot() now checks the root refs to reject a deleted subvolume, so we can fix this by locking subvol_sem earlier so that the BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically. CC: stable@vger.kernel.org # 4.14+ Reported-by: Sweet Tea Dorminy Reviewed-by: Sweet Tea Dorminy Reviewed-by: Anand Jain Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b3e39610cc95..7bcc1c03437a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) u64 root_flags; int ret; + down_write(&fs_info->subvol_sem); + /* * Don't allow to delete a subvolume with send in progress. This is * inside the inode lock so the error handling that has to drop the bit @@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) btrfs_warn(fs_info, "attempt to delete subvolume %llu during send", dest->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } if (atomic_read(&dest->nr_swapfiles)) { spin_unlock(&dest->root_item_lock); btrfs_warn(fs_info, "attempt to delete subvolume %llu with active swapfile", root->root_key.objectid); - return -EPERM; + ret = -EPERM; + goto out_up_write; } root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags | BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - down_write(&fs_info->subvol_sem); - ret = may_destroy_subvol(dest); if (ret) - goto out_up_write; + goto out_undead; btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); /* @@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry) */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (ret) - goto out_up_write; + goto out_undead; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -4563,15 +4565,17 @@ out_end_trans: inode->i_flags |= S_DEAD; out_release: btrfs_subvolume_release_metadata(root, &block_rsv); -out_up_write: - up_write(&fs_info->subvol_sem); +out_undead: if (ret) { spin_lock(&dest->root_item_lock); root_flags = btrfs_root_flags(&dest->root_item); btrfs_set_root_flags(&dest->root_item, root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); - } else { + } +out_up_write: + up_write(&fs_info->subvol_sem); + if (!ret) { d_invalidate(dentry); btrfs_prune_dentries(dest); ASSERT(dest->send_in_progress == 0); From 173431b274a9a54fc10b273b46e67f46bcf62d2e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 10 Jan 2024 08:58:26 +1030 Subject: [PATCH 014/768] btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args Add extra sanity check for btrfs_ioctl_defrag_range_args::flags. This is not really to enhance fuzzing tests, but as a preparation for future expansion on btrfs_ioctl_defrag_range_args. In the future we're going to add new members, allowing more fine tuning for btrfs defrag. Without the -ENONOTSUPP error, there would be no way to detect if the kernel supports those new defrag features. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 4 ++++ include/uapi/linux/btrfs.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fea5d37528b8..5d42319b43f2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2602,6 +2602,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EFAULT; goto out; } + if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) { + ret = -EOPNOTSUPP; + goto out; + } /* compression requires us to start the IO */ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { range.flags |= BTRFS_DEFRAG_RANGE_START_IO; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 7c29d82db9ee..f8bc34a6bcfa 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -614,6 +614,9 @@ struct btrfs_ioctl_clone_range_args { */ #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP (BTRFS_DEFRAG_RANGE_COMPRESS | \ + BTRFS_DEFRAG_RANGE_START_IO) + struct btrfs_ioctl_defrag_range_args { /* start of the defrag operation */ __u64 start; From 45dd7df26cee741b31c25ffdd44fb8794eb45ccd Mon Sep 17 00:00:00 2001 From: Markus Niebel Date: Thu, 12 Oct 2023 10:42:08 +0200 Subject: [PATCH 015/768] drm: panel-simple: add missing bus flags for Tianma tm070jvhg[30/33] The DE signal is active high on this display, fill in the missing bus_flags. This aligns panel_desc with its display_timing. Fixes: 9a2654c0f62a ("drm/panel: Add and fill drm_panel type field") Fixes: b3bfcdf8a3b6 ("drm/panel: simple: add Tianma TM070JVHG33") Signed-off-by: Markus Niebel Signed-off-by: Alexander Stein Reviewed-by: Sam Ravnborg Link: https://lore.kernel.org/r/20231012084208.2731650-1-alexander.stein@ew.tq-group.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231012084208.2731650-1-alexander.stein@ew.tq-group.com --- drivers/gpu/drm/panel/panel-simple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 9367a4572dcf..1ba633fd5696 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3861,6 +3861,7 @@ static const struct panel_desc tianma_tm070jdhg30 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct panel_desc tianma_tm070jvhg33 = { @@ -3873,6 +3874,7 @@ static const struct panel_desc tianma_tm070jvhg33 = { }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, .connector_type = DRM_MODE_CONNECTOR_LVDS, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; static const struct display_timing tianma_tm070rvhg71_timing = { From 62b143b5ec4a14e1ae0dede5aabaf1832e3b0073 Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 5 Jan 2024 07:53:02 +0100 Subject: [PATCH 016/768] drm/panel: samsung-s6d7aa0: drop DRM_BUS_FLAG_DE_HIGH for lsl080al02 It turns out that I had misconfigured the device I was using the panel with; the bus data polarity is not high for this panel, I had to change the config on the display controller's side. Fix the panel config to properly reflect its accurate settings. Fixes: 6810bb390282 ("drm/panel: Add Samsung S6D7AA0 panel controller driver") Reviewed-by: Jessica Zhang Signed-off-by: Artur Weber Link: https://lore.kernel.org/r/20240105-tab3-display-fixes-v2-2-904d1207bf6f@gmail.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240105-tab3-display-fixes-v2-2-904d1207bf6f@gmail.com --- drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c index ea5a85779382..f23d8832a1ad 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c @@ -309,7 +309,7 @@ static const struct s6d7aa0_panel_desc s6d7aa0_lsl080al02_desc = { .off_func = s6d7aa0_lsl080al02_off, .drm_mode = &s6d7aa0_lsl080al02_mode, .mode_flags = MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_NO_HFP, - .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .bus_flags = 0, .has_backlight = false, .use_passwd3 = false, From 589830b13ac21bddf99b9bc5a4ec17813d0869ef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Oct 2023 13:55:58 +0200 Subject: [PATCH 017/768] drm/panel/raydium-rm692e5: select CONFIG_DRM_DISPLAY_DP_HELPER As with several other panel drivers, this fails to link without the DP helper library: ld: drivers/gpu/drm/panel/panel-raydium-rm692e5.o: in function `rm692e5_prepare': panel-raydium-rm692e5.c:(.text+0x11f4): undefined reference to `drm_dsc_pps_payload_pack' Select the same symbols that the others already use. Fixes: 988d0ff29ecf7 ("drm/panel: Add driver for BOE RM692E5 AMOLED panel") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231023115619.3551348-1-arnd@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231023115619.3551348-1-arnd@kernel.org --- drivers/gpu/drm/panel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 99e14dc212ec..a4ac4b47777f 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -530,6 +530,8 @@ config DRM_PANEL_RAYDIUM_RM692E5 depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER help Say Y here if you want to enable support for Raydium RM692E5-based display panels, such as the one found in the Fairphone 5 smartphone. From a9668169961106f3598384fe95004106ec191201 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 17:34:14 +0300 Subject: [PATCH 018/768] HID: hid-steam: remove pointless error message This error message doesn't really add any information. If modprobe fails then the user will already know what the error code is. In the case of kmalloc() it's a style violation to print an error message for that because kmalloc has it's own better error messages built in. Signed-off-by: Dan Carpenter Reviewed-by: Vicki Pfau Link: https://lore.kernel.org/r/305898fb-6bd4-4749-806c-05ec51bbeb80@moroto.mountain Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-steam.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index b3c4e50e248a..59df6ead7b54 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1109,10 +1109,9 @@ static int steam_probe(struct hid_device *hdev, return hid_hw_start(hdev, HID_CONNECT_DEFAULT); steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL); - if (!steam) { - ret = -ENOMEM; - goto steam_alloc_fail; - } + if (!steam) + return -ENOMEM; + steam->hdev = hdev; hid_set_drvdata(hdev, steam); spin_lock_init(&steam->lock); @@ -1179,9 +1178,6 @@ hid_hw_start_fail: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); -steam_alloc_fail: - hid_err(hdev, "%s: failed with error %d\n", - __func__, ret); return ret; } From a9f1da09c69f13ef471db8b22107a28042d230ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 17:35:06 +0300 Subject: [PATCH 019/768] HID: hid-steam: Fix cleanup in probe() There are a number of issues in this code. First of all if steam_create_client_hid() fails then it leads to an error pointer dereference when we call hid_destroy_device(steam->client_hdev). Also there are a number of leaks. hid_hw_stop() is not called if hid_hw_open() fails for example. And it doesn't call steam_unregister() or hid_hw_close(). Fixes: 691ead124a0c ("HID: hid-steam: Clean up locking") Signed-off-by: Dan Carpenter Reviewed-by: Vicki Pfau Link: https://lore.kernel.org/r/1fd87904-dabf-4879-bb89-72d13ebfc91e@moroto.mountain Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-steam.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 59df6ead7b54..b08a5ab58528 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1128,14 +1128,14 @@ static int steam_probe(struct hid_device *hdev, */ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW); if (ret) - goto hid_hw_start_fail; + goto err_cancel_work; ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "%s:hid_hw_open\n", __func__); - goto hid_hw_open_fail; + goto err_hw_stop; } if (steam->quirks & STEAM_QUIRK_WIRELESS) { @@ -1151,33 +1151,37 @@ static int steam_probe(struct hid_device *hdev, hid_err(hdev, "%s:steam_register failed with error %d\n", __func__, ret); - goto input_register_fail; + goto err_hw_close; } } steam->client_hdev = steam_create_client_hid(hdev); if (IS_ERR(steam->client_hdev)) { ret = PTR_ERR(steam->client_hdev); - goto client_hdev_fail; + goto err_stream_unregister; } steam->client_hdev->driver_data = steam; ret = hid_add_device(steam->client_hdev); if (ret) - goto client_hdev_add_fail; + goto err_destroy; return 0; -client_hdev_add_fail: - hid_hw_stop(hdev); -client_hdev_fail: +err_destroy: hid_destroy_device(steam->client_hdev); -input_register_fail: -hid_hw_open_fail: -hid_hw_start_fail: +err_stream_unregister: + if (steam->connected) + steam_unregister(steam); +err_hw_close: + hid_hw_close(hdev); +err_hw_stop: + hid_hw_stop(hdev); +err_cancel_work: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); + return ret; } From 1f1626ac0428820f998245478610f452650bcab5 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 14 Jan 2024 00:33:45 +0300 Subject: [PATCH 020/768] drm/ttm: fix ttm pool initialization for no-dma-device drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QXL driver doesn't use any device for DMA mappings or allocations so dev_to_node() will panic inside ttm_device_init() on NUMA systems: general protection fault, probably for non-canonical address 0xdffffc000000007a: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x00000000000003d0-0x00000000000003d7] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.7.0+ #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:ttm_device_init+0x10e/0x340 Call Trace: qxl_ttm_init+0xaa/0x310 qxl_device_init+0x1071/0x2000 qxl_pci_probe+0x167/0x3f0 local_pci_probe+0xe1/0x1b0 pci_device_probe+0x29d/0x790 really_probe+0x251/0x910 __driver_probe_device+0x1ea/0x390 driver_probe_device+0x4e/0x2e0 __driver_attach+0x1e3/0x600 bus_for_each_dev+0x12d/0x1c0 bus_add_driver+0x25a/0x590 driver_register+0x15c/0x4b0 qxl_pci_driver_init+0x67/0x80 do_one_initcall+0xf5/0x5d0 kernel_init_freeable+0x637/0xb10 kernel_init+0x1c/0x2e0 ret_from_fork+0x48/0x80 ret_from_fork_asm+0x1b/0x30 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:ttm_device_init+0x10e/0x340 Fall back to NUMA_NO_NODE if there is no device for DMA. Found by Linux Verification Center (linuxtesting.org). Fixes: b0a7ce53d494 ("drm/ttm: Schedule delayed_delete worker closer") Signed-off-by: Fedor Pchelkin Link: https://patchwork.freedesktop.org/patch/msgid/20240113213347.9562-1-pchelkin@ispras.ru Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index f5187b384ae9..4130945052ed 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -195,7 +195,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func bool use_dma_alloc, bool use_dma32) { struct ttm_global *glob = &ttm_glob; - int ret; + int ret, nid; if (WARN_ON(vma_manager == NULL)) return -EINVAL; @@ -215,7 +215,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func ttm_sys_man_init(bdev); - ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32); + if (dev) + nid = dev_to_node(dev); + else + nid = NUMA_NO_NODE; + + ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32); bdev->vma_manager = vma_manager; spin_lock_init(&bdev->lru_lock); From 08ac6f132dd77e40f786d8af51140c96c6d739c9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 3 Jan 2024 15:31:07 +0200 Subject: [PATCH 021/768] drm/bridge: sii902x: Fix probing race issue A null pointer dereference crash has been observed rarely on TI platforms using sii9022 bridge: [ 53.271356] sii902x_get_edid+0x34/0x70 [sii902x] [ 53.276066] sii902x_bridge_get_edid+0x14/0x20 [sii902x] [ 53.281381] drm_bridge_get_edid+0x20/0x34 [drm] [ 53.286305] drm_bridge_connector_get_modes+0x8c/0xcc [drm_kms_helper] [ 53.292955] drm_helper_probe_single_connector_modes+0x190/0x538 [drm_kms_helper] [ 53.300510] drm_client_modeset_probe+0x1f0/0xbd4 [drm] [ 53.305958] __drm_fb_helper_initial_config_and_unlock+0x50/0x510 [drm_kms_helper] [ 53.313611] drm_fb_helper_initial_config+0x48/0x58 [drm_kms_helper] [ 53.320039] drm_fbdev_dma_client_hotplug+0x84/0xd4 [drm_dma_helper] [ 53.326401] drm_client_register+0x5c/0xa0 [drm] [ 53.331216] drm_fbdev_dma_setup+0xc8/0x13c [drm_dma_helper] [ 53.336881] tidss_probe+0x128/0x264 [tidss] [ 53.341174] platform_probe+0x68/0xc4 [ 53.344841] really_probe+0x188/0x3c4 [ 53.348501] __driver_probe_device+0x7c/0x16c [ 53.352854] driver_probe_device+0x3c/0x10c [ 53.357033] __device_attach_driver+0xbc/0x158 [ 53.361472] bus_for_each_drv+0x88/0xe8 [ 53.365303] __device_attach+0xa0/0x1b4 [ 53.369135] device_initial_probe+0x14/0x20 [ 53.373314] bus_probe_device+0xb0/0xb4 [ 53.377145] deferred_probe_work_func+0xcc/0x124 [ 53.381757] process_one_work+0x1f0/0x518 [ 53.385770] worker_thread+0x1e8/0x3dc [ 53.389519] kthread+0x11c/0x120 [ 53.392750] ret_from_fork+0x10/0x20 The issue here is as follows: - tidss probes, but is deferred as sii902x is still missing. - sii902x starts probing and enters sii902x_init(). - sii902x calls drm_bridge_add(). Now the sii902x bridge is ready from DRM's perspective. - sii902x calls sii902x_audio_codec_init() and platform_device_register_data() - The registration of the audio platform device causes probing of the deferred devices. - tidss probes, which eventually causes sii902x_bridge_get_edid() to be called. - sii902x_bridge_get_edid() tries to use the i2c to read the edid. However, the sii902x driver has not set up the i2c part yet, leading to the crash. Fix this by moving the drm_bridge_add() to the end of the sii902x_init(), which is also at the very end of sii902x_probe(). Signed-off-by: Tomi Valkeinen Fixes: 21d808405fe4 ("drm/bridge/sii902x: Fix EDID readback") Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20240103-si902x-fixes-v1-1-b9fd3e448411@ideasonboard.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240103-si902x-fixes-v1-1-b9fd3e448411@ideasonboard.com --- drivers/gpu/drm/bridge/sii902x.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 2bdc5b439beb..69da73e414a9 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1080,16 +1080,6 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } - sii902x->bridge.funcs = &sii902x_bridge_funcs; - sii902x->bridge.of_node = dev->of_node; - sii902x->bridge.timings = &default_sii902x_timings; - sii902x->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; - - if (sii902x->i2c->irq > 0) - sii902x->bridge.ops |= DRM_BRIDGE_OP_HPD; - - drm_bridge_add(&sii902x->bridge); - sii902x_audio_codec_init(sii902x, dev); i2c_set_clientdata(sii902x->i2c, sii902x); @@ -1102,7 +1092,21 @@ static int sii902x_init(struct sii902x *sii902x) return -ENOMEM; sii902x->i2cmux->priv = sii902x; - return i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); + if (ret) + return ret; + + sii902x->bridge.funcs = &sii902x_bridge_funcs; + sii902x->bridge.of_node = dev->of_node; + sii902x->bridge.timings = &default_sii902x_timings; + sii902x->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; + + if (sii902x->i2c->irq > 0) + sii902x->bridge.ops |= DRM_BRIDGE_OP_HPD; + + drm_bridge_add(&sii902x->bridge); + + return 0; } static int sii902x_probe(struct i2c_client *client) @@ -1170,12 +1174,11 @@ static int sii902x_probe(struct i2c_client *client) } static void sii902x_remove(struct i2c_client *client) - { struct sii902x *sii902x = i2c_get_clientdata(client); - i2c_mux_del_adapters(sii902x->i2cmux); drm_bridge_remove(&sii902x->bridge); + i2c_mux_del_adapters(sii902x->i2cmux); } static const struct of_device_id sii902x_dt_ids[] = { From 3fc6c76a8d208d3955c9e64b382d0ff370bc61fc Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 3 Jan 2024 15:31:08 +0200 Subject: [PATCH 022/768] drm/bridge: sii902x: Fix audio codec unregistration The driver never unregisters the audio codec platform device, which can lead to a crash on module reloading, nor does it handle the return value from sii902x_audio_codec_init(). Signed-off-by: Tomi Valkeinen Fixes: ff5781634c41 ("drm/bridge: sii902x: Implement HDMI audio support") Cc: Jyri Sarha Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20240103-si902x-fixes-v1-2-b9fd3e448411@ideasonboard.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240103-si902x-fixes-v1-2-b9fd3e448411@ideasonboard.com --- drivers/gpu/drm/bridge/sii902x.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 69da73e414a9..4560ae9cbce1 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1080,7 +1080,9 @@ static int sii902x_init(struct sii902x *sii902x) return ret; } - sii902x_audio_codec_init(sii902x, dev); + ret = sii902x_audio_codec_init(sii902x, dev); + if (ret) + return ret; i2c_set_clientdata(sii902x->i2c, sii902x); @@ -1088,13 +1090,15 @@ static int sii902x_init(struct sii902x *sii902x) 1, 0, I2C_MUX_GATE, sii902x_i2c_bypass_select, sii902x_i2c_bypass_deselect); - if (!sii902x->i2cmux) - return -ENOMEM; + if (!sii902x->i2cmux) { + ret = -ENOMEM; + goto err_unreg_audio; + } sii902x->i2cmux->priv = sii902x; ret = i2c_mux_add_adapter(sii902x->i2cmux, 0, 0, 0); if (ret) - return ret; + goto err_unreg_audio; sii902x->bridge.funcs = &sii902x_bridge_funcs; sii902x->bridge.of_node = dev->of_node; @@ -1107,6 +1111,12 @@ static int sii902x_init(struct sii902x *sii902x) drm_bridge_add(&sii902x->bridge); return 0; + +err_unreg_audio: + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); + + return ret; } static int sii902x_probe(struct i2c_client *client) @@ -1179,6 +1189,9 @@ static void sii902x_remove(struct i2c_client *client) drm_bridge_remove(&sii902x->bridge); i2c_mux_del_adapters(sii902x->i2cmux); + + if (!PTR_ERR_OR_ZERO(sii902x->audio.pdev)) + platform_device_unregister(sii902x->audio.pdev); } static const struct of_device_id sii902x_dt_ids[] = { From 0a8c1feed387f8460b8b65fc46fb3608afa7512e Mon Sep 17 00:00:00 2001 From: Yangyu Chen Date: Wed, 17 Jan 2024 02:50:34 +0800 Subject: [PATCH 023/768] drm/ttm: allocate dummy_read_page without DMA32 on fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms may not have any memory in ZONE_DMA32 and use IOMMU to allow 32-bit-DMA-only device to work. Forcing GFP_DMA32 on dummy_read_page will fail on such platforms. Retry after fail will get this works on such platforms. Signed-off-by: Yangyu Chen Link: https://patchwork.freedesktop.org/patch/msgid/tencent_8637383EE0A2C7CC870036AAF01909B26A0A@qq.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index d48b39132b32..c9fa8561f71f 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -95,11 +95,17 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages); ttm_tt_mgr_init(num_pages, num_dma32); - glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); + glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32 | + __GFP_NOWARN); + /* Retry without GFP_DMA32 for platforms DMA32 is not available */ if (unlikely(glob->dummy_read_page == NULL)) { - ret = -ENOMEM; - goto out; + glob->dummy_read_page = alloc_page(__GFP_ZERO); + if (unlikely(glob->dummy_read_page == NULL)) { + ret = -ENOMEM; + goto out; + } + pr_warn("Using GFP_DMA32 fallback for dummy_read_page\n"); } INIT_LIST_HEAD(&glob->device_list); From 3d9e9020b92288871b02f194c3ec88e03a1afa88 Mon Sep 17 00:00:00 2001 From: Khaled Almahallawy Date: Wed, 13 Dec 2023 13:15:42 -0800 Subject: [PATCH 024/768] drm/i915/dp: Fix passing the correct DPCD_REV for drm_dp_set_phy_test_pattern Using link_status to get DPCD_REV fails when disabling/defaulting phy pattern. Use intel_dp->dpcd to access DPCD_REV correctly. Fixes: 8cdf72711928 ("drm/i915/dp: Program vswing, pre-emphasis, test-pattern") Cc: Jani Nikula Cc: Imre Deak Cc: Lee Shawn C Signed-off-by: Khaled Almahallawy Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231213211542.3585105-3-khaled.almahallawy@intel.com (cherry picked from commit 3ee302ec22d6e1d7d1e6d381b0d507ee80f2135c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c3b906ebe542..f5ef95da5534 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4764,7 +4764,7 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, intel_dp->train_set, crtc_state->lane_count); drm_dp_set_phy_test_pattern(&intel_dp->aux, data, - link_status[DP_DPCD_REV]); + intel_dp->dpcd[DP_DPCD_REV]); } static u8 intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) From 0103b4496087c99c195800456bf6a4fcf24fd444 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Mon, 18 Dec 2023 16:05:43 -0800 Subject: [PATCH 025/768] drm/i915/perf: Update handling of MMIO triggered reports On XEHP platforms user is not able to find MMIO triggered reports in the OA buffer since i915 squashes the context ID fields. These context ID fields hold the MMIO trigger markers. Update logic to not squash the context ID fields of MMIO triggered reports. Fixes: cba94bbcff08 ("drm/i915/perf: Determine context valid in OA reports") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20231219000543.1087706-1-umesh.nerlige.ramappa@intel.com (cherry picked from commit 0c68132df6e66244acec1bb5b9e19b0751414389) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_perf.c | 39 ++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7b1c8de2f9cb..2d695818f006 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -772,10 +772,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * The reason field includes flags identifying what * triggered this specific report (mostly timer * triggered or e.g. due to a context switch). - * - * In MMIO triggered reports, some platforms do not set the - * reason bit in this field and it is valid to have a reason - * field of zero. */ reason = oa_report_reason(stream, report); ctx_id = oa_context_id(stream, report32); @@ -787,8 +783,41 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. + * + * Update: + * + * On XEHP platforms the behavior of context id valid bit has + * changed compared to prior platforms. To describe this, we + * define a few terms: + * + * context-switch-report: This is a report with the reason type + * being context-switch. It is generated when a context switches + * out. + * + * context-valid-bit: A bit that is set in the report ID field + * to indicate that a valid context has been loaded. + * + * gpu-idle: A condition characterized by a + * context-switch-report with context-valid-bit set to 0. + * + * On prior platforms, context-id-valid bit is set to 0 only + * when GPU goes idle. In all other reports, it is set to 1. + * + * On XEHP platforms, context-valid-bit is set to 1 in a context + * switch report if a new context switched in. For all other + * reports it is set to 0. + * + * This change in behavior causes an issue with MMIO triggered + * reports. MMIO triggered reports have the markers in the + * context ID field and the context-valid-bit is 0. The logic + * below to squash the context ID would render the report + * useless since the user will not be able to find it in the OA + * buffer. Since MMIO triggered reports exist only on XEHP, + * we should avoid squashing these for XEHP platforms. */ - if (oa_report_ctx_invalid(stream, report)) { + + if (oa_report_ctx_invalid(stream, report) && + GRAPHICS_VER_FULL(stream->engine->i915) < IP_VER(12, 50)) { ctx_id = INVALID_CTX_ID; oa_context_id_squash(stream, report32); } From 3eb791c891aa91603a5fbbfea940f8acf5f17d45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 16 Jan 2024 18:46:02 +0100 Subject: [PATCH 026/768] drm/tests: mm: Call drm_mm_print in drm_test_mm_debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original intent behind the test was to sanity check whether calling the debug iterator (drm_mm_print) doesn't cause any problems. Unfortunately - this call got accidentally removed during KUnit transition. Restore it. Signed-off-by: Michał Winiarski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240116174602.1019512-1-michal.winiarski@intel.com --- drivers/gpu/drm/tests/drm_mm_test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_mm_test.c b/drivers/gpu/drm/tests/drm_mm_test.c index 05d5e7af6d25..6803cb2eb8fd 100644 --- a/drivers/gpu/drm/tests/drm_mm_test.c +++ b/drivers/gpu/drm/tests/drm_mm_test.c @@ -247,13 +247,13 @@ out: static void drm_test_mm_debug(struct kunit *test) { + struct drm_printer p = drm_debug_printer(test->name); struct drm_mm mm; struct drm_mm_node nodes[2]; /* Create a small drm_mm with a couple of nodes and a few holes, and * check that the debug iterator doesn't explode over a trivial drm_mm. */ - drm_mm_init(&mm, 0, 4096); memset(nodes, 0, sizeof(nodes)); @@ -268,6 +268,9 @@ static void drm_test_mm_debug(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_mm_reserve_node(&mm, &nodes[1]), "failed to reserve node[0] {start=%lld, size=%lld)\n", nodes[0].start, nodes[0].size); + + drm_mm_print(&mm, &p); + KUNIT_SUCCEED(test); } static struct drm_mm_node *set_node(struct drm_mm_node *node, From 26db46bc9c675e43230cc6accd110110a7654299 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Tue, 9 Jan 2024 20:04:57 +0800 Subject: [PATCH 027/768] drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable() The ps8640 bridge seems to expect everything to be power cycled at the disable process, but sometimes ps8640_aux_transfer() holds the runtime PM reference and prevents the bridge from suspend. Prevent that by introducing a mutex lock between ps8640_aux_transfer() and .post_disable() to make sure the bridge is really powered off. Fixes: 826cff3f7ebb ("drm/bridge: parade-ps8640: Enable runtime power management") Signed-off-by: Pin-yen Lin Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240109120528.1292601-1-treapking@chromium.org --- drivers/gpu/drm/bridge/parade-ps8640.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index fb5e9ae9ad81..166bfc725ef4 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -107,6 +107,7 @@ struct ps8640 { struct device_link *link; bool pre_enabled; bool need_post_hpd_delay; + struct mutex aux_lock; }; static const struct regmap_config ps8640_regmap_config[] = { @@ -345,6 +346,7 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, struct device *dev = &ps_bridge->page[PAGE0_DP_CNTL]->dev; int ret; + mutex_lock(&ps_bridge->aux_lock); pm_runtime_get_sync(dev); ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); if (ret) { @@ -354,6 +356,7 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ps_bridge->aux_lock); return ret; } @@ -475,7 +478,18 @@ static void ps8640_atomic_post_disable(struct drm_bridge *bridge, ps_bridge->pre_enabled = false; ps8640_bridge_vdo_control(ps_bridge, DISABLE); + + /* + * The bridge seems to expect everything to be power cycled at the + * disable process, so grab a lock here to make sure + * ps8640_aux_transfer() is not holding a runtime PM reference and + * preventing the bridge from suspend. + */ + mutex_lock(&ps_bridge->aux_lock); + pm_runtime_put_sync_suspend(&ps_bridge->page[PAGE0_DP_CNTL]->dev); + + mutex_unlock(&ps_bridge->aux_lock); } static int ps8640_bridge_attach(struct drm_bridge *bridge, @@ -624,6 +638,8 @@ static int ps8640_probe(struct i2c_client *client) if (!ps_bridge) return -ENOMEM; + mutex_init(&ps_bridge->aux_lock); + ps_bridge->supplies[0].supply = "vdd12"; ps_bridge->supplies[1].supply = "vdd33"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ps_bridge->supplies), From 658365c6b0857e6a306436e315a8633937e3af42 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 12 Jan 2024 12:19:27 +0800 Subject: [PATCH 028/768] scsi: isci: Fix an error code problem in isci_io_request_build() Clang static complains that Value stored to 'status' is never read. Return 'status' rather than 'SCI_SUCCESS'. Fixes: f1f52e75939b ("isci: uplevel request infrastructure") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20240112041926.3924315-1-suhui@nfschina.com Reviewed-by: Artur Paszkiewicz Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 71f711cb0628..355a0bc0828e 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -3387,7 +3387,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost, return SCI_FAILURE; } - return SCI_SUCCESS; + return status; } static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag) From d6b75ba5218915be48542bcd7e2a09776b7c66c9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 16 Jan 2024 12:58:36 +0800 Subject: [PATCH 029/768] scsi: virtio_scsi: Remove duplicate check if queue is broken virtqueue_enable_cb() will call virtqueue_poll() which will check if queue is broken at beginning, so remove the virtqueue_is_broken() call Signed-off-by: Li RongQing Link: https://lore.kernel.org/r/20240116045836.12475-1-lirongqing@baidu.com Reviewed-by: Stefan Hajnoczi Acked-by: Michael S. Tsirkin Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 9d1bdcdc1331..e15b3809d059 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -182,8 +182,6 @@ static void virtscsi_vq_done(struct virtio_scsi *vscsi, while ((buf = virtqueue_get_buf(vq, &len)) != NULL) fn(vscsi, buf); - if (unlikely(virtqueue_is_broken(vq))) - break; } while (!virtqueue_enable_cb(vq)); spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags); } From e6f3799de2f2b2cd23c3894a127921dfb6c6a512 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Jan 2024 11:26:06 +0000 Subject: [PATCH 030/768] scsi: initio: Remove redundant variable 'rb' The variable 'rb' is being assigned a value but it isn't being read afterwards. The assignment is redundant and so 'rb' can be removed. Cleans up clang scan build warning: warning: Although the value stored to 'rb' is used in the enclosing expression, the value is never actually read from 'rb'[deadcode.DeadStores] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240116112606.2263738-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/initio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 2a50fda3a628..625fd547ee60 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -371,7 +371,6 @@ static u16 initio_se2_rd(unsigned long base, u8 addr) */ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) { - u8 rb; u8 instr; int i; @@ -400,7 +399,7 @@ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) udelay(30); outb(SE2CS, base + TUL_NVRAM); /* -CLK */ udelay(30); - if ((rb = inb(base + TUL_NVRAM)) & SE2DI) + if (inb(base + TUL_NVRAM) & SE2DI) break; /* write complete */ } outb(0, base + TUL_NVRAM); /* -CS */ From 7d1ae55ffed0ff78ed33b1465c1233e05024e135 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 16 Jan 2024 13:55:09 -0800 Subject: [PATCH 031/768] scsi: MAINTAINERS: Update ibmvscsi_tgt maintainer Michael has not been responsible for this code as an IBMer for quite some time. Seeing as the rest of the IBM Virtual SCSI related drivers already fall under my purview replace Michael with myself as maintainer. Signed-off-by: Tyrel Datwyler Link: https://lore.kernel.org/r/20240116215509.1155787-1-tyreld@linux.ibm.com Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b5ac8c4ae434..afbd909258df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10203,7 +10203,7 @@ F: drivers/scsi/ibmvscsi/ibmvscsi* F: include/scsi/viosrp.h IBM Power Virtual SCSI Device Target Driver -M: Michael Cyr +M: Tyrel Datwyler L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org S: Supported From a20f1b02bafcbf5a32d96a1d4185d6981cf7d016 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 17 Jan 2024 10:35:03 -0800 Subject: [PATCH 032/768] drm/bridge: parade-ps8640: Make sure we drop the AUX mutex in the error case After commit 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()"), if we hit the error case in ps8640_aux_transfer() then we return without dropping the mutex. Fix this oversight. Fixes: 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()") Reviewed-by: Hsin-Yi Wang Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240117103502.1.Ib726a0184913925efc7e99c4d4fc801982e1bc24@changeid --- drivers/gpu/drm/bridge/parade-ps8640.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index 166bfc725ef4..14d4dcf239da 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -351,11 +351,13 @@ static ssize_t ps8640_aux_transfer(struct drm_dp_aux *aux, ret = _ps8640_wait_hpd_asserted(ps_bridge, 200 * 1000); if (ret) { pm_runtime_put_sync_suspend(dev); - return ret; + goto exit; } ret = ps8640_aux_transfer_msg(aux, msg); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + +exit: mutex_unlock(&ps_bridge->aux_lock); return ret; From 1d9cabe2817edd215779dc9c2fe5e7ab9aac0704 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 17 Jan 2024 22:06:28 +0100 Subject: [PATCH 033/768] SUNRPC: use request size to initialize bio_vec in svc_udp_sendto() Use the proper size when setting up the bio_vec, as otherwise only zero-length UDP packets will be sent. Fixes: baabf59c2414 ("SUNRPC: Convert svc_udp_sendto() to use the per-socket bio_vec array") Signed-off-by: Lucas Stach Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index bfb2f78523a8..545017a3daa4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -717,12 +717,12 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) ARRAY_SIZE(rqstp->rq_bvec), xdr); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, - count, 0); + count, rqstp->rq_res.len); err = sock_sendmsg(svsk->sk_sock, &msg); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, - count, 0); + count, rqstp->rq_res.len); err = sock_sendmsg(svsk->sk_sock, &msg); } From 4d695869d3fb952d6f690bec0260ffe2a8b99b37 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 17 Jan 2024 14:27:15 +0100 Subject: [PATCH 034/768] selftests/hid: wacom: fix confidence tests The device is exported with a fuzz of 4, meaning that the `+ t` here is removed by the fuzz algorithm, making those tests failing. Not sure why, but when I run this locally it was passing, but not in the VM of the CI. Fixes: b0fb904d074e ("HID: wacom: Add additional tests of confidence behavior") Link: https://gitlab.freedesktop.org/bentiss/hid/-/jobs/53692957#L3315 Acked-by: Jason Gerecke Link: https://lore.kernel.org/r/20240117-b4-wip-wacom-tests-fixes-v1-1-f317784f3c36@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_wacom_generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index 352fc39f3c6c..b62c7dba6777 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -880,8 +880,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest does not overlap with other contacts. The value of `t` may be incremented over time to move the point along a linear path. """ - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 return test_multitouch.Touch(contact_id, x, y) def make_contacts(self, n, t=0): @@ -902,8 +902,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest tracking_id = contact_ids.tracking_id slot_num = contact_ids.slot_num - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 # If the data isn't supposed to be stored in any slots, there is # nothing we can check for in the evdev stream. From 556857aa1d0855aba02b1c63bc52b91ec63fc2cc Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 15 Jan 2024 11:18:05 +0100 Subject: [PATCH 035/768] wifi: ath11k: rely on mac80211 debugfs handling for vif mac80211 started to delete debugfs entries in certain cases, causing a ath11k to crash when it tried to delete the entries later. Fix this by relying on mac80211 to delete the entries when appropriate and adding them from the vif_add_debugfs handler. Fixes: 0a3d898ee9a8 ("wifi: mac80211: add/remove driver debugfs entries as appropriate") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218364 Signed-off-by: Benjamin Berg Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240115101805.1277949-1-benjamin@sipsolutions.net --- drivers/net/wireless/ath/ath11k/core.h | 4 ---- drivers/net/wireless/ath/ath11k/debugfs.c | 25 +++++++++-------------- drivers/net/wireless/ath/ath11k/debugfs.h | 12 ++--------- drivers/net/wireless/ath/ath11k/mac.c | 12 +---------- 4 files changed, 13 insertions(+), 40 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index f12b606e2d2e..667d55e26156 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -368,10 +368,6 @@ struct ath11k_vif { struct ieee80211_chanctx_conf chanctx; struct ath11k_arp_ns_offload arp_ns_offload; struct ath11k_rekey_data rekey_data; - -#ifdef CONFIG_ATH11K_DEBUGFS - struct dentry *debugfs_twt; -#endif /* CONFIG_ATH11K_DEBUGFS */ }; struct ath11k_vif_iter { diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index be76e7d1c436..0796f4d92b47 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -1893,35 +1893,30 @@ static const struct file_operations ath11k_fops_twt_resume_dialog = { .open = simple_open }; -void ath11k_debugfs_add_interface(struct ath11k_vif *arvif) +void ath11k_debugfs_op_vif_add(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) { + struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); struct ath11k_base *ab = arvif->ar->ab; + struct dentry *debugfs_twt; if (arvif->vif->type != NL80211_IFTYPE_AP && !(arvif->vif->type == NL80211_IFTYPE_STATION && test_bit(WMI_TLV_SERVICE_STA_TWT, ab->wmi_ab.svc_map))) return; - arvif->debugfs_twt = debugfs_create_dir("twt", - arvif->vif->debugfs_dir); - debugfs_create_file("add_dialog", 0200, arvif->debugfs_twt, + debugfs_twt = debugfs_create_dir("twt", + arvif->vif->debugfs_dir); + debugfs_create_file("add_dialog", 0200, debugfs_twt, arvif, &ath11k_fops_twt_add_dialog); - debugfs_create_file("del_dialog", 0200, arvif->debugfs_twt, + debugfs_create_file("del_dialog", 0200, debugfs_twt, arvif, &ath11k_fops_twt_del_dialog); - debugfs_create_file("pause_dialog", 0200, arvif->debugfs_twt, + debugfs_create_file("pause_dialog", 0200, debugfs_twt, arvif, &ath11k_fops_twt_pause_dialog); - debugfs_create_file("resume_dialog", 0200, arvif->debugfs_twt, + debugfs_create_file("resume_dialog", 0200, debugfs_twt, arvif, &ath11k_fops_twt_resume_dialog); } -void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif) -{ - if (!arvif->debugfs_twt) - return; - - debugfs_remove_recursive(arvif->debugfs_twt); - arvif->debugfs_twt = NULL; -} diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h index 3af0169f6cf2..6f630b42e95c 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.h +++ b/drivers/net/wireless/ath/ath11k/debugfs.h @@ -306,8 +306,8 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar) return ar->debug.rx_filter; } -void ath11k_debugfs_add_interface(struct ath11k_vif *arvif); -void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif); +void ath11k_debugfs_op_vif_add(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, enum wmi_direct_buffer_module id, enum ath11k_dbg_dbr_event event, @@ -386,14 +386,6 @@ static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar, return 0; } -static inline void ath11k_debugfs_add_interface(struct ath11k_vif *arvif) -{ -} - -static inline void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif) -{ -} - static inline void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, enum wmi_direct_buffer_module id, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 7f7b39817773..71c6dab1aedb 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -6750,13 +6750,6 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw, goto err; } - /* In the case of hardware recovery, debugfs files are - * not deleted since ieee80211_ops.remove_interface() is - * not invoked. In such cases, try to delete the files. - * These will be re-created later. - */ - ath11k_debugfs_remove_interface(arvif); - memset(arvif, 0, sizeof(*arvif)); arvif->ar = ar; @@ -6933,8 +6926,6 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw, ath11k_dp_vdev_tx_attach(ar, arvif); - ath11k_debugfs_add_interface(arvif); - if (vif->type != NL80211_IFTYPE_MONITOR && test_bit(ATH11K_FLAG_MONITOR_CONF_ENABLED, &ar->monitor_flags)) { ret = ath11k_mac_monitor_vdev_create(ar); @@ -7050,8 +7041,6 @@ err_vdev_del: /* Recalc txpower for remaining vdev */ ath11k_mac_txpower_recalc(ar); - ath11k_debugfs_remove_interface(arvif); - /* TODO: recal traffic pause state based on the available vdevs */ mutex_unlock(&ar->conf_mutex); @@ -9149,6 +9138,7 @@ static const struct ieee80211_ops ath11k_ops = { #endif #ifdef CONFIG_ATH11K_DEBUGFS + .vif_add_debugfs = ath11k_debugfs_op_vif_add, .sta_add_debugfs = ath11k_debugfs_sta_op_add, #endif From 6992eb815d087858f8d7e4020529c2fe800456b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 16 Jan 2024 23:08:21 +0200 Subject: [PATCH 036/768] Revert "drm/i915/dsi: Do display on sequence later on icl+" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 88b065943cb583e890324d618e8d4b23460d51a3. Lenovo 82TQ is unhappy if we do the display on sequence this late. The display output shows severe corruption. It's unclear if this is a failure on our part (perhaps something to do with sending commands in LP mode after HS /video mode transmission has been started? Though the backlight on command at least seems to work) or simply that there are some commands in the sequence that are needed to be done earlier (eg. could be some DSC init stuff?). If the latter then I don't think the current Windows code would work either, but maybe this was originally tested with an older driver, who knows. Root causing this fully would likely require a lot of experimentation which isn't really feasible without direct access to the machine, so let's just accept failure and go back to the original sequence. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10071 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240116210821.30194-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula (cherry picked from commit dc524d05974f615b145404191fcf91b478950499) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/icl_dsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index ac456a2275db..eda4a8b88590 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder) } intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); /* ensure all panel commands dispatched before enabling transcoder */ wait_for_cmds_dispatched_to_panel(encoder); @@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct intel_atomic_state *state, /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - /* step7: enable backlight */ intel_backlight_enable(crtc_state, conn_state); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); From 84b5ece64477df4394d362d494a2496bf0878985 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 12 Jan 2024 07:49:12 -0800 Subject: [PATCH 037/768] drm/i915: Drop -Wstringop-overflow -Wstringop-overflow is broken on GCC11. In future changes it will be moved to the normal C flags in the top level Makefile (out of Makefile.extrawarn), but accounting for the compiler support. Just remove it out of i915's forced extra warnings, preparing for the upcoming change and avoiding build warnings to show up. Fixes: 2250c7ead8ad ("drm/i915: enable W=1 warnings by default") References: https://lore.kernel.org/all/45ad1d0f-a10f-483e-848a-76a30252edbe@paulmck-laptop/ Signed-off-by: Lucas De Marchi Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240112154912.1775199-1-lucas.demarchi@intel.com (cherry picked from commit 05ae67d95bade8b7facd5612baea21c12d243149) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e777686190ca..c13f14edb508 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) subdir-ccflags-y += $(call cc-option, -Wformat-overflow) subdir-ccflags-y += $(call cc-option, -Wformat-truncation) -subdir-ccflags-y += $(call cc-option, -Wstringop-overflow) subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) From a6e4f85d3820d00694ed10f581f4c650445dbcda Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 16 Jan 2024 14:22:57 +0000 Subject: [PATCH 038/768] wifi: cfg80211: fix missing interfaces when dumping The nl80211_dump_interface() supports resumption in case nl80211_send_iface() doesn't have the resources to complete its work. The logic would store the progress as iteration offsets for rdev and wdev loops. However the logic did not properly handle resumption for non-last rdev. Assuming a system with 2 rdevs, with 2 wdevs each, this could happen: dump(cb=[0, 0]): if_start=cb[1] (=0) send rdev0.wdev0 -> ok send rdev0.wdev1 -> yield cb[1] = 1 dump(cb=[0, 1]): if_start=cb[1] (=1) send rdev0.wdev1 -> ok // since if_start=1 the rdev0.wdev0 got skipped // through if_idx < if_start send rdev1.wdev1 -> ok The if_start needs to be reset back to 0 upon wdev loop end. The problem is actually hard to hit on a desktop, and even on most routers. The prerequisites for this manifesting was: - more than 1 wiphy - a few handful of interfaces - dump without rdev or wdev filter I was seeing this with 4 wiphys 9 interfaces each. It'd miss 6 interfaces from the last wiphy reported to userspace. Signed-off-by: Michal Kazior Link: https://msgid.link/20240116142340.89678-1-kazikcz@gmail.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1cbbb11ea503..fbf95b7ff6b4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4008,6 +4008,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } wiphy_unlock(&rdev->wiphy); + if_start = 0; wp_idx++; } out: From 26490da5a71da9064e58f0d4ce82756c26ef9eb1 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 18 Jan 2024 09:25:45 +0100 Subject: [PATCH 039/768] wifi: cfg80211/mac80211: remove dependency on non-existing option Commit ffbd0c8c1e7f ("wifi: mac80211: add an element parsing unit test") and commit 730eeb17bbdd ("wifi: cfg80211: add first kunit tests, for element defrag") add new configs that depend on !KERNEL_6_2, but the config option KERNEL_6_2 does not exist in the tree. This dependency is used for handling backporting to restrict the option to certain kernels but this really should not be carried around the mainline kernel tree. Clean up this needless dependency on the non-existing option KERNEL_6_2. Link: https://lore.kernel.org/lkml/CAKXUXMyfrM6amOR7Ysim3WNQ-Ckf9HJDqRhAoYmLXujo1UV+yA@mail.gmail.com/ Signed-off-by: Lukas Bulwahn Signed-off-by: Johannes Berg --- net/mac80211/Kconfig | 1 - net/wireless/Kconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index cb0291decf2e..13438cc0a6b1 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -62,7 +62,6 @@ config MAC80211_KUNIT_TEST depends on KUNIT depends on MAC80211 default KUNIT_ALL_TESTS - depends on !KERNEL_6_2 help Enable this option to test mac80211 internals with kunit. diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index a9ac85e09af3..10345388ad13 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -206,7 +206,6 @@ config CFG80211_KUNIT_TEST depends on KUNIT depends on CFG80211 default KUNIT_ALL_TESTS - depends on !KERNEL_6_2 help Enable this option to test cfg80211 functions with kunit. From b01a74b3ca6fd51b62c67733ba7c3280fa6c5d26 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jan 2024 18:17:44 +0200 Subject: [PATCH 040/768] wifi: mac80211: fix potential sta-link leak When a station is allocated, links are added but not set to valid yet (e.g. during connection to an AP MLD), we might remove the station without ever marking links valid, and leak them. Fix that. Fixes: cb71f1d136a6 ("wifi: mac80211: add sta link addition/removal") Signed-off-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.6573998beaf8.I09ac2e1d41c80f82a5a616b8bd1d9d8dd709a6a6@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0ba613dd1cc4..c33decbb97f2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -404,7 +404,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { - if (!(sta->sta.valid_links & BIT(i))) + struct link_sta_info *link_sta; + + link_sta = rcu_access_pointer(sta->link[i]); + if (!link_sta) continue; sta_remove_link(sta, i, false); From cf4a0d840ecc72fcf16198d5e9c505ab7d5a5e4d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 11 Jan 2024 15:07:25 +0200 Subject: [PATCH 041/768] wifi: iwlwifi: fix a memory corruption iwl_fw_ini_trigger_tlv::data is a pointer to a __le32, which means that if we copy to iwl_fw_ini_trigger_tlv::data + offset while offset is in bytes, we'll write past the buffer. Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218233 Fixes: cf29c5b66b9f ("iwlwifi: dbg_ini: implement time point handling") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111150610.2d2b8b870194.I14ed76505a5cf87304e0c9cc05cc0ae85ed3bf91@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index b658cf228fbe..9160d81a871e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include #include "iwl-drv.h" @@ -1096,7 +1096,7 @@ static int iwl_dbg_tlv_override_trig_node(struct iwl_fw_runtime *fwrt, node_trig = (void *)node_tlv->data; } - memcpy(node_trig->data + offset, trig->data, trig_data_len); + memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len); node_tlv->length = cpu_to_le32(size); if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) { From bcbc84af1183c8cf3d1ca9b78540c2185cd85e7f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 4 Jan 2024 19:10:59 +0100 Subject: [PATCH 042/768] wifi: mac80211: fix race condition on enabling fast-xmit fast-xmit must only be enabled after the sta has been uploaded to the driver, otherwise it could end up passing the not-yet-uploaded sta via drv_tx calls to the driver, leading to potential crashes because of uninitialized drv_priv data. Add a missing sta->uploaded check and re-check fast xmit after inserting a sta. Signed-off-by: Felix Fietkau Link: https://msgid.link/20240104181059.84032-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 ++ net/mac80211/tx.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c33decbb97f2..bcf3f727fc6d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -913,6 +913,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ed4fdf655343..4b2823e36a37 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3048,7 +3048,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || From 0991abeddefa118479b0af32c28bcd662dec1561 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 18 Jan 2024 09:52:52 +0800 Subject: [PATCH 043/768] exfat: fix zero the unwritten part for dio read For dio read, bio will be leave in flight when a successful partial aio read have been setup, blockdev_direct_IO() will return -EIOCBQUEUED. In the case, iter->iov_offset will be not advanced, the oops reported by syzbot will occur if revert iter->iov_offset with iov_iter_revert(). The unwritten part had been zeroed by aio read, so there is no need to zero it in dio read. Reported-by: syzbot+fd404f6b03a58e8bc403@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fd404f6b03a58e8bc403 Fixes: 11a347fb6cef ("exfat: change to get file size from DataLength") Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 522edcbb2ce4..0687f952956c 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -501,7 +501,7 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct exfat_inode_info *ei = EXFAT_I(inode); loff_t pos = iocb->ki_pos; - loff_t size = iocb->ki_pos + iov_iter_count(iter); + loff_t size = pos + iov_iter_count(iter); int rw = iov_iter_rw(iter); ssize_t ret; @@ -525,11 +525,10 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) */ ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block); if (ret < 0) { - if (rw == WRITE) + if (rw == WRITE && ret != -EIOCBQUEUED) exfat_write_failed(mapping, size); - if (ret != -EIOCBQUEUED) - return ret; + return ret; } else size = pos + ret; From 55583e899a5357308274601364741a83e78d6ac4 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:33 +0800 Subject: [PATCH 044/768] ext4: fix double-free of blocks due to wrong extents moved_len In ext4_move_extents(), moved_len is only updated when all moves are successfully executed, and only discards orig_inode and donor_inode preallocations when moved_len is not zero. When the loop fails to exit after successfully moving some extents, moved_len is not updated and remains at 0, so it does not discard the preallocations. If the moved extents overlap with the preallocated extents, the overlapped extents are freed twice in ext4_mb_release_inode_pa() and ext4_process_freed_data() (as described in commit 94d7c16cbbbd ("ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT")), and bb_free is incremented twice. Hence when trim is executed, a zero-division bug is triggered in mb_update_avg_fragment_size() because bb_free is not zero and bb_fragments is zero. Therefore, update move_len after each extent move to avoid the issue. Reported-by: Wei Chen Reported-by: xingwei lee Closes: https://lore.kernel.org/r/CAO4mrferzqBUnCag8R3m2zf897ts9UEuhjFQGPtODT92rYyR2Q@mail.gmail.com Fixes: fcf6b1b729bc ("ext4: refactor ext4_move_extents code base") CC: # 3.18 Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/move_extent.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3aa57376d9c2..391efa6d4c56 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -618,6 +618,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -672,7 +673,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -682,9 +683,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { From 172202152a125955367393956acf5f4ffd092e0d Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:34 +0800 Subject: [PATCH 045/768] ext4: do not trim the group with corrupted block bitmap Otherwise operating on an incorrupted block bitmap can lead to all sorts of unknown problems. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f44f668e407f..c86c90b494de 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6761,6 +6761,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) bool set_trimmed = false; void *bitmap; + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + return 0; + last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; if (start == 0 && max >= last) From c9b528c35795b711331ed36dc3dbee90d5812d4e Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:35 +0800 Subject: [PATCH 046/768] ext4: regenerate buddy after block freeing failed if under fc replay This mostly reverts commit 6bd97bf273bd ("ext4: remove redundant mb_regenerate_buddy()") and reintroduces mb_regenerate_buddy(). Based on code in mb_free_blocks(), fast commit replay can end up marking as free blocks that are already marked as such. This causes corruption of the buddy bitmap so we need to regenerate it in that case. Reported-by: Jan Kara Fixes: 6bd97bf273bd ("ext4: remove redundant mb_regenerate_buddy()") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c86c90b494de..c97ad0e77831 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1233,6 +1233,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, atomic64_add(period, &sbi->s_mb_generation_time); } +static void mb_regenerate_buddy(struct ext4_buddy *e4b) +{ + int count; + int order = 1; + void *buddy; + + while ((buddy = mb_find_buddy(e4b, order++, &count))) + mb_set_bits(buddy, 0, count); + + e4b->bd_info->bb_fragments = 0; + memset(e4b->bd_info->bb_counters, 0, + sizeof(*e4b->bd_info->bb_counters) * + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, + e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); +} + /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1920,6 +1938,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ext4_mark_group_bitmap_corrupted( sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); + } else { + mb_regenerate_buddy(e4b); } goto done; } From 2331fd4a49864e1571b4f50aa3aa1536ed6220d0 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:36 +0800 Subject: [PATCH 047/768] ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks() After updating bb_free in mb_free_blocks, it is possible to return without updating bb_fragments because the block being freed is found to have already been freed, which leads to inconsistency between bb_free and bb_fragments. Since the group may be unlocked in ext4_grp_locked_error(), this can lead to problems such as dividing by zero when calculating the average fragment length. Hence move the update of bb_free to after the block double-free check guarantees that the corresponding statistics are updated only after the core block bitmap is modified. Fixes: eabe0444df90 ("ext4: speed-up releasing blocks on commit") CC: # 3.10 Suggested-by: Jan Kara Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c97ad0e77831..fa351aa323dc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1909,11 +1909,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1927,23 +1922,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } else { - mb_regenerate_buddy(e4b); - } - goto done; + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1968,9 +1971,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } From 993bf0f4c393b3667830918f9247438a8f6fdb5b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:37 +0800 Subject: [PATCH 048/768] ext4: avoid dividing by 0 in mb_update_avg_fragment_size() when block bitmap corrupt Determine if bb_fragments is 0 instead of determining bb_free to eliminate the risk of dividing by zero when the block bitmap is corrupted. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-6-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fa351aa323dc..751e31df0f10 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -842,7 +842,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, From 4530b3660d396a646aad91a787b6ab37cf604b53 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:38 +0800 Subject: [PATCH 049/768] ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found() Determine if the group block bitmap is corrupted before using ac_b_ex in ext4_mb_try_best_found() to avoid allocating blocks from a group with a corrupted block bitmap in the following concurrency and making the situation worse. ext4_mb_regular_allocator ext4_lock_group(sb, group) ext4_mb_good_group // check if the group bbitmap is corrupted ext4_mb_complex_scan_group // Scan group gets ac_b_ex but doesn't use it ext4_unlock_group(sb, group) ext4_mark_group_bitmap_corrupted(group) // The block bitmap was corrupted during // the group unlock gap. ext4_mb_try_best_found ext4_lock_group(ac->ac_sb, group) ext4_mb_use_best_found mb_mark_used // Allocating blocks in block bitmap corrupted group Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 751e31df0f10..82afe275e6c8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2299,6 +2299,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, return; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2306,6 +2309,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); } From 832698373a25950942c04a512daa652c18a9b513 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:39 +0800 Subject: [PATCH 050/768] ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal() Places the logic for checking if the group's block bitmap is corrupt under the protection of the group lock to avoid allocating blocks from the group with a corrupted block bitmap. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-8-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 82afe275e6c8..2069d768c619 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2336,12 +2336,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2374,6 +2372,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); From c5f3a3821de42ede9bcaeb892d1539717cf590cb Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:40 +0800 Subject: [PATCH 051/768] ext4: mark the group block bitmap as corrupted before reporting an error Otherwise unlocking the group in ext4_grp_locked_error may allow other processes to modify the core block bitmap that is known to be corrupt. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-9-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2069d768c619..c7f63dd7afcc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(EXT4_SB(sb), first + i); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing block already freed " "(bit %u)", first + i); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); } mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); } @@ -1933,12 +1933,12 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing already freed block (bit %u); block bitmap corrupt.", block); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); return; } @@ -2406,12 +2406,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, k = mb_find_next_zero_bit(buddy, max, 0); if (k >= max) { + ext4_mark_group_bitmap_corrupted(ac->ac_sb, + e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0, "%d free clusters of order %d. But found 0", grp->bb_counters[i], i); - ext4_mark_group_bitmap_corrupted(ac->ac_sb, - e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } ac->ac_found++; @@ -2462,12 +2462,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * free blocks even though group info says we * have free blocks */ + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But bitmap says 0", free); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } @@ -2493,12 +2493,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, if (WARN_ON(ex.fe_len <= 0)) break; if (free < ex.fe_len) { + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But got %d blocks", free, ex.fe_len); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); /* * The number of free blocks differs. This mostly * indicate that the bitmap is corrupt. So exit From 133de5a0d8f8e32b34feaa8beae7a189482f1856 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:54 +0800 Subject: [PATCH 052/768] ext4: remove unused return value of __mb_check_buddy Remove unused return value of __mb_check_buddy. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-2-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c7f63dd7afcc..0d0917cf2e8f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -677,7 +677,7 @@ do { \ } \ } while (0) -static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, +static void __mb_check_buddy(struct ext4_buddy *e4b, char *file, const char *function, int line) { struct super_block *sb = e4b->bd_sb; @@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, void *buddy2; if (e4b->bd_info->bb_check_counter++ % 10) - return 0; + return; while (order > 1) { buddy = mb_find_buddy(e4b, order, &max); @@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, grp = ext4_get_group_info(sb, e4b->bd_group); if (!grp) - return NULL; + return; list_for_each(cur, &grp->bb_prealloc_list) { ext4_group_t groupnr; struct ext4_prealloc_space *pa; @@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, for (i = 0; i < pa->pa_len; i++) MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); } - return 0; } #undef MB_CHECK_ASSERT #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ From 438a35e72d09c01da7ffb49570ecd11ca632270b Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:55 +0800 Subject: [PATCH 053/768] ext4: remove unused parameter ngroup in ext4_mb_choose_next_group_*() Remove unused parameter ngroup in ext4_mb_choose_next_group_*(). Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20240105092102.496631-3-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0d0917cf2e8f..9c867a13bf8d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -870,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) * cr level needs an update. */ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *iter; @@ -944,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o * order. Updates *new_cr if cr level needs an update. */ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -989,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context * * much and fall to CR_GOAL_LEN_SLOW in that case. */ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -1124,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, } if (*new_cr == CR_POWER2_ALIGNED) { - ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group); } else if (*new_cr == CR_GOAL_LEN_FAST) { - ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_goal_fast(ac, new_cr, group); } else if (*new_cr == CR_BEST_AVAIL_LEN) { - ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_best_avail(ac, new_cr, group); } else { /* * TODO: For CR=2, we can arrange groups in an rb tree sorted by From 11fd1a5d642355a45f4008d308399c26a8b3d3f0 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:56 +0800 Subject: [PATCH 054/768] ext4: remove unneeded return value of ext4_mb_release_context Function ext4_mb_release_context always return 0 and the return value is never used. Just remove unneeded return value of ext4_mb_release_context. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-4-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9c867a13bf8d..dfd2de7aa0cf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5968,7 +5968,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* * release all resource we used in allocation */ -static int ext4_mb_release_context(struct ext4_allocation_context *ac) +static void ext4_mb_release_context(struct ext4_allocation_context *ac) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_prealloc_space *pa = ac->ac_pa; @@ -6005,7 +6005,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); - return 0; } static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) From 97c32dbffce12136c06d9ceb6919850233d3a1c2 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:57 +0800 Subject: [PATCH 055/768] ext4: remove unused ext4_allocation_context::ac_groups_considered Remove unused ext4_allocation_context::ac_groups_considered Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-5-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index d7aeb5da7d86..56938532b4ce 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -192,7 +192,6 @@ struct ext4_allocation_context { */ ext4_grpblk_t ac_orig_goal_len; - __u32 ac_groups_considered; __u32 ac_flags; /* allocation hints */ __u16 ac_groups_scanned; __u16 ac_groups_linear_remaining; From 908177175a2ac7280cac738f33ccbbbcff035c5c Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:58 +0800 Subject: [PATCH 056/768] ext4: remove unused return value of ext4_mb_release Remove unused return value of ext4_mb_release. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-6-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a5d784872303..cabce11778fc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2912,7 +2912,7 @@ extern const struct seq_operations ext4_mb_seq_groups_ops; extern const struct seq_operations ext4_mb_seq_structs_summary_ops; extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset); extern int ext4_mb_init(struct super_block *); -extern int ext4_mb_release(struct super_block *); +extern void ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); extern void ext4_discard_preallocations(struct inode *, unsigned int); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dfd2de7aa0cf..26b199d2c330 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3750,7 +3750,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp) return count; } -int ext4_mb_release(struct super_block *sb) +void ext4_mb_release(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; @@ -3826,8 +3826,6 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - - return 0; } static inline int ext4_issue_discard(struct super_block *sb, From 820c280896eaf715b39ac1ad38976bcc749082b7 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:59 +0800 Subject: [PATCH 057/768] ext4: remove unused return value of ext4_mb_release_inode_pa Remove unused return value of ext4_mb_release_inode_pa Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-7-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 26b199d2c330..3349723ff93f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5307,7 +5307,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac) * the caller MUST hold group/inode locks. * TODO: optimize the case when there are no in-core structures yet */ -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, struct ext4_prealloc_space *pa) { @@ -5357,8 +5357,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, */ } atomic_add(free, &sbi->s_mb_discarded); - - return 0; } static noinline_for_stack int From 20427949b9b584422c05bb31e48b1b68615dd794 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:21:00 +0800 Subject: [PATCH 058/768] ext4: remove unused return value of ext4_mb_release_group_pa Remove unused return value of ext4_mb_release_group_pa. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-8-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3349723ff93f..a8e61bb181fa 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5359,7 +5359,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, atomic_add(free, &sbi->s_mb_discarded); } -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_group_pa(struct ext4_buddy *e4b, struct ext4_prealloc_space *pa) { @@ -5373,13 +5373,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", e4b->bd_group, group, pa->pa_pstart); - return 0; + return; } mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); - - return 0; } /* From 2ffd2a6ad1d3a8213bb5805f45f49098fe615db1 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:21:01 +0800 Subject: [PATCH 059/768] ext4: remove unnecessary parameter "needed" in ext4_discard_preallocations The "needed" controls the number of ext4_prealloc_space to discard in ext4_discard_preallocations. Function ext4_discard_preallocations is supposed to discard all non-used preallocated blocks when "needed" is 0 and now ext4_discard_preallocations is always called with "needed" = 0. Remove unnecessary parameter "needed" and remove all non-used preallocated spaces in ext4_discard_preallocations to simplify the code. Note: If count of non-used preallocated spaces could be more than UINT_MAX, there was a memory leak as some non-used preallocated spaces are left ununsed and this commit will fix it. Otherwise, there is no behavior change. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-9-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/extents.c | 10 +++++----- fs/ext4/file.c | 2 +- fs/ext4/indirect.c | 2 +- fs/ext4/inode.c | 6 +++--- fs/ext4/ioctl.c | 2 +- fs/ext4/mballoc.c | 10 +++------- fs/ext4/move_extent.c | 4 ++-- fs/ext4/super.c | 2 +- 9 files changed, 18 insertions(+), 22 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cabce11778fc..786b6857ab47 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2915,7 +2915,7 @@ extern int ext4_mb_init(struct super_block *); extern void ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); -extern void ext4_discard_preallocations(struct inode *, unsigned int); +extern void ext4_discard_preallocations(struct inode *); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 01299b55a567..9106715254ac 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; @@ -4313,7 +4313,7 @@ got_allocated_blocks: * not a good idea to call discard here directly, * but otherwise we'd need to call it every free(). */ - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; ext4_free_blocks(handle, inode, NULL, newblock, @@ -5357,7 +5357,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); @@ -5365,7 +5365,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start, SHIFT_LEFT); @@ -5497,7 +5497,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); path = ext4_find_extent(inode, offset_lblk, NULL, 0); if (IS_ERR(path)) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6aa15dafc677..54d6ff22585c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -174,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) (atomic_read(&inode->i_writecount) == 1) && !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index a9f3716119d3..d8ca7f64f952 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -714,7 +714,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5af1b0b8680e..4cae8698f70c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -371,7 +371,7 @@ void ext4_da_update_reserve_space(struct inode *inode, */ if ((ei->i_reserved_data_blocks == 0) && !inode_is_open_for_write(inode)) - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); } static int __check_block_validity(struct inode *inode, const char *func, @@ -4017,7 +4017,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) if (stop_block > first_block) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, first_block, stop_block - first_block); @@ -4170,7 +4170,7 @@ int ext4_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) err = ext4_ext_truncate(handle, inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index aa6be510eb8f..7160a71044c8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -467,7 +467,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_reset_inode_seed(inode); ext4_reset_inode_seed(inode_bl); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a8e61bb181fa..f3da7db2beee 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5498,7 +5498,7 @@ out_dbg: * * FIXME!! Make sure it is valid at all the call sites */ -void ext4_discard_preallocations(struct inode *inode, unsigned int needed) +void ext4_discard_preallocations(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct super_block *sb = inode->i_sb; @@ -5520,15 +5520,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) mb_debug(sb, "discard preallocation for inode %lu\n", inode->i_ino); trace_ext4_discard_preallocations(inode, - atomic_read(&ei->i_prealloc_active), needed); - - if (needed == 0) - needed = UINT_MAX; + atomic_read(&ei->i_prealloc_active), 0); repeat: /* first, collect all pa's in the inode */ write_lock(&ei->i_prealloc_lock); - for (iter = rb_first(&ei->i_prealloc_node); iter && needed; + for (iter = rb_first(&ei->i_prealloc_node); iter; iter = rb_next(iter)) { pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); @@ -5552,7 +5549,6 @@ repeat: spin_unlock(&pa->pa_lock); rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node); list_add(&pa->u.pa_tmp_list, &list); - needed--; continue; } diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 391efa6d4c56..7cd4afa4de1d 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -686,8 +686,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, out: if (*moved_len) { - ext4_discard_preallocations(orig_inode, 0); - ext4_discard_preallocations(donor_inode, 0); + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); } ext4_free_ext_path(path); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dcba0f85dfe2..0f931d0c227d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1525,7 +1525,7 @@ void ext4_clear_inode(struct inode *inode) ext4_fc_del(inode); invalidate_inode_buffers(inode); clear_inode(inode); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) { From f0e54b6087de9571ec61c189d6c378b81edbe3b2 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:21:02 +0800 Subject: [PATCH 060/768] ext4: remove 'needed' in trace_ext4_discard_preallocations As 'needed' to trace_ext4_discard_preallocations is always 0 which is meaningless. Just remove it. Signed-off-by: Kemeng Shi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-10-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 5 ++--- include/trace/events/ext4.h | 11 ++++------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f3da7db2beee..e4f7cf9d89c4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5510,9 +5510,8 @@ void ext4_discard_preallocations(struct inode *inode) struct rb_node *iter; int err; - if (!S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode)) return; - } if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) return; @@ -5520,7 +5519,7 @@ void ext4_discard_preallocations(struct inode *inode) mb_debug(sb, "discard preallocation for inode %lu\n", inode->i_ino); trace_ext4_discard_preallocations(inode, - atomic_read(&ei->i_prealloc_active), 0); + atomic_read(&ei->i_prealloc_active)); repeat: /* first, collect all pa's in the inode */ diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 65029dfb92fb..a697f4b77162 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -772,15 +772,14 @@ TRACE_EVENT(ext4_mb_release_group_pa, ); TRACE_EVENT(ext4_discard_preallocations, - TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed), + TP_PROTO(struct inode *inode, unsigned int len), - TP_ARGS(inode, len, needed), + TP_ARGS(inode, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, len ) - __field( unsigned int, needed ) ), @@ -788,13 +787,11 @@ TRACE_EVENT(ext4_discard_preallocations, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->len = len; - __entry->needed = needed; ), - TP_printk("dev %d,%d ino %lu len: %u needed %u", + TP_printk("dev %d,%d ino %lu len: %u", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->len, - __entry->needed) + (unsigned long) __entry->ino, __entry->len) ); TRACE_EVENT(ext4_mb_discard_preallocations, From 2c25716dcc25a0420c4ad49d6e6bf61e60a21434 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 8 Jan 2024 19:38:44 +1030 Subject: [PATCH 061/768] btrfs: zlib: fix and simplify the inline extent decompression [BUG] If we have a filesystem with 4k sectorsize, and an inlined compressed extent created like this: item 4 key (257 INODE_ITEM 0) itemoff 15863 itemsize 160 generation 8 transid 8 size 4096 nbytes 4096 block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 sequence 1 flags 0x0(none) item 5 key (257 INODE_REF 256) itemoff 15839 itemsize 24 index 2 namelen 14 name: source_inlined item 6 key (257 EXTENT_DATA 0) itemoff 15770 itemsize 69 generation 8 type 0 (inline) inline extent data size 48 ram_bytes 4096 compression 1 (zlib) Which has an inline compressed extent at file offset 0, and its decompressed size is 4K, allowing us to reflink that 4K range to another location (which will not be compressed). If we do such reflink on a subpage system, it would fail like this: # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest XFS_IOC_CLONE_RANGE: Input/output error [CAUSE] In zlib_decompress(), we didn't treat @start_byte as just a page offset, but also use it as an indicator on whether we should switch our output buffer. In reality, for subpage cases, although @start_byte can be non-zero, we should never switch input/output buffer, since the whole input/output buffer should never exceed one sector. Note: The above assumption is only not true if we're going to support multi-page sectorsize. Thus the current code using @start_byte as a condition to switch input/output buffer or finish the decompression is completely incorrect. [FIX] The fix involves several modifications: - Rename @start_byte to @dest_pgoff to properly express its meaning - Add an extra ASSERT() inside btrfs_decompress() to make sure the input/output size never exceeds one sector. - Use Z_FINISH flag to make sure the decompression happens in one go - Remove the loop needed to switch input/output buffers - Use correct destination offset inside the destination page - Consider early end as an error After the fix, even on 64K page sized aarch64, above reflink now works as expected: # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest linked 4096/4096 bytes at offset 61440 And resulted a correct file layout: item 9 key (258 INODE_ITEM 0) itemoff 15542 itemsize 160 generation 10 transid 10 size 65536 nbytes 4096 block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 sequence 1 flags 0x0(none) item 10 key (258 INODE_REF 256) itemoff 15528 itemsize 14 index 3 namelen 4 name: dest item 11 key (258 XATTR_ITEM 3817753667) itemoff 15445 itemsize 83 location key (0 UNKNOWN.0 0) type XATTR transid 10 data_len 37 name_len 16 name: security.selinux data unconfined_u:object_r:unlabeled_t:s0 item 12 key (258 EXTENT_DATA 61440) itemoff 15392 itemsize 53 generation 10 type 1 (regular) extent data disk byte 13631488 nr 4096 extent data offset 0 nr 4096 ram 4096 extent compression 0 (none) Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 23 +++++++++---- fs/btrfs/compression.h | 2 +- fs/btrfs/zlib.c | 73 +++++++++++------------------------------- 3 files changed, 36 insertions(+), 62 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 193168214eeb..68345f73d429 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -141,16 +141,16 @@ static int compression_decompress_bio(struct list_head *ws, } static int compression_decompress(int type, struct list_head *ws, - const u8 *data_in, struct page *dest_page, - unsigned long start_byte, size_t srclen, size_t destlen) + const u8 *data_in, struct page *dest_page, + unsigned long dest_pgoff, size_t srclen, size_t destlen) { switch (type) { case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_NONE: default: /* @@ -1037,14 +1037,23 @@ static int btrfs_decompress_bio(struct compressed_bio *cb) * start_byte tells us the offset into the compressed data we're interested in */ int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page, - unsigned long start_byte, size_t srclen, size_t destlen) + unsigned long dest_pgoff, size_t srclen, size_t destlen) { + struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb); struct list_head *workspace; + const u32 sectorsize = fs_info->sectorsize; int ret; + /* + * The full destination page range should not exceed the page size. + * And the @destlen should not exceed sectorsize, as this is only called for + * inline file extents, which should not exceed sectorsize. + */ + ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize); + workspace = get_workspace(type, 0); ret = compression_decompress(type, workspace, data_in, dest_page, - start_byte, srclen, destlen); + dest_pgoff, srclen, destlen); put_workspace(type, workspace); return ret; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 93cc92974dee..2b4dfb1b010c 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -148,7 +148,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out); int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb); int zlib_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen); struct list_head *zlib_alloc_workspace(unsigned int level); void zlib_free_workspace(struct list_head *ws); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 36cf1f0e338e..8da66ea699e8 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -354,18 +354,13 @@ done: } int zlib_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - unsigned long bytes_left; - unsigned long total_out = 0; - unsigned long pg_offset = 0; - - destlen = min_t(unsigned long, destlen, PAGE_SIZE); - bytes_left = destlen; + unsigned long to_copy; workspace->strm.next_in = data_in; workspace->strm.avail_in = srclen; @@ -390,60 +385,30 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in, return -EIO; } - while (bytes_left > 0) { - unsigned long buf_start; - unsigned long buf_offset; - unsigned long bytes; + /* + * Everything (in/out buf) should be at most one sector, there should + * be no need to switch any input/output buffer. + */ + ret = zlib_inflate(&workspace->strm, Z_FINISH); + to_copy = min(workspace->strm.total_out, destlen); + if (ret != Z_STREAM_END) + goto out; - ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); - if (ret != Z_OK && ret != Z_STREAM_END) - break; + memcpy_to_page(dest_page, dest_pgoff, workspace->buf, to_copy); - buf_start = total_out; - total_out = workspace->strm.total_out; - - if (total_out == buf_start) { - ret = -EIO; - break; - } - - if (total_out <= start_byte) - goto next; - - if (total_out > start_byte && buf_start < start_byte) - buf_offset = start_byte - buf_start; - else - buf_offset = 0; - - bytes = min(PAGE_SIZE - pg_offset, - PAGE_SIZE - (buf_offset % PAGE_SIZE)); - bytes = min(bytes, bytes_left); - - memcpy_to_page(dest_page, pg_offset, - workspace->buf + buf_offset, bytes); - - pg_offset += bytes; - bytes_left -= bytes; -next: - workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = workspace->buf_size; - } - - if (ret != Z_STREAM_END && bytes_left != 0) +out: + if (unlikely(to_copy != destlen)) { + pr_warn_ratelimited("BTRFS: infalte failed, decompressed=%lu expected=%zu\n", + to_copy, destlen); ret = -EIO; - else + } else { ret = 0; + } zlib_inflateEnd(&workspace->strm); - /* - * this should only happen if zlib returned fewer bytes than we - * expected. btrfs_get_block is responsible for zeroing from the - * end of the inline extent (destlen) to the end of the page - */ - if (pg_offset < destlen) { - memzero_page(dest_page, pg_offset, destlen - pg_offset); - } + if (unlikely(to_copy < destlen)) + memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy); return ret; } From 6a69631ec9b1b23784c012a855bbb23012a6dbeb Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 8 Jan 2024 19:38:45 +1030 Subject: [PATCH 062/768] btrfs: lzo: fix and simplify the inline extent decompression [BUG] If we have a filesystem with 4k sectorsize, and an inlined compressed extent created like this: item 4 key (257 INODE_ITEM 0) itemoff 15863 itemsize 160 generation 8 transid 8 size 4096 nbytes 4096 block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 sequence 1 flags 0x0(none) item 5 key (257 INODE_REF 256) itemoff 15839 itemsize 24 index 2 namelen 14 name: source_inlined item 6 key (257 EXTENT_DATA 0) itemoff 15770 itemsize 69 generation 8 type 0 (inline) inline extent data size 48 ram_bytes 4096 compression 2 (lzo) Then trying to reflink that extent in an aarch64 system with 64K page size, the reflink would just fail: # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest XFS_IOC_CLONE_RANGE: Input/output error [CAUSE] In zlib_decompress(), we didn't treat @start_byte as just a page offset, but also use it as an indicator on whether we should error out, without any proper explanation (this is from the very beginning of btrfs). In reality, for subpage cases, although @start_byte can be non-zero, we should never switch input/output buffer nor error out, since the whole input/output buffer should never exceed one sector. Note: The above assumption is only not true if we're going to support multi-page sectorsize. Thus the current code using @start_byte as a condition to switch input/output buffer or finish the decompression is completely incorrect. [FIX] The fix involves several modifications: - Rename @start_byte to @dest_pgoff to properly express its meaning - Use @sectorsize other than PAGE_SIZE to properly initialize the output buffer size - Use correct destination offset inside the destination page - Use memcpy_to_page() to copy the contents to the destination page - Use memzero_page() to zero out the tailing part - Consider early end as an error After the fix, even on 64K page sized aarch64, above reflink now works as expected: # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest linked 4096/4096 bytes at offset 61440 And results the correct file layout: item 9 key (258 INODE_ITEM 0) itemoff 15542 itemsize 160 generation 10 transid 10 size 65536 nbytes 4096 block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 sequence 1 flags 0x0(none) item 10 key (258 INODE_REF 256) itemoff 15528 itemsize 14 index 3 namelen 4 name: dest item 11 key (258 XATTR_ITEM 3817753667) itemoff 15445 itemsize 83 location key (0 UNKNOWN.0 0) type XATTR transid 10 data_len 37 name_len 16 name: security.selinux data unconfined_u:object_r:unlabeled_t:s0 item 12 key (258 EXTENT_DATA 61440) itemoff 15392 itemsize 53 generation 10 type 1 (regular) extent data disk byte 13631488 nr 4096 extent data offset 0 nr 4096 ram 4096 extent compression 0 (none) Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.h | 2 +- fs/btrfs/lzo.c | 34 +++++++++------------------------- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 2b4dfb1b010c..afd7e50d073d 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -159,7 +159,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out); int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb); int lzo_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen); struct list_head *lzo_alloc_workspace(unsigned int level); void lzo_free_workspace(struct list_head *ws); diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 1131d5a29d61..e43bc0fdc74e 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -425,16 +425,16 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb) } int lzo_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); + struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb); + const u32 sectorsize = fs_info->sectorsize; size_t in_len; size_t out_len; size_t max_segment_len = WORKSPACE_BUF_LENGTH; int ret = 0; - char *kaddr; - unsigned long bytes; if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2) return -EUCLEAN; @@ -451,7 +451,7 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in, } data_in += LZO_LEN; - out_len = PAGE_SIZE; + out_len = sectorsize; ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); if (ret != LZO_E_OK) { pr_warn("BTRFS: decompress failed!\n"); @@ -459,29 +459,13 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in, goto out; } - if (out_len < start_byte) { + ASSERT(out_len <= sectorsize); + memcpy_to_page(dest_page, dest_pgoff, workspace->buf, out_len); + /* Early end, considered as an error. */ + if (unlikely(out_len < destlen)) { ret = -EIO; - goto out; + memzero_page(dest_page, dest_pgoff + out_len, destlen - out_len); } - - /* - * the caller is already checking against PAGE_SIZE, but lets - * move this check closer to the memcpy/memset - */ - destlen = min_t(unsigned long, destlen, PAGE_SIZE); - bytes = min_t(unsigned long, destlen, out_len - start_byte); - - kaddr = kmap_local_page(dest_page); - memcpy(kaddr, workspace->buf + start_byte, bytes); - - /* - * btrfs_getblock is doing a zero on the tail of the page too, - * but this will cover anything missing from the decompressed - * data. - */ - if (bytes < destlen) - memset(kaddr+bytes, 0, destlen-bytes); - kunmap_local(kaddr); out: return ret; } From 1e7f6def8b2370ecefb54b3c8f390ff894b0c51b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 8 Jan 2024 19:38:46 +1030 Subject: [PATCH 063/768] btrfs: zstd: fix and simplify the inline extent decompression [BUG] If we have a filesystem with 4k sectorsize, and an inlined compressed extent created like this: item 4 key (257 INODE_ITEM 0) itemoff 15863 itemsize 160 generation 8 transid 8 size 4096 nbytes 4096 block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 sequence 1 flags 0x0(none) item 5 key (257 INODE_REF 256) itemoff 15839 itemsize 24 index 2 namelen 14 name: source_inlined item 6 key (257 EXTENT_DATA 0) itemoff 15770 itemsize 69 generation 8 type 0 (inline) inline extent data size 48 ram_bytes 4096 compression 3 (zstd) Then trying to reflink that extent in an aarch64 system with 64K page size, the reflink would just fail: # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest XFS_IOC_CLONE_RANGE: Input/output error [CAUSE] In zstd_decompress(), we didn't treat @start_byte as just a page offset, but also use it as an indicator on whether we should error out, without any proper explanation (this is copied from other decompression code). In reality, for subpage cases, although @start_byte can be non-zero, we should never switch input/output buffer nor error out, since the whole input/output buffer should never exceed one sector, thus we should not need to do any buffer switch. Thus the current code using @start_byte as a condition to switch input/output buffer or finish the decompression is completely incorrect. [FIX] The fix involves several modification: - Rename @start_byte to @dest_pgoff to properly express its meaning - Use @sectorsize other than PAGE_SIZE to properly initialize the output buffer size - Use correct destination offset inside the destination page - Simplify the main loop Since the input/output buffer should never switch, we only need one zstd_decompress_stream() call. - Consider early end as an error After the fix, even on 64K page sized aarch64, above reflink now works as expected: # xfs_io -f -c "reflink $mnt/source_inlined 0 60k 4k" $mnt/dest linked 4096/4096 bytes at offset 61440 And results the correct file layout: item 9 key (258 INODE_ITEM 0) itemoff 15542 itemsize 160 generation 10 transid 10 size 65536 nbytes 4096 block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0 sequence 1 flags 0x0(none) item 10 key (258 INODE_REF 256) itemoff 15528 itemsize 14 index 3 namelen 4 name: dest item 11 key (258 XATTR_ITEM 3817753667) itemoff 15445 itemsize 83 location key (0 UNKNOWN.0 0) type XATTR transid 10 data_len 37 name_len 16 name: security.selinux data unconfined_u:object_r:unlabeled_t:s0 item 12 key (258 EXTENT_DATA 61440) itemoff 15392 itemsize 53 generation 10 type 1 (regular) extent data disk byte 13631488 nr 4096 extent data offset 0 nr 4096 ram 4096 extent compression 0 (none) Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.h | 2 +- fs/btrfs/zstd.c | 73 ++++++++++++------------------------------ 2 files changed, 22 insertions(+), 53 deletions(-) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index afd7e50d073d..97fe3ebf11a2 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -169,7 +169,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out); int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb); int zstd_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen); void zstd_init_workspace_manager(void); void zstd_cleanup_workspace_manager(void); diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 0d66db8bc1d4..346c46d88d07 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -20,6 +20,7 @@ #include "misc.h" #include "compression.h" #include "ctree.h" +#include "super.h" #define ZSTD_BTRFS_MAX_WINDOWLOG 17 #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) @@ -618,80 +619,48 @@ done: } int zstd_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long start_byte, size_t srclen, + struct page *dest_page, unsigned long dest_pgoff, size_t srclen, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); + struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb); + const u32 sectorsize = fs_info->sectorsize; zstd_dstream *stream; int ret = 0; - size_t ret2; - unsigned long total_out = 0; - unsigned long pg_offset = 0; + unsigned long to_copy = 0; stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (!stream) { pr_warn("BTRFS: zstd_init_dstream failed\n"); - ret = -EIO; goto finish; } - destlen = min_t(size_t, destlen, PAGE_SIZE); - workspace->in_buf.src = data_in; workspace->in_buf.pos = 0; workspace->in_buf.size = srclen; workspace->out_buf.dst = workspace->buf; workspace->out_buf.pos = 0; - workspace->out_buf.size = PAGE_SIZE; + workspace->out_buf.size = sectorsize; - ret2 = 1; - while (pg_offset < destlen - && workspace->in_buf.pos < workspace->in_buf.size) { - unsigned long buf_start; - unsigned long buf_offset; - unsigned long bytes; - - /* Check if the frame is over and we still need more input */ - if (ret2 == 0) { - pr_debug("BTRFS: zstd_decompress_stream ended early\n"); - ret = -EIO; - goto finish; - } - ret2 = zstd_decompress_stream(stream, &workspace->out_buf, - &workspace->in_buf); - if (zstd_is_error(ret2)) { - pr_debug("BTRFS: zstd_decompress_stream returned %d\n", - zstd_get_error_code(ret2)); - ret = -EIO; - goto finish; - } - - buf_start = total_out; - total_out += workspace->out_buf.pos; - workspace->out_buf.pos = 0; - - if (total_out <= start_byte) - continue; - - if (total_out > start_byte && buf_start < start_byte) - buf_offset = start_byte - buf_start; - else - buf_offset = 0; - - bytes = min_t(unsigned long, destlen - pg_offset, - workspace->out_buf.size - buf_offset); - - memcpy_to_page(dest_page, pg_offset, - workspace->out_buf.dst + buf_offset, bytes); - - pg_offset += bytes; + /* + * Since both input and output buffers should not exceed one sector, + * one call should end the decompression. + */ + ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); + if (zstd_is_error(ret)) { + pr_warn_ratelimited("BTRFS: zstd_decompress_stream return %d\n", + zstd_get_error_code(ret)); + goto finish; } - ret = 0; + to_copy = workspace->out_buf.pos; + memcpy_to_page(dest_page, dest_pgoff + to_copy, workspace->out_buf.dst, to_copy); finish: - if (pg_offset < destlen) { - memzero_page(dest_page, pg_offset, destlen - pg_offset); + /* Error or early end. */ + if (unlikely(to_copy < destlen)) { + ret = -EIO; + memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy); } return ret; } From f398e70dd69e6ceea71463a5380e6118f219197e Mon Sep 17 00:00:00 2001 From: Chung-Chiang Cheng Date: Fri, 12 Jan 2024 15:41:05 +0800 Subject: [PATCH 064/768] btrfs: tree-checker: fix inline ref size in error messages The error message should accurately reflect the size rather than the type. Fixes: f82d1c7ca8ae ("btrfs: tree-checker: Add EXTENT_ITEM and METADATA_ITEM check") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Reviewed-by: Qu Wenruo Signed-off-by: Chung-Chiang Cheng Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 50fdc69fdddf..6eccf8496486 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1436,7 +1436,7 @@ static int check_extent_item(struct extent_buffer *leaf, if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { extent_err(leaf, slot, "inline ref item overflows extent item, ptr %lu iref size %u end %lu", - ptr, inline_type, end); + ptr, btrfs_extent_inline_ref_size(inline_type), end); return -EUCLEAN; } From a208b3f132b48e1f94f620024e66fea635925877 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 15 Jan 2024 20:30:26 +0100 Subject: [PATCH 065/768] btrfs: don't warn if discard range is not aligned to sector There's a warning in btrfs_issue_discard() when the range is not aligned to 512 bytes, originally added in 4d89d377bbb0 ("btrfs: btrfs_issue_discard ensure offset/length are aligned to sector boundaries"). We can't do sub-sector writes anyway so the adjustment is the only thing that we can do and the warning is unnecessary. CC: stable@vger.kernel.org # 4.19+ Reported-by: syzbot+4a4f1eba14eb5c3417d1@syzkaller.appspotmail.com Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6d680031211a..8e8cc1111277 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1260,7 +1260,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, u64 bytes_left, end; u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT); - if (WARN_ON(start != aligned_start)) { + /* Adjust the range to be aligned to 512B sectors if necessary. */ + if (start != aligned_start) { len -= aligned_start - start; len = round_down(len, 1 << SECTOR_SHIFT); start = aligned_start; From 2018ef1d9ac3e95448b9206adc3425b0431c2411 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 10 Jan 2024 12:54:41 -0500 Subject: [PATCH 066/768] btrfs: use the original mount's mount options for the legacy reconfigure btrfs/330, which tests our old trick to allow mount -o ro,subvol=/x /dev/sda1 /foo mount -o rw,subvol=/y /dev/sda1 /bar fails on the block group tree. This is because we aren't preserving the mount options for what is essentially a remount, and thus we're ending up without the FREE_SPACE_TREE mount option, which triggers our free space tree delete codepath. This isn't possible with the block group tree and thus it falls over. Fix this by making sure we copy the existing mount options for the existing fs mount over in this case. Fixes: f044b318675f ("btrfs: handle the ro->rw transition for mounting different subvolumes") Reviewed-by: Neal Gompa Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3a677b808f0f..f192f8fe0ce6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1451,6 +1451,14 @@ static int btrfs_reconfigure(struct fs_context *fc) btrfs_info_to_ctx(fs_info, &old_ctx); + /* + * This is our "bind mount" trick, we don't want to allow the user to do + * anything other than mount a different ro/rw and a different subvol, + * all of the mount options should be maintained. + */ + if (mount_reconfigure) + ctx->mount_opt = old_ctx.mount_opt; + sync_filesystem(sb); set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); From 1e61b8c672ab2f59b282c8d48a29c14b52c0f5b4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 10 Jan 2024 17:14:21 -0500 Subject: [PATCH 067/768] btrfs: don't unconditionally call folio_start_writeback in subpage In the normal case we check if a page is under writeback and skip it before we attempt to begin writeback. The exception is subpage metadata writes, where we know we don't have an eb under writeback and we're doing it one eb at a time. Since b5612c368648 ("mm: return void from folio_start_writeback() and related functions") we now will BUG_ON() if we call folio_start_writeback() on a folio that's already under writeback. Previously folio_start_writeback() would bail if writeback was already started. Fix this in the subpage code by checking if we have writeback set and skipping it if we do. This fixes the panic we were seeing on subpage. Reviewed-by: Qu Wenruo Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/subpage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index d9a30b93d543..277dd6d312ee 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -475,7 +475,8 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, spin_lock_irqsave(&subpage->lock, flags); bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); - folio_start_writeback(folio); + if (!folio_test_writeback(folio)) + folio_start_writeback(folio); spin_unlock_irqrestore(&subpage->lock, flags); } From f546c4282673497a06ecb6190b50ae7f6c85b02f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 17 Jan 2024 11:02:25 +1030 Subject: [PATCH 068/768] btrfs: scrub: avoid use-after-free when chunk length is not 64K aligned [BUG] There is a bug report that, on a ext4-converted btrfs, scrub leads to various problems, including: - "unable to find chunk map" errors BTRFS info (device vdb): scrub: started on devid 1 BTRFS critical (device vdb): unable to find chunk map for logical 2214744064 length 4096 BTRFS critical (device vdb): unable to find chunk map for logical 2214744064 length 45056 This would lead to unrepariable errors. - Use-after-free KASAN reports: ================================================================== BUG: KASAN: slab-use-after-free in __blk_rq_map_sg+0x18f/0x7c0 Read of size 8 at addr ffff8881013c9040 by task btrfs/909 CPU: 0 PID: 909 Comm: btrfs Not tainted 6.7.0-x64v3-dbg #11 c50636e9419a8354555555245df535e380563b2b Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 2023.11-2 12/24/2023 Call Trace: dump_stack_lvl+0x43/0x60 print_report+0xcf/0x640 kasan_report+0xa6/0xd0 __blk_rq_map_sg+0x18f/0x7c0 virtblk_prep_rq.isra.0+0x215/0x6a0 [virtio_blk 19a65eeee9ae6fcf02edfad39bb9ddee07dcdaff] virtio_queue_rqs+0xc4/0x310 [virtio_blk 19a65eeee9ae6fcf02edfad39bb9ddee07dcdaff] blk_mq_flush_plug_list.part.0+0x780/0x860 __blk_flush_plug+0x1ba/0x220 blk_finish_plug+0x3b/0x60 submit_initial_group_read+0x10a/0x290 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965] flush_scrub_stripes+0x38e/0x430 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965] scrub_stripe+0x82a/0xae0 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965] scrub_chunk+0x178/0x200 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965] scrub_enumerate_chunks+0x4bc/0xa30 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965] btrfs_scrub_dev+0x398/0x810 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965] btrfs_ioctl+0x4b9/0x3020 [btrfs e57987a360bed82fe8756dcd3e0de5406ccfe965] __x64_sys_ioctl+0xbd/0x100 do_syscall_64+0x5d/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f47e5e0952b - Crash, mostly due to above use-after-free [CAUSE] The converted fs has the following data chunk layout: item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 2214658048) itemoff 16025 itemsize 80 length 86016 owner 2 stripe_len 65536 type DATA|single For above logical bytenr 2214744064, it's at the chunk end (2214658048 + 86016 = 2214744064). This means btrfs_submit_bio() would split the bio, and trigger endio function for both of the two halves. However scrub_submit_initial_read() would only expect the endio function to be called once, not any more. This means the first endio function would already free the bbio::bio, leaving the bvec freed, thus the 2nd endio call would lead to use-after-free. [FIX] - Make sure scrub_read_endio() only updates bits in its range Since we may read less than 64K at the end of the chunk, we should not touch the bits beyond chunk boundary. - Make sure scrub_submit_initial_read() only to read the chunk range This is done by calculating the real number of sectors we need to read, and add sector-by-sector to the bio. Thankfully the scrub read repair path won't need extra fixes: - scrub_stripe_submit_repair_read() With above fixes, we won't update error bit for range beyond chunk, thus scrub_stripe_submit_repair_read() should never submit any read beyond the chunk. Reported-by: Rongrong Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure") Tested-by: Rongrong Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a01807cbd4d4..2d81b1a18a04 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1098,12 +1098,22 @@ out: static void scrub_read_endio(struct btrfs_bio *bbio) { struct scrub_stripe *stripe = bbio->private; + struct bio_vec *bvec; + int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); + int num_sectors; + u32 bio_size = 0; + int i; + + ASSERT(sector_nr < stripe->nr_sectors); + bio_for_each_bvec_all(bvec, &bbio->bio, i) + bio_size += bvec->bv_len; + num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits; if (bbio->bio.bi_status) { - bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors); - bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors); + bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors); + bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors); } else { - bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors); + bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors); } bio_put(&bbio->bio); if (atomic_dec_and_test(&stripe->pending_io)) { @@ -1701,6 +1711,9 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; + unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + + stripe->bg->length - stripe->logical) >> + fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1715,14 +1728,16 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, scrub_read_endio, stripe); - /* Read the whole stripe. */ bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT; - for (int i = 0; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT; i++) { + /* Read the whole range inside the chunk boundary. */ + for (unsigned int cur = 0; cur < nr_sectors; cur++) { + struct page *page = scrub_stripe_get_page(stripe, cur); + unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur); int ret; - ret = bio_add_page(&bbio->bio, stripe->pages[i], PAGE_SIZE, 0); + ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); /* We should have allocated enough bio vectors. */ - ASSERT(ret == PAGE_SIZE); + ASSERT(ret == fs_info->sectorsize); } atomic_inc(&stripe->pending_io); From 7f2d219e78e95a137a9c76fddac7ff8228260439 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 17 Jan 2024 11:02:26 +1030 Subject: [PATCH 069/768] btrfs: scrub: limit RST scrub to chunk boundary [BUG] If there is an extent beyond chunk boundary, currently RST scrub would error out. [CAUSE] In scrub_submit_extent_sector_read(), we completely rely on extent_sector_bitmap, which is populated using extent tree. The extent tree can be corrupted that there is an extent item beyond a chunk. In that case, RST scrub would fail and error out. [FIX] Despite the extent_sector_bitmap usage, also limit the read to chunk boundary. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2d81b1a18a04..0123d2728923 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1646,6 +1646,9 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; + unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + + stripe->bg->length - stripe->logical) >> + fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1656,6 +1659,10 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct page *page = scrub_stripe_get_page(stripe, i); unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i); + /* We're beyond the chunk boundary, no need to read anymore. */ + if (i >= nr_sectors) + break; + /* The current sector cannot be merged, submit the bio. */ if (bbio && ((i > 0 && From 180a8f12c21f41740fee09ca7f7aa98ff5bb99f8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 23 Dec 2023 15:16:50 +0100 Subject: [PATCH 070/768] Input: goodix - accept ACPI resources with gpio_count == 3 && gpio_int_idx == 0 Some devices list 3 Gpio resources in the ACPI resource list for the touchscreen: 1. GpioInt resource pointing to the GPIO used for the interrupt 2. GpioIo resource pointing to the reset GPIO 3. GpioIo resource pointing to the GPIO used for the interrupt Note how the third extra GpioIo resource really is a duplicate of the GpioInt provided info. Ignore this extra GPIO, treating this setup the same as gpio_count == 2 && gpio_int_idx == 0 fixes the touchscreen not working on the Thunderbook Colossus W803 rugged tablet and likely also on the CyberBook_T116K. Reported-by: Maarten van der Schrieck Closes: https://gitlab.com/AdyaAdya/goodix-touchscreen-linux-driver/-/issues/22 Suggested-by: Maarten van der Schrieck Tested-by: Maarten van der Schrieck Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231223141650.10679-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index af32fbe57b63..b068ff8afbc9 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } } - if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) { + /* Some devices with gpio_int_idx 0 list a third unused GPIO */ + if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) { ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_first_gpios; } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) { From 6a9531c3a88096a26cf3ac582f7ec44f94a7dcb2 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Thu, 18 Jan 2024 14:18:53 +0800 Subject: [PATCH 071/768] memblock: fix crash when reserved memory is not added to memory After commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") nid of a reserved region is used by init_reserved_page() (with CONFIG_DEFERRED_STRUCT_PAGE_INIT=y) to access node strucure. In many cases the nid of the reserved memory is not set and this causes a crash. When the nid of a reserved region is not set, fall back to early_pfn_to_nid(), so that nid of the first_online_node will be passed to init_reserved_page(). Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Yajun Deng Link: https://lore.kernel.org/r/20240118061853.2652295-1-yajun.deng@linux.dev [rppt: massaged the commit message] Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index 5a88d6d24d79..4823ad979b72 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2141,6 +2141,9 @@ static void __init memmap_init_reserved_pages(void) start = region->base; end = start + region->size; + if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES) + nid = early_pfn_to_nid(PFN_DOWN(start)); + reserve_bootmem_region(start, end, nid); } } From e626cb02ee8399fd42c415e542d031d185783903 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Jan 2024 12:54:51 +0100 Subject: [PATCH 072/768] futex: Prevent the reuse of stale pi_state Jiri Slaby reported a futex state inconsistency resulting in -EINVAL during a lock operation for a PI futex. It requires that the a lock process is interrupted by a timeout or signal: T1 Owns the futex in user space. T2 Tries to acquire the futex in kernel (futex_lock_pi()). Allocates a pi_state and attaches itself to it. T2 Times out and removes its rt_waiter from the rt_mutex. Drops the rtmutex lock and tries to acquire the hash bucket lock to remove the futex_q. The lock is contended and T2 schedules out. T1 Unlocks the futex (futex_unlock_pi()). Finds a futex_q but no rt_waiter. Unlocks the futex (do_uncontended) and makes it available to user space. T3 Acquires the futex in user space. T4 Tries to acquire the futex in kernel (futex_lock_pi()). Finds the existing futex_q of T2 and tries to attach itself to the existing pi_state. This (attach_to_pi_state()) fails with -EINVAL because uval contains the TID of T3 but pi_state points to T1. It's incorrect to unlock the futex and make it available for user space to acquire as long as there is still an existing state attached to it in the kernel. T1 cannot hand over the futex to T2 because T2 already gave up and started to clean up and is blocked on the hash bucket lock, so T2's futex_q with the pi_state pointing to T1 is still queued. T2 observes the futex_q, but ignores it as there is no waiter on the corresponding rt_mutex and takes the uncontended path which allows the subsequent caller of futex_lock_pi() (T4) to observe that stale state. To prevent this the unlock path must dequeue all futex_q entries which point to the same pi_state when there is no waiter on the rt mutex. This requires obviously to make the dequeue conditional in the locking path to prevent a double dequeue. With that it's guaranteed that user space cannot observe an uncontended futex which has kernel state attached. Fixes: fbeb558b0dd0d ("futex/pi: Fix recursive rt_mutex waiter state") Reported-by: Jiri Slaby Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Tested-by: Jiri Slaby Link: https://lore.kernel.org/r/20240118115451.0TkD_ZhB@linutronix.de Closes: https://lore.kernel.org/all/4611bcf2-44d0-4c34-9b84-17406f881003@kernel.org --- kernel/futex/core.c | 15 ++++++++++++--- kernel/futex/pi.c | 11 ++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index e0e853412c15..1e78ef24321e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -627,12 +627,21 @@ retry: } /* - * PI futexes can not be requeued and must remove themselves from the - * hash bucket. The hash bucket lock (i.e. lock_ptr) is held. + * PI futexes can not be requeued and must remove themselves from the hash + * bucket. The hash bucket lock (i.e. lock_ptr) is held. */ void futex_unqueue_pi(struct futex_q *q) { - __futex_unqueue(q); + /* + * If the lock was not acquired (due to timeout or signal) then the + * rt_waiter is removed before futex_q is. If this is observed by + * an unlocker after dropping the rtmutex wait lock and before + * acquiring the hash bucket lock, then the unlocker dequeues the + * futex_q from the hash bucket list to guarantee consistent state + * vs. userspace. Therefore the dequeue here must be conditional. + */ + if (!plist_node_empty(&q->list)) + __futex_unqueue(q); BUG_ON(!q->pi_state); put_pi_state(q->pi_state); diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index 90e5197f4e56..5722467f2737 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -1135,6 +1135,7 @@ retry: hb = futex_hash(&key); spin_lock(&hb->lock); +retry_hb: /* * Check waiters first. We do not trust user space values at @@ -1177,12 +1178,17 @@ retry: /* * Futex vs rt_mutex waiter state -- if there are no rt_mutex * waiters even though futex thinks there are, then the waiter - * is leaving and the uncontended path is safe to take. + * is leaving. The entry needs to be removed from the list so a + * new futex_lock_pi() is not using this stale PI-state while + * the futex is available in user space again. + * There can be more than one task on its way out so it needs + * to retry. */ rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); if (!rt_waiter) { + __futex_unqueue(top_waiter); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - goto do_uncontended; + goto retry_hb; } get_pi_state(pi_state); @@ -1217,7 +1223,6 @@ retry: return ret; } -do_uncontended: /* * We have no kernel internal state, i.e. no waiters in the * kernel. Waiters which are about to queue themselves are stuck From dbc153fd3c142909e564bb256da087e13fbf239c Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 18 Jan 2024 12:32:10 +0800 Subject: [PATCH 073/768] net/smc: fix illegal rmb_desc access in SMC-D connection dump A crash was found when dumping SMC-D connections. It can be reproduced by following steps: - run nginx/wrk test: smc_run nginx smc_run wrk -t 16 -c 1000 -d -H 'Connection: Close' - continuously dump SMC-D connections in parallel: watch -n 1 'smcss -D' BUG: kernel NULL pointer dereference, address: 0000000000000030 CPU: 2 PID: 7204 Comm: smcss Kdump: loaded Tainted: G E 6.7.0+ #55 RIP: 0010:__smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x66/0x150 ? exc_page_fault+0x69/0x140 ? asm_exc_page_fault+0x26/0x30 ? __smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] ? __kmalloc_node_track_caller+0x35d/0x430 ? __alloc_skb+0x77/0x170 smc_diag_dump_proto+0xd0/0xf0 [smc_diag] smc_diag_dump+0x26/0x60 [smc_diag] netlink_dump+0x19f/0x320 __netlink_dump_start+0x1dc/0x300 smc_diag_handler_dump+0x6a/0x80 [smc_diag] ? __pfx_smc_diag_dump+0x10/0x10 [smc_diag] sock_diag_rcv_msg+0x121/0x140 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x5a/0x110 sock_diag_rcv+0x28/0x40 netlink_unicast+0x22a/0x330 netlink_sendmsg+0x1f8/0x420 __sock_sendmsg+0xb0/0xc0 ____sys_sendmsg+0x24e/0x300 ? copy_msghdr_from_user+0x62/0x80 ___sys_sendmsg+0x7c/0xd0 ? __do_fault+0x34/0x160 ? do_read_fault+0x5f/0x100 ? do_fault+0xb0/0x110 ? __handle_mm_fault+0x2b0/0x6c0 __sys_sendmsg+0x4d/0x80 do_syscall_64+0x69/0x180 entry_SYSCALL_64_after_hwframe+0x6e/0x76 It is possible that the connection is in process of being established when we dump it. Assumed that the connection has been registered in a link group by smc_conn_create() but the rmb_desc has not yet been initialized by smc_buf_create(), thus causing the illegal access to conn->rmb_desc. So fix it by checking before dump. Fixes: 4b1b7d3b30a6 ("net/smc: add SMC-D diag support") Signed-off-by: Wen Gu Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/smc_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 52f7c4f1e767..5a33908015f3 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; From b01f15a7571b7aa222458bc9bf26ab59bd84e384 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 17 Jan 2024 19:12:32 -0500 Subject: [PATCH 074/768] selftests: bonding: Increase timeout to 1200s When tests are run by runner.sh, bond_options.sh gets killed before it can complete: make -C tools/testing/selftests run_tests TARGETS="drivers/net/bonding" [...] # timeout set to 120 # selftests: drivers/net/bonding: bond_options.sh # TEST: prio (active-backup miimon primary_reselect 0) [ OK ] # TEST: prio (active-backup miimon primary_reselect 1) [ OK ] # TEST: prio (active-backup miimon primary_reselect 2) [ OK ] # TEST: prio (active-backup arp_ip_target primary_reselect 0) [ OK ] # TEST: prio (active-backup arp_ip_target primary_reselect 1) [ OK ] # TEST: prio (active-backup arp_ip_target primary_reselect 2) [ OK ] # not ok 7 selftests: drivers/net/bonding: bond_options.sh # TIMEOUT 120 seconds This test includes many sleep statements, at least some of which are related to timers in the operation of the bonding driver itself. Increase the test timeout to allow the test to complete. I ran the test in slightly different VMs (including one without HW virtualization support) and got runtimes of 13m39.760s, 13m31.238s, and 13m2.956s. Use a ~1.5x "safety factor" and set the timeout to 1200s. Fixes: 42a8d4aaea84 ("selftests: bonding: add bonding prio option test") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240116104402.1203850a@kernel.org/#t Suggested-by: Jakub Kicinski Signed-off-by: Benjamin Poirier Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20240118001233.304759-1-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/bonding/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings index 6091b45d226b..79b65bdf05db 100644 --- a/tools/testing/selftests/drivers/net/bonding/settings +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -1 +1 @@ -timeout=120 +timeout=1200 From 198bc90e0e734e5f98c3d2833e8390cac3df61b2 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 18 Jan 2024 09:20:19 +0800 Subject: [PATCH 075/768] tcp: make sure init the accept_queue's spinlocks once When I run syz's reproduction C program locally, it causes the following issue: pvqspinlock: lock 0xffff9d181cd5c660 has corrupted value 0x0! WARNING: CPU: 19 PID: 21160 at __pv_queued_spin_unlock_slowpath (kernel/locking/qspinlock_paravirt.h:508) Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__pv_queued_spin_unlock_slowpath (kernel/locking/qspinlock_paravirt.h:508) Code: 73 56 3a ff 90 c3 cc cc cc cc 8b 05 bb 1f 48 01 85 c0 74 05 c3 cc cc cc cc 8b 17 48 89 fe 48 c7 c7 30 20 ce 8f e8 ad 56 42 ff <0f> 0b c3 cc cc cc cc 0f 0b 0f 1f 40 00 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffa8d200604cb8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff9d1ef60e0908 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff9d1ef60e0900 RBP: ffff9d181cd5c280 R08: 0000000000000000 R09: 00000000ffff7fff R10: ffffa8d200604b68 R11: ffffffff907dcdc8 R12: 0000000000000000 R13: ffff9d181cd5c660 R14: ffff9d1813a3f330 R15: 0000000000001000 FS: 00007fa110184640(0000) GS:ffff9d1ef60c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000000 CR3: 000000011f65e000 CR4: 00000000000006f0 Call Trace: _raw_spin_unlock (kernel/locking/spinlock.c:186) inet_csk_reqsk_queue_add (net/ipv4/inet_connection_sock.c:1321) inet_csk_complete_hashdance (net/ipv4/inet_connection_sock.c:1358) tcp_check_req (net/ipv4/tcp_minisocks.c:868) tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2260) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205) ip_local_deliver_finish (net/ipv4/ip_input.c:234) __netif_receive_skb_one_core (net/core/dev.c:5529) process_backlog (./include/linux/rcupdate.h:779) __napi_poll (net/core/dev.c:6533) net_rx_action (net/core/dev.c:6604) __do_softirq (./arch/x86/include/asm/jump_label.h:27) do_softirq (kernel/softirq.c:454 kernel/softirq.c:441) __local_bh_enable_ip (kernel/softirq.c:381) __dev_queue_xmit (net/core/dev.c:4374) ip_finish_output2 (./include/net/neighbour.h:540 net/ipv4/ip_output.c:235) __ip_queue_xmit (net/ipv4/ip_output.c:535) __tcp_transmit_skb (net/ipv4/tcp_output.c:1462) tcp_rcv_synsent_state_process (net/ipv4/tcp_input.c:6469) tcp_rcv_state_process (net/ipv4/tcp_input.c:6657) tcp_v4_do_rcv (net/ipv4/tcp_ipv4.c:1929) __release_sock (./include/net/sock.h:1121 net/core/sock.c:2968) release_sock (net/core/sock.c:3536) inet_wait_for_connect (net/ipv4/af_inet.c:609) __inet_stream_connect (net/ipv4/af_inet.c:702) inet_stream_connect (net/ipv4/af_inet.c:748) __sys_connect (./include/linux/file.h:45 net/socket.c:2064) __x64_sys_connect (net/socket.c:2073 net/socket.c:2070 net/socket.c:2070) do_syscall_64 (arch/x86/entry/common.c:51 arch/x86/entry/common.c:82) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7fa10ff05a3d Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ab a3 0e 00 f7 d8 64 89 01 48 RSP: 002b:00007fa110183de8 EFLAGS: 00000202 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000020000054 RCX: 00007fa10ff05a3d RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000003 RBP: 00007fa110183e20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 00007fa110184640 R13: 0000000000000000 R14: 00007fa10fe8b060 R15: 00007fff73e23b20 The issue triggering process is analyzed as follows: Thread A Thread B tcp_v4_rcv //receive ack TCP packet inet_shutdown tcp_check_req tcp_disconnect //disconnect sock ... tcp_set_state(sk, TCP_CLOSE) inet_csk_complete_hashdance ... inet_csk_reqsk_queue_add inet_listen //start listen spin_lock(&queue->rskq_lock) inet_csk_listen_start ... reqsk_queue_alloc ... spin_lock_init spin_unlock(&queue->rskq_lock) //warning When the socket receives the ACK packet during the three-way handshake, it will hold spinlock. And then the user actively shutdowns the socket and listens to the socket immediately, the spinlock will be initialized. When the socket is going to release the spinlock, a warning is generated. Also the same issue to fastopenq.lock. Move init spinlock to inet_create and inet_accept to make sure init the accept_queue's spinlocks once. Fixes: fff1f3001cc5 ("tcp: add a spinlock to protect struct request_sock_queue") Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Reported-by: Ming Shu Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240118012019.1751966-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 8 ++++++++ net/core/request_sock.c | 3 --- net/ipv4/af_inet.c | 3 +++ net/ipv4/inet_connection_sock.c | 4 ++++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index d0a2f827d5f2..9ab4bf704e86 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -357,4 +357,12 @@ static inline bool inet_csk_has_ulp(const struct sock *sk) return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops; } +static inline void inet_init_csk_locks(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + spin_lock_init(&icsk->icsk_accept_queue.rskq_lock); + spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/core/request_sock.c b/net/core/request_sock.c index f35c2e998406..63de5c635842 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -33,9 +33,6 @@ void reqsk_queue_alloc(struct request_sock_queue *queue) { - spin_lock_init(&queue->rskq_lock); - - spin_lock_init(&queue->fastopenq.lock); queue->fastopenq.rskq_rst_head = NULL; queue->fastopenq.rskq_rst_tail = NULL; queue->fastopenq.qlen = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 835f4f9d98d2..4e635dd3d3c8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -330,6 +330,9 @@ lookup_protocol: if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8e2eb1793685..459af1f89739 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -727,6 +727,10 @@ out: } if (req) reqsk_put(req); + + if (newsk) + inet_init_csk_locks(newsk); + return newsk; out_err: newsk = NULL; From 3c1069fa42872f95cf3c6fedf80723d391e12d57 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 17 Jan 2024 15:45:11 -0800 Subject: [PATCH 076/768] bnxt_en: Wait for FLR to complete during probe The first message to firmware may fail if the device is undergoing FLR. The driver has some recovery logic for this failure scenario but we must wait 100 msec for FLR to complete before proceeding. Otherwise the recovery will always fail. Fixes: ba02629ff6cb ("bnxt_en: log firmware status on firmware init failure") Reviewed-by: Damodharam Ammepalli Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240117234515.226944-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0aacd3c6ed5c..0866aba35d9b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13232,6 +13232,11 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) bp->fw_cap = 0; rc = bnxt_hwrm_ver_get(bp); + /* FW may be unresponsive after FLR. FLR must complete within 100 msec + * so wait before continuing with recovery. + */ + if (rc) + msleep(100); bnxt_try_map_fw_health_reg(bp); if (rc) { rc = bnxt_try_recover_fw(bp); From 2ad8e57338ac7b1e149d458669a95132e2460096 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 17 Jan 2024 15:45:12 -0800 Subject: [PATCH 077/768] bnxt_en: Fix memory leak in bnxt_hwrm_get_rings() bnxt_hwrm_get_rings() can abort and return error when there are not enough ring resources. It aborts without releasing the HWRM DMA buffer, causing a dma_pool_destroy warning when the driver is unloaded: bnxt_en 0000:99:00.0: dma_pool_destroy bnxt_hwrm, 000000005b089ba8 busy Fixes: f1e50b276d37 ("bnxt_en: Fix trimming of P5 RX and TX rings") Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240117234515.226944-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0866aba35d9b..9fdc90bfce38 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6926,7 +6926,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp) if (cp < (rx + tx)) { rc = __bnxt_trim_rings(bp, &rx, &tx, cp, false); if (rc) - return rc; + goto get_rings_exit; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx <<= 1; hw_resc->resv_rx_rings = rx; @@ -6938,8 +6938,9 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp) hw_resc->resv_cp_rings = cp; hw_resc->resv_stat_ctxs = stats; } +get_rings_exit: hwrm_req_drop(bp, req); - return 0; + return rc; } int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings) From 523384a6aa095d3f3d9ee8b1a4e289d4311cd2d9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 17 Jan 2024 15:45:13 -0800 Subject: [PATCH 078/768] bnxt_en: Fix RSS table entries calculation for P5_PLUS chips The existing formula used in the driver to calculate the number of RSS table entries is to round up the number of RX rings to the next integer multiples of 64 (e.g. 64, 128, 192, ..). This is incorrect. The valid values supported by the chip are 64, 128, 256, 512 only (power of 2 starting from 64). When the number of RX rings is greater than 128, the entry size will likely be wrong. Firmware will round down the invalid value (e.g. 192 rounded down to 128) provided by the driver, causing some RSS rings to not receive any packets. We already have an existing function bnxt_calc_nr_ring_pages() to do this calculation. Use it in bnxt_get_nr_rss_ctxs() to calculate the number of RSS contexts correctly for P5_PLUS chips. Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Fixes: 7b3af4f75b81 ("bnxt_en: Add RSS support for 57500 chips.") Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240117234515.226944-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 ++++++++++++----- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 ++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9fdc90bfce38..3d090d4403df 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5935,8 +5935,12 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp) int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings) { - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) - return DIV_ROUND_UP(rx_rings, BNXT_RSS_TABLE_ENTRIES_P5); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + if (!rx_rings) + return 0; + return bnxt_calc_nr_ring_pages(rx_rings - 1, + BNXT_RSS_TABLE_ENTRIES_P5); + } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) return 2; return 1; @@ -7001,10 +7005,11 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, req->num_rx_rings = cpu_to_le16(rx_rings); if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + u16 rss_ctx = bnxt_get_nr_rss_ctxs(bp, ring_grps); + req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps); req->num_msix = cpu_to_le16(cp_rings); - req->num_rsscos_ctxs = - cpu_to_le16(DIV_ROUND_UP(ring_grps, 64)); + req->num_rsscos_ctxs = cpu_to_le16(rss_ctx); } else { req->num_cmpl_rings = cpu_to_le16(cp_rings); req->num_hw_ring_grps = cpu_to_le16(ring_grps); @@ -7051,8 +7056,10 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, req->num_tx_rings = cpu_to_le16(tx_rings); req->num_rx_rings = cpu_to_le16(rx_rings); if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + u16 rss_ctx = bnxt_get_nr_rss_ctxs(bp, ring_grps); + req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps); - req->num_rsscos_ctxs = cpu_to_le16(DIV_ROUND_UP(ring_grps, 64)); + req->num_rsscos_ctxs = cpu_to_le16(rss_ctx); } else { req->num_cmpl_rings = cpu_to_le16(cp_rings); req->num_hw_ring_grps = cpu_to_le16(ring_grps); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 27b983c0a8a9..1f6e0cd84f2e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1574,7 +1574,8 @@ u32 bnxt_get_rxfh_indir_size(struct net_device *dev) struct bnxt *bp = netdev_priv(dev); if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) - return ALIGN(bp->rx_nr_rings, BNXT_RSS_TABLE_ENTRIES_P5); + return bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) * + BNXT_RSS_TABLE_ENTRIES_P5; return HW_HASH_INDEX_SIZE; } From c20f482129a582455f02eb9a6dcb2a4215274599 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 17 Jan 2024 15:45:14 -0800 Subject: [PATCH 079/768] bnxt_en: Prevent kernel warning when running offline self test We call bnxt_half_open_nic() to setup the chip partially to run loopback tests. The rings and buffers are initialized normally so that we can transmit and receive packets in loopback mode. That means page pool buffers are allocated for the aggregation ring just like the normal case. NAPI is not needed because we are just polling for the loopback packets. When we're done with the loopback tests, we call bnxt_half_close_nic() to clean up. When freeing the page pools, we hit a WARN_ON() in page_pool_unlink_napi() because the NAPI state linked to the page pool is uninitialized. The simplest way to avoid this warning is just to initialize the NAPIs during half open and delete the NAPIs during half close. Trying to skip the page pool initialization or skip linking of NAPI during half open will be more complicated. This fix avoids this warning: WARNING: CPU: 4 PID: 46967 at net/core/page_pool.c:946 page_pool_unlink_napi+0x1f/0x30 CPU: 4 PID: 46967 Comm: ethtool Tainted: G S W 6.7.0-rc5+ #22 Hardware name: Dell Inc. PowerEdge R750/06V45N, BIOS 1.3.8 08/31/2021 RIP: 0010:page_pool_unlink_napi+0x1f/0x30 Code: 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 48 8b 47 18 48 85 c0 74 1b 48 8b 50 10 83 e2 01 74 08 8b 40 34 83 f8 ff 74 02 <0f> 0b 48 c7 47 18 00 00 00 00 c3 cc cc cc cc 66 90 90 90 90 90 90 RSP: 0018:ffa000003d0dfbe8 EFLAGS: 00010246 RAX: ff110003607ce640 RBX: ff110010baf5d000 RCX: 0000000000000008 RDX: 0000000000000000 RSI: ff110001e5e522c0 RDI: ff110010baf5d000 RBP: ff11000145539b40 R08: 0000000000000001 R09: ffffffffc063f641 R10: ff110001361eddb8 R11: 000000000040000f R12: 0000000000000001 R13: 000000000000001c R14: ff1100014553a080 R15: 0000000000003fc0 FS: 00007f9301c4f740(0000) GS:ff1100103fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f91344fa8f0 CR3: 00000003527cc005 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x81/0x140 ? page_pool_unlink_napi+0x1f/0x30 ? report_bug+0x102/0x200 ? handle_bug+0x44/0x70 ? exc_invalid_op+0x13/0x60 ? asm_exc_invalid_op+0x16/0x20 ? bnxt_free_ring.isra.123+0xb1/0xd0 [bnxt_en] ? page_pool_unlink_napi+0x1f/0x30 page_pool_destroy+0x3e/0x150 bnxt_free_mem+0x441/0x5e0 [bnxt_en] bnxt_half_close_nic+0x2a/0x40 [bnxt_en] bnxt_self_test+0x21d/0x450 [bnxt_en] __dev_ethtool+0xeda/0x2e30 ? native_queued_spin_lock_slowpath+0x17f/0x2b0 ? __link_object+0xa1/0x160 ? _raw_spin_unlock_irqrestore+0x23/0x40 ? __create_object+0x5f/0x90 ? __kmem_cache_alloc_node+0x317/0x3c0 ? dev_ethtool+0x59/0x170 dev_ethtool+0xa7/0x170 dev_ioctl+0xc3/0x530 sock_do_ioctl+0xa8/0xf0 sock_ioctl+0x270/0x310 __x64_sys_ioctl+0x8c/0xc0 do_syscall_64+0x3e/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Fixes: 294e39e0d034 ("bnxt: hook NAPIs to page pools") Reviewed-by: Andy Gospodarek Reviewed-by: Ajit Khaparde Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240117234515.226944-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3d090d4403df..0f5004872a46 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11572,10 +11572,12 @@ int bnxt_half_open_nic(struct bnxt *bp) netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); goto half_open_err; } + bnxt_init_napi(bp); set_bit(BNXT_STATE_HALF_OPEN, &bp->state); rc = bnxt_init_nic(bp, true); if (rc) { clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); + bnxt_del_napi(bp); netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); goto half_open_err; } @@ -11594,6 +11596,7 @@ half_open_err: void bnxt_half_close_nic(struct bnxt *bp) { bnxt_hwrm_resource_free(bp, false, true); + bnxt_del_napi(bp); bnxt_free_skbs(bp); bnxt_free_mem(bp, true); clear_bit(BNXT_STATE_HALF_OPEN, &bp->state); From 467739baf63646d4a5033f7f8a9306669ea55326 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 17 Jan 2024 15:45:15 -0800 Subject: [PATCH 080/768] bnxt_en: Fix possible crash after creating sw mqprio TCs The driver relies on netdev_get_num_tc() to get the number of HW offloaded mqprio TCs to allocate and free TX rings. This won't work and can potentially crash the system if software mqprio or taprio TCs have been setup. netdev_get_num_tc() will return the number of software TCs and it may cause the driver to allocate or free more TX rings that it should. Fix it by adding a bp->num_tc field to store the number of HW offload mqprio TCs for the device. Use bp->num_tc instead of netdev_get_num_tc(). This fixes a crash like this: BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 42b8404067 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 120 PID: 8661 Comm: ifconfig Kdump: loaded Tainted: G OE 5.18.16 #1 Hardware name: Lenovo ThinkSystem SR650 V3/SB27A92818, BIOS ESE114N-2.12 04/25/2023 RIP: 0010:bnxt_hwrm_cp_ring_alloc_p5+0x10/0x90 [bnxt_en] Code: 41 5c 41 5d 41 5e c3 cc cc cc cc 41 8b 44 24 08 66 89 03 eb c6 e8 b0 f1 7d db 0f 1f 44 00 00 41 56 41 55 41 54 55 48 89 fd 53 <48> 8b 06 48 89 f3 48 81 c6 28 01 00 00 0f b6 96 13 ff ff ff 44 8b RSP: 0018:ff65907660d1fa88 EFLAGS: 00010202 RAX: 0000000000000010 RBX: ff4dde1d907e4980 RCX: f400000000000000 RDX: 0000000000000010 RSI: 0000000000000000 RDI: ff4dde1d907e4980 RBP: ff4dde1d907e4980 R08: 000000000000000f R09: 0000000000000000 R10: ff4dde5f02671800 R11: 0000000000000008 R12: 0000000088888889 R13: 0500000000000000 R14: 00f0000000000000 R15: ff4dde5f02671800 FS: 00007f4b126b5740(0000) GS:ff4dde9bff600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000416f9c6002 CR4: 0000000000771ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: bnxt_hwrm_ring_alloc+0x204/0x770 [bnxt_en] bnxt_init_chip+0x4d/0x680 [bnxt_en] ? bnxt_poll+0x1a0/0x1a0 [bnxt_en] __bnxt_open_nic+0xd2/0x740 [bnxt_en] bnxt_open+0x10b/0x220 [bnxt_en] ? raw_notifier_call_chain+0x41/0x60 __dev_open+0xf3/0x1b0 __dev_change_flags+0x1db/0x250 dev_change_flags+0x21/0x60 devinet_ioctl+0x590/0x720 ? avc_has_extended_perms+0x1b7/0x420 ? _copy_from_user+0x3a/0x60 inet_ioctl+0x189/0x1c0 ? wp_page_copy+0x45a/0x6e0 sock_do_ioctl+0x42/0xf0 ? ioctl_has_perm.constprop.0.isra.0+0xbd/0x120 sock_ioctl+0x1ce/0x2e0 __x64_sys_ioctl+0x87/0xc0 do_syscall_64+0x59/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 ? exc_page_fault+0x62/0x150 Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Damodharam Ammepalli Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240117234515.226944-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 2 +- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0f5004872a46..39845d556baf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3817,7 +3817,7 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp) { bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS); int i, j, rc, ulp_base_vec, ulp_msix; - int tcs = netdev_get_num_tc(bp->dev); + int tcs = bp->num_tc; if (!tcs) tcs = 1; @@ -9946,7 +9946,7 @@ static int __bnxt_num_tx_to_cp(struct bnxt *bp, int tx, int tx_sets, int tx_xdp) int bnxt_num_tx_to_cp(struct bnxt *bp, int tx) { - int tcs = netdev_get_num_tc(bp->dev); + int tcs = bp->num_tc; if (!tcs) tcs = 1; @@ -9955,7 +9955,7 @@ int bnxt_num_tx_to_cp(struct bnxt *bp, int tx) static int bnxt_num_cp_to_tx(struct bnxt *bp, int tx_cp) { - int tcs = netdev_get_num_tc(bp->dev); + int tcs = bp->num_tc; return (tx_cp - bp->tx_nr_rings_xdp) * tcs + bp->tx_nr_rings_xdp; @@ -9985,7 +9985,7 @@ static void bnxt_setup_msix(struct bnxt *bp) struct net_device *dev = bp->dev; int tcs, i; - tcs = netdev_get_num_tc(dev); + tcs = bp->num_tc; if (tcs) { int i, off, count; @@ -10017,8 +10017,10 @@ static void bnxt_setup_inta(struct bnxt *bp) { const int len = sizeof(bp->irq_tbl[0].name); - if (netdev_get_num_tc(bp->dev)) + if (bp->num_tc) { netdev_reset_tc(bp->dev); + bp->num_tc = 0; + } snprintf(bp->irq_tbl[0].name, len, "%s-%s-%d", bp->dev->name, "TxRx", 0); @@ -10244,8 +10246,8 @@ static void bnxt_clear_int_mode(struct bnxt *bp) int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) { - int tcs = netdev_get_num_tc(bp->dev); bool irq_cleared = false; + int tcs = bp->num_tc; int rc; if (!bnxt_need_reserve_rings(bp)) @@ -10271,6 +10273,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) bp->tx_nr_rings - bp->tx_nr_rings_xdp)) { netdev_err(bp->dev, "tx ring reservation failure\n"); netdev_reset_tc(bp->dev); + bp->num_tc = 0; if (bp->tx_nr_rings_xdp) bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp; else @@ -13800,7 +13803,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) return -EINVAL; } - if (netdev_get_num_tc(dev) == tc) + if (bp->num_tc == tc) return 0; if (bp->flags & BNXT_FLAG_SHARED_RINGS) @@ -13818,9 +13821,11 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) if (tc) { bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc; netdev_set_num_tc(dev, tc); + bp->num_tc = tc; } else { bp->tx_nr_rings = bp->tx_nr_rings_per_tc; netdev_reset_tc(dev); + bp->num_tc = 0; } bp->tx_nr_rings += bp->tx_nr_rings_xdp; tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index b8ef1717cb65..47338b48ca20 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2225,6 +2225,7 @@ struct bnxt { u8 tc_to_qidx[BNXT_MAX_QUEUE]; u8 q_ids[BNXT_MAX_QUEUE]; u8 max_q; + u8 num_tc; unsigned int current_interval; #define BNXT_TIMER_INTERVAL HZ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 63e067038385..0dbb880a7aa0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -228,7 +228,7 @@ static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask) } } if (bp->ieee_ets) { - int tc = netdev_get_num_tc(bp->dev); + int tc = bp->num_tc; if (!tc) tc = 1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1f6e0cd84f2e..dc4ca706b0e2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -884,7 +884,7 @@ static void bnxt_get_channels(struct net_device *dev, if (max_tx_sch_inputs) max_tx_rings = min_t(int, max_tx_rings, max_tx_sch_inputs); - tcs = netdev_get_num_tc(dev); + tcs = bp->num_tc; tx_grps = max(tcs, 1); if (bp->tx_nr_rings_xdp) tx_grps++; @@ -944,7 +944,7 @@ static int bnxt_set_channels(struct net_device *dev, if (channel->combined_count) sh = true; - tcs = netdev_get_num_tc(dev); + tcs = bp->num_tc; req_tx_rings = sh ? channel->combined_count : channel->tx_count; req_rx_rings = sh ? channel->combined_count : channel->rx_count; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index c2b25fc623ec..4079538bc310 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -407,7 +407,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) if (prog) tx_xdp = bp->rx_nr_rings; - tc = netdev_get_num_tc(dev); + tc = bp->num_tc; if (!tc) tc = 1; rc = bnxt_check_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings, From 6c21660fe221a15c789dee2bc2fd95516bc5aeaf Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 18 Jan 2024 21:03:06 +0800 Subject: [PATCH 081/768] vlan: skip nested type that is not IFLA_VLAN_QOS_MAPPING In the vlan_changelink function, a loop is used to parse the nested attributes IFLA_VLAN_EGRESS_QOS and IFLA_VLAN_INGRESS_QOS in order to obtain the struct ifla_vlan_qos_mapping. These two nested attributes are checked in the vlan_validate_qos_map function, which calls nla_validate_nested_deprecated with the vlan_map_policy. However, this deprecated validator applies a LIBERAL strictness, allowing the presence of an attribute with the type IFLA_VLAN_QOS_UNSPEC. Consequently, the loop in vlan_changelink may parse an attribute of type IFLA_VLAN_QOS_UNSPEC and believe it carries a payload of struct ifla_vlan_qos_mapping, which is not necessarily true. To address this issue and ensure compatibility, this patch introduces two type checks that skip attributes whose type is not IFLA_VLAN_QOS_MAPPING. Fixes: 07b5b17e157b ("[VLAN]: Use rtnl_link API") Signed-off-by: Lin Ma Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240118130306.1644001-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/8021q/vlan_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 214532173536..a3b68243fd4b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); vlan_dev_set_ingress_priority(dev, m->to, m->from); } } if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { + if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING) + continue; m = nla_data(attr); err = vlan_dev_set_egress_priority(dev, m->from, m->to); if (err) From dad555c816a50c6a6a8a86be1f9177673918c647 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2024 18:36:25 +0000 Subject: [PATCH 082/768] llc: make llc_ui_sendmsg() more robust against bonding changes syzbot was able to trick llc_ui_sendmsg(), allocating an skb with no headroom, but subsequently trying to push 14 bytes of Ethernet header [1] Like some others, llc_ui_sendmsg() releases the socket lock before calling sock_alloc_send_skb(). Then it acquires it again, but does not redo all the sanity checks that were performed. This fix: - Uses LL_RESERVED_SPACE() to reserve space. - Check all conditions again after socket lock is held again. - Do not account Ethernet header for mtu limitation. [1] skbuff: skb_under_panic: text:ffff800088baa334 len:1514 put:14 head:ffff0000c9c37000 data:ffff0000c9c36ff2 tail:0x5dc end:0x6c0 dev:bond0 kernel BUG at net/core/skbuff.c:193 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 6875 Comm: syz-executor.0 Not tainted 6.7.0-rc8-syzkaller-00101-g0802e17d9aca-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic net/core/skbuff.c:189 [inline] pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 lr : skb_panic net/core/skbuff.c:189 [inline] lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 sp : ffff800096f97000 x29: ffff800096f97010 x28: ffff80008cc8d668 x27: dfff800000000000 x26: ffff0000cb970c90 x25: 00000000000005dc x24: ffff0000c9c36ff2 x23: ffff0000c9c37000 x22: 00000000000005ea x21: 00000000000006c0 x20: 000000000000000e x19: ffff800088baa334 x18: 1fffe000368261ce x17: ffff80008e4ed000 x16: ffff80008a8310f8 x15: 0000000000000001 x14: 1ffff00012df2d58 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000001 x10: 0000000000ff0100 x9 : e28a51f1087e8400 x8 : e28a51f1087e8400 x7 : ffff80008028f8d0 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000001 x3 : ffff800082b78714 x2 : 0000000000000001 x1 : 0000000100000000 x0 : 0000000000000089 Call trace: skb_panic net/core/skbuff.c:189 [inline] skb_under_panic+0x13c/0x140 net/core/skbuff.c:203 skb_push+0xf0/0x108 net/core/skbuff.c:2451 eth_header+0x44/0x1f8 net/ethernet/eth.c:83 dev_hard_header include/linux/netdevice.h:3188 [inline] llc_mac_hdr_init+0x110/0x17c net/llc/llc_output.c:33 llc_sap_action_send_xid_c+0x170/0x344 net/llc/llc_s_ac.c:85 llc_exec_sap_trans_actions net/llc/llc_sap.c:153 [inline] llc_sap_next_state net/llc/llc_sap.c:182 [inline] llc_sap_state_process+0x1ec/0x774 net/llc/llc_sap.c:209 llc_build_and_send_xid_pkt+0x12c/0x1c0 net/llc/llc_sap.c:270 llc_ui_sendmsg+0x7bc/0xb1c net/llc/af_llc.c:997 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_sendmsg+0x194/0x274 net/socket.c:767 splice_to_socket+0x7cc/0xd58 fs/splice.c:881 do_splice_from fs/splice.c:933 [inline] direct_splice_actor+0xe4/0x1c0 fs/splice.c:1142 splice_direct_to_actor+0x2a0/0x7e4 fs/splice.c:1088 do_splice_direct+0x20c/0x348 fs/splice.c:1194 do_sendfile+0x4bc/0xc70 fs/read_write.c:1254 __do_sys_sendfile64 fs/read_write.c:1322 [inline] __se_sys_sendfile64 fs/read_write.c:1308 [inline] __arm64_sys_sendfile64+0x160/0x3b4 fs/read_write.c:1308 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x54/0x158 arch/arm64/kernel/entry-common.c:678 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595 Code: aa1803e6 aa1903e7 a90023f5 94792f6a (d4210000) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+2a7024e9502df538e8ef@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240118183625.4007013-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/llc/af_llc.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9b06c380866b..20551cfb7da6 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -928,14 +928,15 @@ copy_uaddr: */ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { + DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; + int rc = -EINVAL, copied = 0, hdrlen, hh_len; struct sk_buff *skb = NULL; + struct net_device *dev; size_t size = 0; - int rc = -EINVAL, copied = 0, hdrlen; dprintk("%s: sending from %02X to %02X\n", __func__, llc->laddr.lsap, llc->daddr.lsap); @@ -955,22 +956,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (rc) goto out; } - hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); + dev = llc->dev; + hh_len = LL_RESERVED_SPACE(dev); + hdrlen = llc_ui_header_len(sk, addr); size = hdrlen + len; - if (size > llc->dev->mtu) - size = llc->dev->mtu; + size = min_t(size_t, size, READ_ONCE(dev->mtu)); copied = size - hdrlen; rc = -EINVAL; if (copied < 0) goto out; release_sock(sk); - skb = sock_alloc_send_skb(sk, size, noblock, &rc); + skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc); lock_sock(sk); if (!skb) goto out; - skb->dev = llc->dev; + if (sock_flag(sk, SOCK_ZAPPED) || + llc->dev != dev || + hdrlen != llc_ui_header_len(sk, addr) || + hh_len != LL_RESERVED_SPACE(dev) || + size > READ_ONCE(dev->mtu)) + goto out; + skb->dev = dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); - skb_reserve(skb, hdrlen); + skb_reserve(skb, hh_len + hdrlen); rc = memcpy_from_msg(skb_put(skb, copied), msg, copied); if (rc) goto out; From e3f9bed9bee261e3347131764e42aeedf1ffea61 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 18 Jan 2024 17:55:15 -0800 Subject: [PATCH 083/768] llc: Drop support for ETH_P_TR_802_2. syzbot reported an uninit-value bug below. [0] llc supports ETH_P_802_2 (0x0004) and used to support ETH_P_TR_802_2 (0x0011), and syzbot abused the latter to trigger the bug. write$tun(r0, &(0x7f0000000040)={@val={0x0, 0x11}, @val, @mpls={[], @llc={@snap={0xaa, 0x1, ')', "90e5dd"}}}}, 0x16) llc_conn_handler() initialises local variables {saddr,daddr}.mac based on skb in llc_pdu_decode_sa()/llc_pdu_decode_da() and passes them to __llc_lookup(). However, the initialisation is done only when skb->protocol is htons(ETH_P_802_2), otherwise, __llc_lookup_established() and __llc_lookup_listener() will read garbage. The missing initialisation existed prior to commit 211ed865108e ("net: delete all instances of special processing for token ring"). It removed the part to kick out the token ring stuff but forgot to close the door allowing ETH_P_TR_802_2 packets to sneak into llc_rcv(). Let's remove llc_tr_packet_type and complete the deprecation. [0]: BUG: KMSAN: uninit-value in __llc_lookup_established+0xe9d/0xf90 __llc_lookup_established+0xe9d/0xf90 __llc_lookup net/llc/llc_conn.c:611 [inline] llc_conn_handler+0x4bd/0x1360 net/llc/llc_conn.c:791 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 __netif_receive_skb_one_core net/core/dev.c:5527 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5641 netif_receive_skb_internal net/core/dev.c:5727 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5786 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x53af/0x66d0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2020 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x8ef/0x1490 fs/read_write.c:584 ksys_write+0x20f/0x4c0 fs/read_write.c:637 __do_sys_write fs/read_write.c:649 [inline] __se_sys_write fs/read_write.c:646 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:646 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Local variable daddr created at: llc_conn_handler+0x53/0x1360 net/llc/llc_conn.c:783 llc_rcv+0xfbb/0x14a0 net/llc/llc_input.c:206 CPU: 1 PID: 5004 Comm: syz-executor994 Not tainted 6.6.0-syzkaller-14500-g1c41041124bd #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Fixes: 211ed865108e ("net: delete all instances of special processing for token ring") Reported-by: syzbot+b5ad66046b913bc04c6f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b5ad66046b913bc04c6f Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119015515.61898-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/llc_pdu.h | 6 ++---- net/llc/llc_core.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 7e73f8e5e497..1d55ba7c45be 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, */ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); + memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN); } /** @@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa) */ static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da) { - if (skb->protocol == htons(ETH_P_802_2)) - memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); + memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN); } /** diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 6e387aadffce..4f16d9c88350 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = { .func = llc_rcv, }; -static struct packet_type llc_tr_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TR_802_2), - .func = llc_rcv, -}; - static int __init llc_init(void) { dev_add_pack(&llc_packet_type); - dev_add_pack(&llc_tr_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); - dev_remove_pack(&llc_tr_packet_type); } module_init(llc_init); From fead90507a37e73d41f6059b325b34412ed8d84b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 5 Jan 2024 10:06:01 +0800 Subject: [PATCH 084/768] fbdev: vt8500lcdfb: Remove unnecessary print function dev_err() The print function dev_err() is redundant because platform_get_irq() already prints an error. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7824 Signed-off-by: Jiapeng Chong Signed-off-by: Helge Deller --- drivers/video/fbdev/vt8500lcdfb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/vt8500lcdfb.c b/drivers/video/fbdev/vt8500lcdfb.c index 42c25dc85197..ac73937073a7 100644 --- a/drivers/video/fbdev/vt8500lcdfb.c +++ b/drivers/video/fbdev/vt8500lcdfb.c @@ -374,7 +374,6 @@ static int vt8500lcd_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(&pdev->dev, "no IRQ defined\n"); ret = -ENODEV; goto failed_free_palette; } From 04e5eac8f3ab2ff52fa191c187a46d4fdbc1e288 Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 11:49:40 +0800 Subject: [PATCH 085/768] fbdev: savage: Error out if pixclock equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. Although pixclock is checked in savagefb_decode_var(), but it is not checked properly in savagefb_probe(). Fix this by checking whether pixclock is zero in the function savagefb_check_var() before info->var.pixclock is used as the divisor. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller --- drivers/video/fbdev/savage/savagefb_driver.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index dddd6afcb972..ebc9aeffdde7 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var, DBG("savagefb_check_var"); + if (!var->pixclock) + return -EINVAL; + var->transp.offset = 0; var->transp.length = 0; switch (var->bits_per_pixel) { From e421946be7d9bf545147bea8419ef8239cb7ca52 Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 14:24:43 +0800 Subject: [PATCH 086/768] fbdev: sis: Error out if pixclock equals zero The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. In sisfb_check_var(), var->pixclock is used as a divisor to caculate drate before it is checked against zero. Fix this by checking it at the beginning. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller --- drivers/video/fbdev/sis/sis_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 803ccb6aa479..009bf1d92644 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1444,6 +1444,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) vtotal = var->upper_margin + var->lower_margin + var->vsync_len; + if (!var->pixclock) + return -EINVAL; pixclock = var->pixclock; if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) { From a54d51fb2dfb846aedf3751af501e9688db447f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2024 20:17:49 +0000 Subject: [PATCH 087/768] udp: fix busy polling Generic sk_busy_loop_end() only looks at sk->sk_receive_queue for presence of packets. Problem is that for UDP sockets after blamed commit, some packets could be present in another queue: udp_sk(sk)->reader_queue In some cases, a busy poller could spin until timeout expiration, even if some packets are available in udp_sk(sk)->reader_queue. v3: - make sk_busy_loop_end() nicer (Willem) v2: - add a READ_ONCE(sk->sk_family) in sk_is_inet() to avoid KCSAN splats. - add a sk_is_inet() check in sk_is_udp() (Willem feedback) - add a sk_is_inet() check in sk_is_tcp(). Fixes: 2276f58ac589 ("udp: use a separate rx queue for packet reception") Signed-off-by: Eric Dumazet Reviewed-by: Paolo Abeni Reviewed-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/skmsg.h | 6 ------ include/net/inet_sock.h | 5 ----- include/net/sock.h | 18 +++++++++++++++++- net/core/sock.c | 11 +++++++++-- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 888a4b217829..e65ec3fd2799 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -505,12 +505,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) return !!psock->saved_data_ready; } -static inline bool sk_is_udp(const struct sock *sk) -{ - return sk->sk_type == SOCK_DGRAM && - sk->sk_protocol == IPPROTO_UDP; -} - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index aa86453f6b9b..d94c242eb3ed 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -307,11 +307,6 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) #define inet_assign_bit(nr, sk, val) \ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) -static inline bool sk_is_inet(struct sock *sk) -{ - return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; -} - /** * sk_to_full_sk - Access to a full socket * @sk: pointer to a socket diff --git a/include/net/sock.h b/include/net/sock.h index a7f815c7cfdf..54ca8dcbfb43 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2765,9 +2765,25 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_inet(const struct sock *sk) +{ + int family = READ_ONCE(sk->sk_family); + + return family == AF_INET || family == AF_INET6; +} + static inline bool sk_is_tcp(const struct sock *sk) { - return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; + return sk_is_inet(sk) && + sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + +static inline bool sk_is_udp(const struct sock *sk) +{ + return sk_is_inet(sk) && + sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP; } static inline bool sk_is_stream_unix(const struct sock *sk) diff --git a/net/core/sock.c b/net/core/sock.c index 158dbdebce6a..0a7f46c37f0c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -4144,8 +4145,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty_lockless(&sk->sk_receive_queue) || - sk_busy_loop_timeout(sk, start_time); + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + return true; + + if (sk_is_udp(sk) && + !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) + return true; + + return sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ From 359724fa3ab79fbe9f42c6263cddc2afae32eef3 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 18 Jan 2024 21:50:40 +0100 Subject: [PATCH 088/768] idpf: distinguish vports by the dev_port attribute idpf registers multiple netdevs (virtual ports) for one PCI function, but it does not provide a way for userspace to distinguish them with sysfs attributes. Per Documentation/ABI/testing/sysfs-class-net, it is a bug not to set dev_port for independent ports on the same PCI bus, device and function. Without dev_port set, systemd-udevd's default naming policy attempts to assign the same name ("ens2f0") to all four idpf netdevs on my test system and obviously fails, leaving three of them with the initial eth name. With this patch, systemd-udevd is able to assign unique names to the netdevs (e.g. "ens2f0", "ens2f0d1", "ens2f0d2", "ens2f0d3"). The Intel-provided out-of-tree idpf driver already sets dev_port. In this patch I chose to do it in the same place in the idpf_cfg_netdev function. Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Signed-off-by: Michal Schmidt Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5fea2fd957eb..58179bd733ff 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -783,6 +783,8 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) /* setup watchdog timeout value to be 5 second */ netdev->watchdog_timeo = 5 * HZ; + netdev->dev_port = idx; + /* configure default MTU size */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = vport->max_mtu; From 72b0cbf6b81003c01d63c60180b335f7692d170e Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 19 Jan 2024 17:57:07 +0800 Subject: [PATCH 089/768] smb: Fix some kernel-doc comments Fix some kernel-doc comments to silence the warnings: fs/smb/server/transport_tcp.c:374: warning: Function parameter or struct member 'max_retries' not described in 'ksmbd_tcp_read' fs/smb/server/transport_tcp.c:423: warning: Function parameter or struct member 'iface' not described in 'create_socket' Signed-off-by: Yang Li Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 9d4222154dcc..002a3f0dc7c5 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -365,6 +365,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, * @t: TCP transport instance * @buf: buffer to store read data from socket * @to_read: number of bytes to read from socket + * @max_retries: number of retries if reading from socket fails * * Return: on success return number of bytes read from socket, * otherwise return error number @@ -416,6 +417,7 @@ static void tcp_destroy_socket(struct socket *ksmbd_socket) /** * create_socket - create socket for ksmbd/0 + * @iface: interface to bind the created socket to * * Return: 0 on success, error number otherwise */ From cd30e8bde28ac361e15d67ee5c00e0125ed42548 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 15 Jan 2024 21:37:47 +0100 Subject: [PATCH 090/768] rbd: remove usage of the deprecated ida_simple_*() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, while that of ida_alloc_max() is inclusive, so 1 has been subtracted. [ idryomov: tweak changelog ] Signed-off-by: Christophe JAILLET Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index a999b698b131..63897d0d6629 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5326,7 +5326,7 @@ static void rbd_dev_release(struct device *dev) if (need_put) { destroy_workqueue(rbd_dev->task_wq); - ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); + ida_free(&rbd_dev_id_ida, rbd_dev->dev_id); } rbd_dev_free(rbd_dev); @@ -5402,9 +5402,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; /* get an id and fill in device name */ - rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0, - minor_to_rbd_dev_id(1 << MINORBITS), - GFP_KERNEL); + rbd_dev->dev_id = ida_alloc_max(&rbd_dev_id_ida, + minor_to_rbd_dev_id(1 << MINORBITS) - 1, + GFP_KERNEL); if (rbd_dev->dev_id < 0) goto fail_rbd_dev; @@ -5425,7 +5425,7 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return rbd_dev; fail_dev_id: - ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); + ida_free(&rbd_dev_id_ida, rbd_dev->dev_id); fail_rbd_dev: rbd_dev_free(rbd_dev); return NULL; From ded080c86b3f99683774af0441a58fc2e3d60cae Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 17 Jan 2024 18:59:44 +0100 Subject: [PATCH 091/768] rbd: don't move requests to the running list on errors The running list is supposed to contain requests that are pinning the exclusive lock, i.e. those that must be flushed before exclusive lock is released. When wake_lock_waiters() is called to handle an error, requests on the acquiring list are failed with that error and no flushing takes place. Briefly moving them to the running list is not only pointless but also harmful: if exclusive lock gets acquired before all of their state machines are scheduled and go through rbd_lock_del_request(), we trigger rbd_assert(list_empty(&rbd_dev->running_list)); in rbd_try_acquire_lock(). Cc: stable@vger.kernel.org Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code") Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 63897d0d6629..12b5d53ec856 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3452,14 +3452,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req) static void rbd_lock_del_request(struct rbd_img_request *img_req) { struct rbd_device *rbd_dev = img_req->rbd_dev; - bool need_wakeup; + bool need_wakeup = false; lockdep_assert_held(&rbd_dev->lock_rwsem); spin_lock(&rbd_dev->lock_lists_lock); - rbd_assert(!list_empty(&img_req->lock_item)); - list_del_init(&img_req->lock_item); - need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && - list_empty(&rbd_dev->running_list)); + if (!list_empty(&img_req->lock_item)) { + list_del_init(&img_req->lock_item); + need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && + list_empty(&rbd_dev->running_list)); + } spin_unlock(&rbd_dev->lock_lists_lock); if (need_wakeup) complete(&rbd_dev->releasing_wait); @@ -3842,14 +3843,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result) return; } - list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) { + while (!list_empty(&rbd_dev->acquiring_list)) { + img_req = list_first_entry(&rbd_dev->acquiring_list, + struct rbd_img_request, lock_item); mutex_lock(&img_req->state_mutex); rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); + if (!result) + list_move_tail(&img_req->lock_item, + &rbd_dev->running_list); + else + list_del_init(&img_req->lock_item); rbd_img_schedule(img_req, result); mutex_unlock(&img_req->state_mutex); } - - list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list); } static bool locker_equal(const struct ceph_locker *lhs, From 113a61863ecbfb3c29f3eb18fd9813bccf1743c1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 31 Oct 2023 17:50:11 -0600 Subject: [PATCH 092/768] Makefile: Enable -Wstringop-overflow globally It seems that we have finished addressing all the remaining issues regarding -Wstringop-overflow. So, we are now in good shape to enable this compiler option globally. Signed-off-by: Gustavo A. R. Silva --- Makefile | 2 ++ scripts/Makefile.extrawarn | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 9869f57c3fb3..c01cbbb208ca 100644 --- a/Makefile +++ b/Makefile @@ -986,6 +986,8 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) +KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 9b7a37ae28a8..a9e552a1e910 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -97,7 +97,6 @@ KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow) KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow) KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) KBUILD_CPPFLAGS += -Wundef @@ -113,7 +112,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, restrict) KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) -KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) ifdef CONFIG_CC_IS_CLANG From a5e0ace04fbf56c1794b1a2fa7a93672753b3fc7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 30 Nov 2023 14:29:34 -0600 Subject: [PATCH 093/768] init: Kconfig: Disable -Wstringop-overflow for GCC-11 -Wstringop-overflow is buggy in GCC-11. Therefore, we should disable this option specifically for that compiler version. To achieve this, we introduce a new configuration option: GCC11_NO_STRINGOP_OVERFLOW. The compiler option related to string operation overflow is now managed under configuration CC_STRINGOP_OVERFLOW. This option is enabled by default for all other versions of GCC that support it. Link: https://lore.kernel.org/lkml/b3c99290-40bc-426f-b3d2-1aa903f95c4e@embeddedor.com/ Link: https://lore.kernel.org/lkml/20231128091351.2bfb38dd@canb.auug.org.au/ Reviewed-by: Kees Cook Link: https://lore.kernel.org/linux-hardening/ZWj1+jkweEDWbmAR@work/ Signed-off-by: Gustavo A. R. Silva --- Makefile | 4 +++- init/Kconfig | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c01cbbb208ca..9f9b76d3a4b7 100644 --- a/Makefile +++ b/Makefile @@ -986,7 +986,9 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow) +#Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/init/Kconfig b/init/Kconfig index 8df18f3a9748..8d4e836e1b6b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -876,6 +876,18 @@ config CC_NO_ARRAY_BOUNDS bool default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS +# Currently, disable -Wstringop-overflow for GCC 11, globally. +config GCC11_NO_STRINGOP_OVERFLOW + def_bool y + +config CC_NO_STRINGOP_OVERFLOW + bool + default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_STRINGOP_OVERFLOW + +config CC_STRINGOP_OVERFLOW + bool + default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW + # # For architectures that know their GCC __int128 support is sound # From 0086ffec768bec6f5d61fc7e406af640eb912a24 Mon Sep 17 00:00:00 2001 From: Stanley Chan Date: Mon, 27 Nov 2023 15:20:48 -0600 Subject: [PATCH 094/768] tools cpupower bench: Override CFLAGS assignments Allow user to specify outside CFLAGS values as make argument Corrects an issue where CFLAGS is passed as a make argument for cpupower, but bench's makefile does not inherit and append to them. Signed-off-by: Stanley Chan Signed-off-by: Shuah Khan --- tools/power/cpupower/bench/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index d9d9923af85c..a4b902f9e1c4 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -15,7 +15,7 @@ LIBS = -L../ -L$(OUTPUT) -lm -lcpupower OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o endif -CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\" +override CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\" $(OUTPUT)%.o : %.c $(ECHO) " CC " $@ From d09486a04f5da0a812c26217213b89a3b1acf836 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 18 Jan 2024 16:58:59 -0800 Subject: [PATCH 095/768] net: fix removing a namespace with conflicting altnames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark reports a BUG() when a net namespace is removed. kernel BUG at net/core/dev.c:11520! Physical interfaces moved outside of init_net get "refunded" to init_net when that namespace disappears. The main interface name may get overwritten in the process if it would have conflicted. We need to also discard all conflicting altnames. Recent fixes addressed ensuring that altnames get moved with the main interface, which surfaced this problem. Reported-by: Марк Коренберг Link: https://lore.kernel.org/all/CAEmTpZFZ4Sv3KwqFOY2WKDHeZYdi0O7N5H1nTvcGp=SAEavtDg@mail.gmail.com/ Fixes: 7663d522099e ("net: check for altname conflicts when changing netdev's netns") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++++++ net/core/dev.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index f01a9b858347..cb2dab0feee0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11551,6 +11551,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit_net(struct net *net) { + struct netdev_name_node *name_node, *tmp; struct net_device *dev, *aux; /* * Push all migratable network devices back to the @@ -11573,6 +11574,14 @@ static void __net_exit default_device_exit_net(struct net *net) snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); if (netdev_name_in_use(&init_net, fb_name)) snprintf(fb_name, IFNAMSIZ, "dev%%d"); + + netdev_for_each_altname_safe(dev, name_node, tmp) + if (netdev_name_in_use(&init_net, name_node->name)) { + netdev_name_node_del(name_node); + synchronize_rcu(); + __netdev_name_node_alt_destroy(name_node); + } + err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", diff --git a/net/core/dev.h b/net/core/dev.h index cf93e188785b..7480b4c84298 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -63,6 +63,9 @@ int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ list_for_each_entry((namenode), &(dev)->name_node->list, list) +#define netdev_for_each_altname_safe(dev, namenode, next) \ + list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \ + list) int netdev_name_node_alt_create(struct net_device *dev, const char *name); int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); From 477552e1d339d9fa654a363c9c0f1e9c4f087d2b Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 15 Dec 2023 09:38:19 +0900 Subject: [PATCH 096/768] drm/exynos: fix incorrect type issue Fix incorrect type issue in fimd_commit() of exynos_drm_fimd.c module. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312140930.Me9yWf8F-lkp@intel.com/ Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 8dde7b1e9b35..a090ff837075 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -480,7 +480,7 @@ static void fimd_commit(struct exynos_drm_crtc *crtc) struct fimd_context *ctx = crtc->ctx; struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; const struct fimd_driver_data *driver_data = ctx->driver_data; - void *timing_base = ctx->regs + driver_data->timing_base; + void __iomem *timing_base = ctx->regs + driver_data->timing_base; u32 val; if (ctx->suspended) From 960b537e91725bcb17dd1b19e48950e62d134078 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Dec 2023 13:32:15 +0100 Subject: [PATCH 097/768] drm/exynos: fix accidental on-stack copy of exynos_drm_plane gcc rightfully complains about excessive stack usage in the fimd_win_set_pixfmt() function: drivers/gpu/drm/exynos/exynos_drm_fimd.c: In function 'fimd_win_set_pixfmt': drivers/gpu/drm/exynos/exynos_drm_fimd.c:750:1: error: the frame size of 1032 bytes is larger than 1024 byte drivers/gpu/drm/exynos/exynos5433_drm_decon.c: In function 'decon_win_set_pixfmt': drivers/gpu/drm/exynos/exynos5433_drm_decon.c:381:1: error: the frame size of 1032 bytes is larger than 1024 bytes There is really no reason to copy the large exynos_drm_plane structure to the stack before using one of its members, so just use a pointer instead. Fixes: 6f8ee5c21722 ("drm/exynos: fimd: Make plane alpha configurable") Signed-off-by: Arnd Bergmann Reviewed-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 4d986077738b..bce027552474 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -319,9 +319,9 @@ static void decon_win_set_bldmod(struct decon_context *ctx, unsigned int win, static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, struct drm_framebuffer *fb) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); unsigned int alpha = state->base.alpha; unsigned int pixel_alpha; unsigned long val; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index a090ff837075..65489e18ab91 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -661,9 +661,9 @@ static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win, static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, struct drm_framebuffer *fb, int width) { - struct exynos_drm_plane plane = ctx->planes[win]; + struct exynos_drm_plane *plane = &ctx->planes[win]; struct exynos_drm_plane_state *state = - to_exynos_plane_state(plane.base.state); + to_exynos_plane_state(plane->base.state); uint32_t pixel_format = fb->format->format; unsigned int alpha = state->base.alpha; u32 val = WINCONx_ENWIN; From 4050957c7c2c14aa795dbf423b4180d5ac04e113 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 20 Dec 2023 12:53:15 +0300 Subject: [PATCH 098/768] drm/exynos: gsc: minor fix for loop iteration in gsc_runtime_resume Do not forget to call clk_disable_unprepare() on the first element of ctx->clocks array. Found by Linux Verification Center (linuxtesting.org). Fixes: 8b7d3ec83aba ("drm/exynos: gsc: Convert driver to IPP v2 core API") Signed-off-by: Fedor Pchelkin Reviewed-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 34cdabc30b4f..5302bebbe38c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1342,7 +1342,7 @@ static int __maybe_unused gsc_runtime_resume(struct device *dev) for (i = 0; i < ctx->num_clocks; i++) { ret = clk_prepare_enable(ctx->clocks[i]); if (ret) { - while (--i > 0) + while (--i >= 0) clk_disable_unprepare(ctx->clocks[i]); return ret; } From d8d222e09dab84a17bb65dda4b94d01c565f5327 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 16 Jan 2024 15:33:07 +1100 Subject: [PATCH 099/768] xfs: read only mounts with fsopen mount API are busted Recently xfs/513 started failing on my test machines testing "-o ro,norecovery" mount options. This was being emitted in dmesg: [ 9906.932724] XFS (pmem0): no-recovery mounts must be read-only. Turns out, readonly mounts with the fsopen()/fsconfig() mount API have been busted since day zero. It's only taken 5 years for debian unstable to start using this "new" mount API, and shortly after this I noticed xfs/513 had started to fail as per above. The syscall trace is: fsopen("xfs", FSOPEN_CLOEXEC) = 3 mount_setattr(-1, NULL, 0, NULL, 0) = -1 EINVAL (Invalid argument) ..... fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/pmem0", 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "norecovery", NULL, 0) = 0 fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = -1 EINVAL (Invalid argument) close(3) = 0 Showing that the actual mount instantiation (FSCONFIG_CMD_CREATE) is what threw out the error. During mount instantiation, we call xfs_fs_validate_params() which does: /* No recovery flag requires a read-only mount */ if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { xfs_warn(mp, "no-recovery mounts must be read-only."); return -EINVAL; } and xfs_is_readonly() checks internal mount flags for read only state. This state is set in xfs_init_fs_context() from the context superblock flag state: /* * Copy binary VFS mount flags we are interested in. */ if (fc->sb_flags & SB_RDONLY) set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); With the old mount API, all of the VFS specific superblock flags had already been parsed and set before xfs_init_fs_context() is called, so this all works fine. However, in the brave new fsopen/fsconfig world, xfs_init_fs_context() is called from fsopen() context, before any VFS superblock have been set or parsed. Hence if we use fsopen(), the internal XFS readonly state is *never set*. Hence anything that depends on xfs_is_readonly() actually returning true for read only mounts is broken if fsopen() has been used to mount the filesystem. Fix this by moving this internal state initialisation to xfs_fs_fill_super() before we attempt to validate the parameters that have been set prior to the FSCONFIG_CMD_CREATE call being made. Signed-off-by: Dave Chinner Fixes: 73e5fff98b64 ("xfs: switch to use the new mount-api") cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_super.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index aff20ddd4a9f..5a2512d20bd0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1496,6 +1496,18 @@ xfs_fs_fill_super( mp->m_super = sb; + /* + * Copy VFS mount flags from the context now that all parameter parsing + * is guaranteed to have been completed by either the old mount API or + * the newer fsopen/fsconfig API. + */ + if (fc->sb_flags & SB_RDONLY) + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + if (fc->sb_flags & SB_DIRSYNC) + mp->m_features |= XFS_FEAT_DIRSYNC; + if (fc->sb_flags & SB_SYNCHRONOUS) + mp->m_features |= XFS_FEAT_WSYNC; + error = xfs_fs_validate_params(mp); if (error) return error; @@ -1965,6 +1977,11 @@ static const struct fs_context_operations xfs_context_ops = { .free = xfs_fs_free, }; +/* + * WARNING: do not initialise any parameters in this function that depend on + * mount option parsing having already been performed as this can be called from + * fsopen() before any parameters have been set. + */ static int xfs_init_fs_context( struct fs_context *fc) { @@ -1996,16 +2013,6 @@ static int xfs_init_fs_context( mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ - /* - * Copy binary VFS mount flags we are interested in. - */ - if (fc->sb_flags & SB_RDONLY) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - if (fc->sb_flags & SB_DIRSYNC) - mp->m_features |= XFS_FEAT_DIRSYNC; - if (fc->sb_flags & SB_SYNCHRONOUS) - mp->m_features |= XFS_FEAT_WSYNC; - fc->s_fs_info = mp; fc->ops = &xfs_context_ops; From 805c74eac8cb306dc69b87b6b066ab4da77ceaf1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 17 Jan 2024 08:29:42 -0600 Subject: [PATCH 100/768] gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04 Spurious wakeups are reported on the GPD G1619-04 which can be absolved by programming the GPIO to ignore wakeups. Cc: stable@vger.kernel.org Reported-and-tested-by: George Melikov Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3073 Signed-off-by: Mario Limonciello Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-acpi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 88066826d8e5..cd3e9657cc36 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1651,6 +1651,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_interrupt = "INT33FC:00@3", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 0.35 + * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "PNP0C50:00@8", + }, + }, {} /* Terminating entry */ }; From 30cf36bb0408a163eb3d58ea6b883c612c029286 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:26 +0100 Subject: [PATCH 101/768] accel/ivpu: Dump MMU events in case of VPU boot timeout Add ivpu_mmu_evtq_dump() function that dumps existing MMU events from MMU event queue. Call this function if VPU boot failed. Previously MMU events were only checked in interrupt handler, but if VPU failed to boot due to MMU faults, those faults were missed because of interrupts not yet being enabled. This will allow checking potential fault reason of VPU not booting. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-2-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 1 + drivers/accel/ivpu/ivpu_mmu.c | 8 ++++++++ drivers/accel/ivpu/ivpu_mmu.h | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 64927682161b..0c3180411b0e 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -369,6 +369,7 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); + ivpu_mmu_evtq_dump(vdev); return ret; } diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 2228c44b115f..92ef651098d8 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -905,6 +905,14 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) ivpu_pm_schedule_recovery(vdev); } +void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) +{ + u32 *event; + + while ((event = ivpu_mmu_get_event(vdev)) != NULL) + ivpu_mmu_dump_event(vdev, event); +} + void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) { u32 gerror_val, gerrorn_val, active; diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h index cb551126806b..6fa35c240710 100644 --- a/drivers/accel/ivpu/ivpu_mmu.h +++ b/drivers/accel/ivpu/ivpu_mmu.h @@ -46,5 +46,6 @@ int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid); void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev); void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev); +void ivpu_mmu_evtq_dump(struct ivpu_device *vdev); #endif /* __IVPU_MMU_H__ */ From 929acfb9c53986d5ba37cfb9e1172ad79735f8eb Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:27 +0100 Subject: [PATCH 102/768] accel/ivpu: Call diagnose failure in ivpu_mmu_cmdq_sync() Check for possible failure reasons in the buttress. Some errors (like external abort) should have corresponding buttress errors registers set indicating the real reason of failure. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-3-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 92ef651098d8..1f813625aab3 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -7,6 +7,7 @@ #include #include "ivpu_drv.h" +#include "ivpu_hw.h" #include "ivpu_hw_reg_io.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" @@ -518,6 +519,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret, ivpu_mmu_cmdq_err_to_str(err)); + ivpu_hw_diagnose_failure(vdev); } return ret; From 8047d36fe563bf7ee7b28a284a0892506a6000a9 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:28 +0100 Subject: [PATCH 103/768] accel/ivpu: Add debug prints for MMU map/unmap operations It is common need to be able to see IOVA/physical to VPU addresses mappings. Especially when debugging different kind of memory related issues. Lack of such logs forces user to modify and recompile KMD manually. This commit adds those logs under MMU debug mask which can be turned on dynamically with module param during KMD load. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-4-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.h | 1 + drivers/accel/ivpu/ivpu_mmu_context.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index ebc4b84f27b2..9b6e336626e3 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -56,6 +56,7 @@ #define IVPU_DBG_JSM BIT(10) #define IVPU_DBG_KREF BIT(11) #define IVPU_DBG_RPM BIT(12) +#define IVPU_DBG_MMU_MAP BIT(13) #define ivpu_err(vdev, fmt, ...) \ drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index 12a8c09d4547..fe6161299236 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -355,6 +355,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset; size_t size = sg_dma_len(sg) + sg->offset; + ivpu_dbg(vdev, MMU_MAP, "Map ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n", + ctx->id, dma_addr, vpu_addr, size); + ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot); if (ret) { ivpu_err(vdev, "Failed to map context pages\n"); @@ -366,6 +369,7 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, /* Ensure page table modifications are flushed from wc buffers to memory */ wmb(); + mutex_unlock(&ctx->lock); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); @@ -388,14 +392,19 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct mutex_lock(&ctx->lock); for_each_sgtable_dma_sg(sgt, sg, i) { + dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset; size_t size = sg_dma_len(sg) + sg->offset; + ivpu_dbg(vdev, MMU_MAP, "Unmap ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n", + ctx->id, dma_addr, vpu_addr, size); + ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size); vpu_addr += size; } /* Ensure page table modifications are flushed from wc buffers to memory */ wmb(); + mutex_unlock(&ctx->lock); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); From 2a20b857dd654595d332f2521d80bd67b03536a0 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:29 +0100 Subject: [PATCH 104/768] accel/ivpu: Add diagnostic messages when VPU fails to boot or suspend Make boot/suspend failure debugging easier by dumping FW logs and error registers. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-5-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 5 +++-- drivers/accel/ivpu/ivpu_pm.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 0c3180411b0e..ec66c2c39877 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -17,6 +17,7 @@ #include "ivpu_debugfs.h" #include "ivpu_drv.h" #include "ivpu_fw.h" +#include "ivpu_fw_log.h" #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" @@ -340,8 +341,6 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) if (!ret) ivpu_dbg(vdev, PM, "VPU ready message received successfully\n"); - else - ivpu_hw_diagnose_failure(vdev); return ret; } @@ -369,7 +368,9 @@ int ivpu_boot(struct ivpu_device *vdev) ret = ivpu_wait_for_ready(vdev); if (ret) { ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret); + ivpu_hw_diagnose_failure(vdev); ivpu_mmu_evtq_dump(vdev); + ivpu_fw_log_dump(vdev); return ret; } diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 0af8864cb3b5..8407f1d8c99c 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -13,6 +13,7 @@ #include "ivpu_drv.h" #include "ivpu_hw.h" #include "ivpu_fw.h" +#include "ivpu_fw_log.h" #include "ivpu_ipc.h" #include "ivpu_job.h" #include "ivpu_jsm_msg.h" @@ -247,7 +248,8 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); if (!hw_is_idle) { - ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n"); + ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n"); + ivpu_fw_log_dump(vdev); ivpu_pm_prepare_cold_boot(vdev); } else { ivpu_pm_prepare_warm_boot(vdev); From 7f66319927a8ced3be715eb7616a890b9bf0348b Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:30 +0100 Subject: [PATCH 105/768] accel/ivpu: Fix for missing lock around drm_gem_shmem_vmap() drm_gem_shmem_vmap/vunmap requires dma resv lock to be held. This was missed during conversion to shmem helper. Fixes: 8d88e4cdce4f ("accel/ivpu: Use GEM shmem helper for all buffers") Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-6-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 1dda4f38ea25..6890d33cf352 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -361,7 +361,9 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla if (ret) goto err_put; + dma_resv_lock(bo->base.base.resv, NULL); ret = drm_gem_shmem_vmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); if (ret) goto err_put; @@ -376,7 +378,10 @@ void ivpu_bo_free_internal(struct ivpu_bo *bo) { struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr); + dma_resv_lock(bo->base.base.resv, NULL); drm_gem_shmem_vunmap(&bo->base, &map); + dma_resv_unlock(bo->base.base.resv); + drm_gem_object_put(&bo->base.base); } From a8c099d5d0e4b0400cfddfd95b881c8bd9349a88 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:31 +0100 Subject: [PATCH 106/768] accel/ivpu: Free buffer sgt on unbind Call dma_unmap() on all buffers before the VPU is unbinded to avoid "device driver has pending DMA allocations while released from device" warning when DMA-API debug is enabled. Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-7-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 6890d33cf352..4de454bfbf91 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -112,8 +112,6 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) ivpu_dbg_bo(vdev, bo, "unbind"); - /* TODO: dma_unmap */ - if (bo->mmu_mapped) { drm_WARN_ON(&vdev->drm, !bo->ctx); drm_WARN_ON(&vdev->drm, !bo->vpu_addr); @@ -127,6 +125,18 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) bo->vpu_addr = 0; bo->ctx = NULL; } + + if (bo->base.base.import_attach) + return; + + dma_resv_lock(bo->base.base.resv, NULL); + if (bo->base.sgt) { + dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0); + sg_free_table(bo->base.sgt); + kfree(bo->base.sgt); + bo->base.sgt = NULL; + } + dma_resv_unlock(bo->base.base.resv); } static void ivpu_bo_unbind(struct ivpu_bo *bo) From b7a0e75632eb6568c82de9108bd29e130dd082d9 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:32 +0100 Subject: [PATCH 107/768] accel/ivpu: Disable buffer sharing among VPU contexts This was not supported properly. A buffer was imported to another VPU context as a separate buffer object with duplicated sgt. Both exported and imported buffers could be DMA mapped causing a double mapping on the same device. Buffers imported from another VPU context will now just increase reference count, leaving only a single sgt, fixing the problem above. Buffers still can't be shared among VPU contexts because each has its own MMU mapping and ivpu_bo only supports single MMU mappings. The solution would be to use a mapping list as in panfrost or etnaviv drivers and it will be implemented in future if required. Signed-off-by: Jacek Lawrynowicz Reviewed-by: Andrzej Kacprowski Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-8-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 44 +++++------------------------------ 1 file changed, 6 insertions(+), 38 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 4de454bfbf91..95e731e13941 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -222,6 +222,12 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file) struct ivpu_bo *bo = to_ivpu_bo(obj); struct ivpu_addr_range *range; + if (bo->ctx) { + ivpu_warn(vdev, "Can't add BO to ctx %u: already in ctx %u\n", + file_priv->ctx.id, bo->ctx->id); + return -EALREADY; + } + if (bo->flags & DRM_IVPU_BO_SHAVE_MEM) range = &vdev->hw->ranges.shave; else if (bo->flags & DRM_IVPU_BO_DMA_MEM) @@ -252,47 +258,9 @@ static void ivpu_bo_free(struct drm_gem_object *obj) drm_gem_shmem_free(&bo->base); } -static const struct dma_buf_ops ivpu_bo_dmabuf_ops = { - .cache_sgt_mapping = true, - .attach = drm_gem_map_attach, - .detach = drm_gem_map_detach, - .map_dma_buf = drm_gem_map_dma_buf, - .unmap_dma_buf = drm_gem_unmap_dma_buf, - .release = drm_gem_dmabuf_release, - .mmap = drm_gem_dmabuf_mmap, - .vmap = drm_gem_dmabuf_vmap, - .vunmap = drm_gem_dmabuf_vunmap, -}; - -static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags) -{ - struct drm_device *dev = obj->dev; - struct dma_buf_export_info exp_info = { - .exp_name = KBUILD_MODNAME, - .owner = dev->driver->fops->owner, - .ops = &ivpu_bo_dmabuf_ops, - .size = obj->size, - .flags = flags, - .priv = obj, - .resv = obj->resv, - }; - void *sgt; - - /* - * Make sure that pages are allocated and dma-mapped before exporting the bo. - * DMA-mapping is required if the bo will be imported to the same device. - */ - sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj)); - if (IS_ERR(sgt)) - return sgt; - - return drm_gem_dmabuf_export(dev, &exp_info); -} - static const struct drm_gem_object_funcs ivpu_gem_funcs = { .free = ivpu_bo_free, .open = ivpu_bo_open, - .export = ivpu_bo_export, .print_info = drm_gem_shmem_object_print_info, .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, From 37dee2a2f4330a030abc5674bcec25ccc4addbcc Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 15 Jan 2024 14:44:33 +0100 Subject: [PATCH 108/768] accel/ivpu: Improve buffer object debug logs Make debug logs more readable and consistent: - don't print handle as it is not always available for all buffers - use hashed ivpu_bo ptr as main buffer identifier - remove unused fields from ivpu_bo_print_info() Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-9-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_gem.c | 63 ++++++++++++----------------------- drivers/accel/ivpu/ivpu_gem.h | 1 - 2 files changed, 22 insertions(+), 42 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 95e731e13941..16f3035b91c0 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -24,14 +24,11 @@ static const struct drm_gem_object_funcs ivpu_gem_funcs; static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action) { - if (bo->ctx) - ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n", - action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt, - bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped); - else - ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n", - action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt, - bo->handle); + ivpu_dbg(vdev, BO, + "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n", + action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0, + (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc, + (bool)bo->base.base.import_attach); } /* @@ -49,12 +46,7 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo) mutex_lock(&bo->lock); ivpu_dbg_bo(vdev, bo, "pin"); - - if (!bo->ctx) { - ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle); - ret = -EINVAL; - goto unlock; - } + drm_WARN_ON(&vdev->drm, !bo->ctx); if (!bo->mmu_mapped) { struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base); @@ -108,9 +100,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - lockdep_assert_held(&bo->lock); - - ivpu_dbg_bo(vdev, bo, "unbind"); + lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount)); if (bo->mmu_mapped) { drm_WARN_ON(&vdev->drm, !bo->ctx); @@ -122,7 +112,6 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) if (bo->ctx) { ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node); - bo->vpu_addr = 0; bo->ctx = NULL; } @@ -156,8 +145,10 @@ void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_m mutex_lock(&vdev->bo_list_lock); list_for_each_entry(bo, &vdev->bo_list, bo_list_node) { mutex_lock(&bo->lock); - if (bo->ctx == ctx) + if (bo->ctx == ctx) { + ivpu_dbg_bo(vdev, bo, "unbind"); ivpu_bo_unbind_locked(bo); + } mutex_unlock(&bo->lock); } mutex_unlock(&vdev->bo_list_lock); @@ -209,9 +200,6 @@ ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags) list_add_tail(&bo->bo_list_node, &vdev->bo_list); mutex_unlock(&vdev->bo_list_lock); - ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n", - bo->vpu_addr, bo->base.base.size, flags); - return bo; } @@ -243,14 +231,14 @@ static void ivpu_bo_free(struct drm_gem_object *obj) struct ivpu_device *vdev = to_ivpu_device(obj->dev); struct ivpu_bo *bo = to_ivpu_bo(obj); + ivpu_dbg_bo(vdev, bo, "free"); + mutex_lock(&vdev->bo_list_lock); list_del(&bo->bo_list_node); mutex_unlock(&vdev->bo_list_lock); drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); - ivpu_dbg_bo(vdev, bo, "free"); - ivpu_bo_unbind(bo); mutex_destroy(&bo->lock); @@ -293,11 +281,9 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi return PTR_ERR(bo); } - ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle); - if (!ret) { + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (!ret) args->vpu_addr = bo->vpu_addr; - args->handle = bo->handle; - } drm_gem_object_put(&bo->base.base); @@ -415,19 +401,11 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) { - unsigned long dma_refcount = 0; - mutex_lock(&bo->lock); - if (bo->base.base.dma_buf && bo->base.base.dma_buf->file) - dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count); - - drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu", - bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size, - bo->flags, kref_read(&bo->base.base.refcount), dma_refcount); - - if (bo->base.base.import_attach) - drm_printf(p, " imported"); + drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u", + bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size, + bo->flags, kref_read(&bo->base.base.refcount)); if (bo->base.pages) drm_printf(p, " has_pages"); @@ -435,6 +413,9 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) if (bo->mmu_mapped) drm_printf(p, " mmu_mapped"); + if (bo->base.base.import_attach) + drm_printf(p, " imported"); + drm_printf(p, "\n"); mutex_unlock(&bo->lock); @@ -445,8 +426,8 @@ void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p) struct ivpu_device *vdev = to_ivpu_device(dev); struct ivpu_bo *bo; - drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n", - "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs"); + drm_printf(p, "%-9s %-3s %-14s %-10s %-10s %-4s %s\n", + "bo", "ctx", "vpu_addr", "size", "flags", "refs", "attribs"); mutex_lock(&vdev->bo_list_lock); list_for_each_entry(bo, &vdev->bo_list, bo_list_node) diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index d75cad0d3c74..5cb1dda3e58e 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -19,7 +19,6 @@ struct ivpu_bo { struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */ u64 vpu_addr; - u32 handle; u32 flags; u32 job_status; /* Valid only for command buffer */ bool mmu_mapped; From b246271d257b4b0573e88f443ed8091f8b044895 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Mon, 15 Jan 2024 14:44:34 +0100 Subject: [PATCH 109/768] accel/ivpu: Deprecate DRM_IVPU_PARAM_CONTEXT_PRIORITY param DRM_IVPU_PARAM_CONTEXT_PRIORITY has been deprecated because it has been replaced with DRM_IVPU_JOB_PRIORITY levels set with submit IOCTL and was unused anyway. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20240115134434.493839-10-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 11 ----------- drivers/accel/ivpu/ivpu_drv.h | 1 - drivers/accel/ivpu/ivpu_job.c | 3 +++ include/uapi/drm/ivpu_accel.h | 25 ++++++++++++++++++++----- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index ec66c2c39877..546c0899bb9e 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -177,9 +177,6 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: args->value = vdev->hw->ranges.user.start; break; - case DRM_IVPU_PARAM_CONTEXT_PRIORITY: - args->value = file_priv->priority; - break; case DRM_IVPU_PARAM_CONTEXT_ID: args->value = file_priv->ctx.id; break; @@ -219,17 +216,10 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct ivpu_file_priv *file_priv = file->driver_priv; struct drm_ivpu_param *args = data; int ret = 0; switch (args->param) { - case DRM_IVPU_PARAM_CONTEXT_PRIORITY: - if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME) - file_priv->priority = args->value; - else - ret = -EINVAL; - break; default: ret = -EINVAL; } @@ -258,7 +248,6 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) } file_priv->vdev = vdev; - file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; kref_init(&file_priv->ref); mutex_init(&file_priv->lock); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 9b6e336626e3..7a6bc1918780 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -146,7 +146,6 @@ struct ivpu_file_priv { struct mutex lock; /* Protects cmdq */ struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; struct ivpu_mmu_context ctx; - u32 priority; bool has_mmu_faults; }; diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 7206cf9cdb4a..82e40bb4803c 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -488,6 +488,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (params->engine > DRM_IVPU_ENGINE_COPY) return -EINVAL; + if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) + return -EINVAL; + if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) return -EINVAL; diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index de1944e42c65..63c49318a863 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -53,7 +53,7 @@ extern "C" { #define DRM_IVPU_PARAM_CORE_CLOCK_RATE 3 #define DRM_IVPU_PARAM_NUM_CONTEXTS 4 #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 -#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 +#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 /* Deprecated */ #define DRM_IVPU_PARAM_CONTEXT_ID 7 #define DRM_IVPU_PARAM_FW_API_VERSION 8 #define DRM_IVPU_PARAM_ENGINE_HEARTBEAT 9 @@ -64,11 +64,18 @@ extern "C" { #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 +/* Deprecated, use DRM_IVPU_JOB_PRIORITY */ #define DRM_IVPU_CONTEXT_PRIORITY_IDLE 0 #define DRM_IVPU_CONTEXT_PRIORITY_NORMAL 1 #define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2 #define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3 +#define DRM_IVPU_JOB_PRIORITY_DEFAULT 0 +#define DRM_IVPU_JOB_PRIORITY_IDLE 1 +#define DRM_IVPU_JOB_PRIORITY_NORMAL 2 +#define DRM_IVPU_JOB_PRIORITY_FOCUS 3 +#define DRM_IVPU_JOB_PRIORITY_REALTIME 4 + /** * DRM_IVPU_CAP_METRIC_STREAMER * @@ -112,10 +119,6 @@ struct drm_ivpu_param { * %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: * Lowest VPU virtual address available in the current context (read-only) * - * %DRM_IVPU_PARAM_CONTEXT_PRIORITY: - * Value of current context scheduling priority (read-write). - * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. - * * %DRM_IVPU_PARAM_CONTEXT_ID: * Current context ID, always greater than 0 (read-only) * @@ -286,6 +289,18 @@ struct drm_ivpu_submit { * to be executed. The offset has to be 8-byte aligned. */ __u32 commands_offset; + + /** + * @priority: + * + * Priority to be set for related job command queue, can be one of the following: + * %DRM_IVPU_JOB_PRIORITY_DEFAULT + * %DRM_IVPU_JOB_PRIORITY_IDLE + * %DRM_IVPU_JOB_PRIORITY_NORMAL + * %DRM_IVPU_JOB_PRIORITY_FOCUS + * %DRM_IVPU_JOB_PRIORITY_REALTIME + */ + __u32 priority; }; /* drm_ivpu_bo_wait job status codes */ From 0dd20a48a541ea5485b8bfc0e0bdbc6ae29b389f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 16 Jan 2024 20:30:59 -0800 Subject: [PATCH 110/768] MIPS: Cobalt: Fix missing prototypes Fix missing prototypes warnings for cobalt_machine_halt() and cobalt_machine_restart() by moving their prototypes to cobalt.h which is included by setup.c. Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/cobalt/setup.c | 3 --- arch/mips/include/asm/mach-cobalt/cobalt.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c index 2e099d55a564..9a266bf78339 100644 --- a/arch/mips/cobalt/setup.c +++ b/arch/mips/cobalt/setup.c @@ -23,9 +23,6 @@ #include -extern void cobalt_machine_restart(char *command); -extern void cobalt_machine_halt(void); - const char *get_system_type(void) { switch (cobalt_board_id) { diff --git a/arch/mips/include/asm/mach-cobalt/cobalt.h b/arch/mips/include/asm/mach-cobalt/cobalt.h index 5b9fce73f11d..97f9d5e9446d 100644 --- a/arch/mips/include/asm/mach-cobalt/cobalt.h +++ b/arch/mips/include/asm/mach-cobalt/cobalt.h @@ -19,4 +19,7 @@ extern int cobalt_board_id; #define COBALT_BRD_ID_QUBE2 0x5 #define COBALT_BRD_ID_RAQ2 0x6 +void cobalt_machine_halt(void); +void cobalt_machine_restart(char *command); + #endif /* __ASM_COBALT_H */ From feab19143a1c8c5efdf1934dd0b1defb29bb5026 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 17 Jan 2024 15:49:44 -0800 Subject: [PATCH 111/768] MIPS: Alchemy: Fix missing prototypes We have a number of missing prototypes warnings for board_setup(), alchemy_set_lpj() and prom_init_cmdline(), prom_getenv() and prom_get_ethernet_addr(). Fix those by providing definitions for the first two functions in au1000.h which is included everywhere relevant, and including prom.h for the last three. Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/common/prom.c | 1 + arch/mips/alchemy/common/setup.c | 4 +--- arch/mips/include/asm/mach-au1x00/au1000.h | 3 +++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/mips/alchemy/common/prom.c b/arch/mips/alchemy/common/prom.c index b13d8adf3be4..20d30f6265cd 100644 --- a/arch/mips/alchemy/common/prom.c +++ b/arch/mips/alchemy/common/prom.c @@ -40,6 +40,7 @@ #include #include +#include int prom_argc; char **prom_argv; diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c index 2388d68786f4..a7a6d31a7a41 100644 --- a/arch/mips/alchemy/common/setup.c +++ b/arch/mips/alchemy/common/setup.c @@ -30,13 +30,11 @@ #include #include /* for dma_default_coherent */ +#include #include #include -extern void __init board_setup(void); -extern void __init alchemy_set_lpj(void); - static bool alchemy_dma_coherent(void) { switch (alchemy_get_cputype()) { diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h index a7eec3364a64..41546777902b 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000.h +++ b/arch/mips/include/asm/mach-au1x00/au1000.h @@ -597,6 +597,9 @@ #include +void alchemy_set_lpj(void); +void board_setup(void); + /* helpers to access the SYS_* registers */ static inline unsigned long alchemy_rdsys(int regofs) { From 437a310b22244d4e0b78665c3042e5d1c0f45306 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 20 Dec 2023 17:21:12 +0000 Subject: [PATCH 112/768] firmware: arm_scmi: Check mailbox/SMT channel for consistency On reception of a completion interrupt the shared memory area is accessed to retrieve the message header at first and then, if the message sequence number identifies a transaction which is still pending, the related payload is fetched too. When an SCMI command times out the channel ownership remains with the platform until eventually a late reply is received and, as a consequence, any further transmission attempt remains pending, waiting for the channel to be relinquished by the platform. Once that late reply is received the channel ownership is given back to the agent and any pending request is then allowed to proceed and overwrite the SMT area of the just delivered late reply; then the wait for the reply to the new request starts. It has been observed that the spurious IRQ related to the late reply can be wrongly associated with the freshly enqueued request: when that happens the SCMI stack in-flight lookup procedure is fooled by the fact that the message header now present in the SMT area is related to the new pending transaction, even though the real reply has still to arrive. This race-condition on the A2P channel can be detected by looking at the channel status bits: a genuine reply from the platform will have set the channel free bit before triggering the completion IRQ. Add a consistency check to validate such condition in the A2P ISR. Reported-by: Xinglong Yang Closes: https://lore.kernel.org/all/PUZPR06MB54981E6FA00D82BFDBB864FBF08DA@PUZPR06MB5498.apcprd06.prod.outlook.com/ Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Cc: stable@vger.kernel.org # 5.15+ Signed-off-by: Cristian Marussi Tested-by: Xinglong Yang Link: https://lore.kernel.org/r/20231220172112.763539-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 1 + drivers/firmware/arm_scmi/mailbox.c | 14 ++++++++++++++ drivers/firmware/arm_scmi/shmem.c | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index c46dc5215af7..00b165d1f502 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -314,6 +314,7 @@ void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, void shmem_clear_channel(struct scmi_shared_mem __iomem *shmem); bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer); +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem); /* declarations for message passing transports */ struct scmi_msg_payld; diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 19246ed1f01f..b8d470417e8f 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -45,6 +45,20 @@ static void rx_callback(struct mbox_client *cl, void *m) { struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); + /* + * An A2P IRQ is NOT valid when received while the platform still has + * the ownership of the channel, because the platform at first releases + * the SMT channel and then sends the completion interrupt. + * + * This addresses a possible race condition in which a spurious IRQ from + * a previous timed-out reply which arrived late could be wrongly + * associated with the next pending transaction. + */ + if (cl->knows_txdone && !shmem_channel_free(smbox->shmem)) { + dev_warn(smbox->cinfo->dev, "Ignoring spurious A2P IRQ !\n"); + return; + } + scmi_rx_callback(smbox->cinfo, shmem_read_header(smbox->shmem), NULL); } diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 87b4f4d35f06..517d52fb3bcb 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -122,3 +122,9 @@ bool shmem_poll_done(struct scmi_shared_mem __iomem *shmem, (SCMI_SHMEM_CHAN_STAT_CHANNEL_ERROR | SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); } + +bool shmem_channel_free(struct scmi_shared_mem __iomem *shmem) +{ + return (ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); +} From 0726fcc8d4af75441b38aaa082f820e63b3a8748 Mon Sep 17 00:00:00 2001 From: Tanzir Hasan Date: Tue, 26 Dec 2023 22:52:03 +0000 Subject: [PATCH 113/768] firmware: arm_scmi: Replace asm-generic/bug.h with linux/bug.h linux/bug.h includes asm-generic/bug.h already and hence replacing asm-generic/bug.h with linux/bug.h will not regress any build. Also, it is always better to avoid header file inclusion from asm-generic if possible. Suggested-by: Al Viro Signed-off-by: Tanzir Hasan Link: https://lore.kernel.org/r/20231226-shmem-v1-1-ea15ce81d8ba@google.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 517d52fb3bcb..8bf495bcad09 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include "common.h" From e8ef4bbe39b9576a73f104f6af743fb9c7b624ba Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 18:50:49 +0000 Subject: [PATCH 114/768] firmware: arm_scmi: Use xa_insert() to store opps When storing opps by level or index use xa_insert() instead of xa_store() and add error-checking to spot bad duplicates indexes possibly wrongly provided by the platform firmware. Fixes: 31c7c1397a33 ("firmware: arm_scmi: Add v3.2 perf level indexing mode support") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108185050.1628687-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 8ea2a7b3d35d..211e8e0aef2c 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -350,8 +350,8 @@ process_response_opp(struct scmi_opp *opp, unsigned int loop_idx, } static inline void -process_response_opp_v4(struct perf_dom_info *dom, struct scmi_opp *opp, - unsigned int loop_idx, +process_response_opp_v4(struct device *dev, struct perf_dom_info *dom, + struct scmi_opp *opp, unsigned int loop_idx, const struct scmi_msg_resp_perf_describe_levels_v4 *r) { opp->perf = le32_to_cpu(r->opp[loop_idx].perf_val); @@ -362,10 +362,23 @@ process_response_opp_v4(struct perf_dom_info *dom, struct scmi_opp *opp, /* Note that PERF v4 reports always five 32-bit words */ opp->indicative_freq = le32_to_cpu(r->opp[loop_idx].indicative_freq); if (dom->level_indexing_mode) { + int ret; + opp->level_index = le32_to_cpu(r->opp[loop_idx].level_index); - xa_store(&dom->opps_by_idx, opp->level_index, opp, GFP_KERNEL); - xa_store(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL); + ret = xa_insert(&dom->opps_by_idx, opp->level_index, opp, + GFP_KERNEL); + if (ret) + dev_warn(dev, + "Failed to add opps_by_idx at %d - ret:%d\n", + opp->level_index, ret); + + ret = xa_insert(&dom->opps_by_lvl, opp->perf, opp, GFP_KERNEL); + if (ret) + dev_warn(dev, + "Failed to add opps_by_lvl at %d - ret:%d\n", + opp->perf, ret); + hash_add(dom->opps_by_freq, &opp->hash, opp->indicative_freq); } } @@ -382,7 +395,7 @@ iter_perf_levels_process_response(const struct scmi_protocol_handle *ph, if (PROTOCOL_REV_MAJOR(p->version) <= 0x3) process_response_opp(opp, st->loop_idx, response); else - process_response_opp_v4(p->perf_dom, opp, st->loop_idx, + process_response_opp_v4(ph->dev, p->perf_dom, opp, st->loop_idx, response); p->perf_dom->opp_count++; From b5dc0ffd36560dbadaed9a3d9fd7838055d62d74 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 18:50:50 +0000 Subject: [PATCH 115/768] firmware: arm_scmi: Use xa_insert() when saving raw queues Use xa_insert() when saving per-channel raw queues to better check for duplicates. Fixes: 7860701d1e6e ("firmware: arm_scmi: Add per-channel raw injection support") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108185050.1628687-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/raw_mode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c index 0493aa3c12bf..350573518503 100644 --- a/drivers/firmware/arm_scmi/raw_mode.c +++ b/drivers/firmware/arm_scmi/raw_mode.c @@ -1111,7 +1111,6 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw, int i; for (i = 0; i < num_chans; i++) { - void *xret; struct scmi_raw_queue *q; q = scmi_raw_queue_init(raw); @@ -1120,13 +1119,12 @@ static int scmi_raw_mode_setup(struct scmi_raw_mode_info *raw, goto err_xa; } - xret = xa_store(&raw->chans_q, channels[i], q, + ret = xa_insert(&raw->chans_q, channels[i], q, GFP_KERNEL); - if (xa_err(xret)) { + if (ret) { dev_err(dev, "Fail to allocate Raw queue 0x%02X\n", channels[i]); - ret = xa_err(xret); goto err_xa; } } @@ -1322,6 +1320,12 @@ void scmi_raw_message_report(void *r, struct scmi_xfer *xfer, dev = raw->handle->dev; q = scmi_raw_queue_select(raw, idx, SCMI_XFER_IS_CHAN_SET(xfer) ? chan_id : 0); + if (!q) { + dev_warn(dev, + "RAW[%d] - NO queue for chan 0x%X. Dropping report.\n", + idx, chan_id); + return; + } /* * Grab the msg_q_lock upfront to avoid a possible race between From 27600c96e2ffa6c1b2cb378ddc75c6620c628d04 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 9 Jan 2024 15:01:06 +0000 Subject: [PATCH 116/768] firmware: arm_scmi: Fix the clock protocol version for v3.2 The clock protocol version as per the SCMI v3.2 specification is 0x30000. Enable the v3.0 clock protocol features only when clock protocol version equals 0x30000. The previous beta version of the spec had this value set to 0x20001 and th same value trickled down from the initial development. The version update were missed in the driver. Fixes: e49e314a2cf7 ("firmware: arm_scmi: Add clock v3.2 CONFIG_SET support") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240109150106.2066739-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index c0644558042a..d6357513163c 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -954,8 +954,7 @@ static int scmi_clock_protocol_init(const struct scmi_protocol_handle *ph) scmi_clock_describe_rates_get(ph, clkid, clk); } - if (PROTOCOL_REV_MAJOR(version) >= 0x2 && - PROTOCOL_REV_MINOR(version) >= 0x1) { + if (PROTOCOL_REV_MAJOR(version) >= 0x3) { cinfo->clock_config_set = scmi_clock_config_set_v2; cinfo->clock_config_get = scmi_clock_config_get_v2; } else { From 6bd1b3fede83d8ba5314886062a9bfdada5102a9 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 9 Jan 2024 18:17:16 +0000 Subject: [PATCH 117/768] firmware: arm_scmi: Fix the clock protocol supported version Rollback currently supported SCMI clock protocol version to v2.0 since some of the mandatory v3.0 features are indeed still not supported yet. Fixes: b5efc28a754d ("firmware: arm_scmi: Add protocol versioning checks") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240109181716.2338636-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index d6357513163c..e2050adbf85c 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -13,7 +13,7 @@ #include "notify.h" /* Updated only after ALL the mandatory features for that version are merged */ -#define SCMI_PROTOCOL_SUPPORTED_VERSION 0x20001 +#define SCMI_PROTOCOL_SUPPORTED_VERSION 0x20000 enum scmi_clock_protocol_cmd { CLOCK_ATTRIBUTES = 0x3, From 59b2e242b13192e50bf47df3780bf8a7e2260e98 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:11 +0000 Subject: [PATCH 118/768] firmware: arm_ffa: Add missing rwlock_init() in ffa_setup_partitions() Add the missing rwlock initialization for the individual FF-A partition information in ffa_setup_partitions(). Fixes: 0184450b8b1e ("firmware: arm_ffa: Add schedule receiver callback mechanism") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-1-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 6146b2927d5c..ed1d6a24934e 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1226,6 +1226,7 @@ static void ffa_setup_partitions(void) ffa_device_unregister(ffa_dev); continue; } + rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL); } drv_info->partition_count = count; From 5ff30ade16cd9efc2466d3ea22bbaf370772941a Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:12 +0000 Subject: [PATCH 119/768] firmware: arm_ffa: Add missing rwlock_init() for the driver partition Add the missing rwlock initialization for the FF-A partition associated the driver in ffa_setup_partitions(). It will the primary scheduler partition in the host or the VM partition in the virtualised environment. IOW, it corresponds to the partition with VM ID == drv_info->vm_id. Fixes: 1b6bf41b7a65 ("firmware: arm_ffa: Add notification handling mechanism") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-2-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index ed1d6a24934e..8df92c9521f4 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1237,6 +1237,7 @@ static void ffa_setup_partitions(void) info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return; + rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); drv_info->partition_count++; } From c00d9738fd5fce15dc5494d05b7599dce23e8146 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:13 +0000 Subject: [PATCH 120/768] firmware: arm_ffa: Check xa_load() return value Add a check to verify the result of xa_load() during the partition lookups done while registering/unregistering the scheduler receiver interrupt callbacks and while executing the main scheduler receiver interrupt callback handler. Fixes: 0184450b8b1e ("firmware: arm_ffa: Add schedule receiver callback mechanism") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-3-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 8df92c9521f4..0ea1dd6e55c4 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -733,6 +733,11 @@ static void __do_sched_recv_cb(u16 part_id, u16 vcpu, bool is_per_vcpu) void *cb_data; partition = xa_load(&drv_info->partition_info, part_id); + if (!partition) { + pr_err("%s: Invalid partition ID 0x%x\n", __func__, part_id); + return; + } + read_lock(&partition->rw_lock); callback = partition->callback; cb_data = partition->cb_data; @@ -915,6 +920,11 @@ static int ffa_sched_recv_cb_update(u16 part_id, ffa_sched_recv_cb callback, return -EOPNOTSUPP; partition = xa_load(&drv_info->partition_info, part_id); + if (!partition) { + pr_err("%s: Invalid partition ID 0x%x\n", __func__, part_id); + return -EINVAL; + } + write_lock(&partition->rw_lock); cb_valid = !!partition->callback; From ad9d9a107a4308e75ec34890547447c7095b4781 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:14 +0000 Subject: [PATCH 121/768] firmware: arm_ffa: Simplify ffa_partitions_cleanup() On cleanup iterate the XArrays with xa_for_each() and remove the existent entries with xa_erase(), finally destroy the XArray itself. Remove partition_count field from drv_info since no more used anywhwere. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-4-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 0ea1dd6e55c4..2426021dbb58 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -107,7 +107,6 @@ struct ffa_drv_info { struct work_struct notif_pcpu_work; struct work_struct irq_work; struct xarray partition_info; - unsigned int partition_count; DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS)); struct mutex notify_lock; /* lock to protect notifier hashtable */ }; @@ -1239,7 +1238,6 @@ static void ffa_setup_partitions(void) rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL); } - drv_info->partition_count = count; kfree(pbuf); @@ -1249,29 +1247,18 @@ static void ffa_setup_partitions(void) return; rwlock_init(&info->rw_lock); xa_store(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); - drv_info->partition_count++; } static void ffa_partitions_cleanup(void) { - struct ffa_dev_part_info **info; - int idx, count = drv_info->partition_count; + struct ffa_dev_part_info *info; + unsigned long idx; - if (!count) - return; + xa_for_each(&drv_info->partition_info, idx, info) { + xa_erase(&drv_info->partition_info, idx); + kfree(info); + } - info = kcalloc(count, sizeof(*info), GFP_KERNEL); - if (!info) - return; - - xa_extract(&drv_info->partition_info, (void **)info, 0, VM_ID_MASK, - count, XA_PRESENT); - - for (idx = 0; idx < count; idx++) - kfree(info[idx]); - kfree(info); - - drv_info->partition_count = 0; xa_destroy(&drv_info->partition_info); } @@ -1547,7 +1534,6 @@ static void __exit ffa_exit(void) ffa_rxtx_unmap(drv_info->vm_id); free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); free_pages_exact(drv_info->rx_buffer, RXTX_BUFFER_SIZE); - xa_destroy(&drv_info->partition_info); kfree(drv_info); arm_ffa_bus_exit(); } From ace760d9c0498fb226269ed34f0e86417d90f91b Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:15 +0000 Subject: [PATCH 122/768] firmware: arm_ffa: Use xa_insert() and check for result While adding new partitions descriptors to the XArray the outcome of the stores should be checked and, in particular, it has also to be ensured that an existing entry with the same index was not already present, since partitions IDs are expected to be unique. Use xa_insert() instead of xa_store() since it returns -EBUSY when the index is already in use and log an error when that happens. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-5-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 2426021dbb58..c613b57747cf 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1197,7 +1197,7 @@ void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid) static void ffa_setup_partitions(void) { - int count, idx; + int count, idx, ret; uuid_t uuid; struct ffa_device *ffa_dev; struct ffa_dev_part_info *info; @@ -1236,7 +1236,14 @@ static void ffa_setup_partitions(void) continue; } rwlock_init(&info->rw_lock); - xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL); + ret = xa_insert(&drv_info->partition_info, tpbuf->id, + info, GFP_KERNEL); + if (ret) { + pr_err("%s: failed to save partition ID 0x%x - ret:%d\n", + __func__, tpbuf->id, ret); + ffa_device_unregister(ffa_dev); + kfree(info); + } } kfree(pbuf); @@ -1246,7 +1253,13 @@ static void ffa_setup_partitions(void) if (!info) return; rwlock_init(&info->rw_lock); - xa_store(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); + ret = xa_insert(&drv_info->partition_info, drv_info->vm_id, + info, GFP_KERNEL); + if (ret) { + pr_err("%s: failed to save Host partition ID 0x%x - ret:%d. Abort.\n", + __func__, drv_info->vm_id, ret); + kfree(info); + } } static void ffa_partitions_cleanup(void) From 0c565d16b80074e57e3e56240d13fc6cd6ed0334 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 8 Jan 2024 12:34:16 +0000 Subject: [PATCH 123/768] firmware: arm_ffa: Handle partitions setup failures Make ffa_setup_partitions() fail, cleanup and return an error when the Host partition setup fails: in such a case ffa_init() itself will fail. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20240108-ffa_fixes_6-8-v1-6-75bf7035bc50@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index c613b57747cf..f2556a8e9401 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -112,6 +112,7 @@ struct ffa_drv_info { }; static struct ffa_drv_info *drv_info; +static void ffa_partitions_cleanup(void); /* * The driver must be able to support all the versions from the earliest @@ -1195,7 +1196,7 @@ void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid) kfree(pbuf); } -static void ffa_setup_partitions(void) +static int ffa_setup_partitions(void) { int count, idx, ret; uuid_t uuid; @@ -1206,7 +1207,7 @@ static void ffa_setup_partitions(void) count = ffa_partition_probe(&uuid_null, &pbuf); if (count <= 0) { pr_info("%s: No partitions found, error %d\n", __func__, count); - return; + return -EINVAL; } xa_init(&drv_info->partition_info); @@ -1250,8 +1251,14 @@ static void ffa_setup_partitions(void) /* Allocate for the host */ info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return; + if (!info) { + pr_err("%s: failed to alloc Host partition ID 0x%x. Abort.\n", + __func__, drv_info->vm_id); + /* Already registered devices are freed on bus_exit */ + ffa_partitions_cleanup(); + return -ENOMEM; + } + rwlock_init(&info->rw_lock); ret = xa_insert(&drv_info->partition_info, drv_info->vm_id, info, GFP_KERNEL); @@ -1259,7 +1266,11 @@ static void ffa_setup_partitions(void) pr_err("%s: failed to save Host partition ID 0x%x - ret:%d. Abort.\n", __func__, drv_info->vm_id, ret); kfree(info); + /* Already registered devices are freed on bus_exit */ + ffa_partitions_cleanup(); } + + return ret; } static void ffa_partitions_cleanup(void) @@ -1520,7 +1531,11 @@ static int __init ffa_init(void) ffa_notifications_setup(); - ffa_setup_partitions(); + ret = ffa_setup_partitions(); + if (ret) { + pr_err("failed to setup partitions\n"); + goto cleanup_notifs; + } ret = ffa_sched_recv_cb_update(drv_info->vm_id, ffa_self_notif_handle, drv_info, true); @@ -1528,6 +1543,9 @@ static int __init ffa_init(void) pr_info("Failed to register driver sched callback %d\n", ret); return 0; + +cleanup_notifs: + ffa_notifications_cleanup(); free_pages: if (drv_info->tx_buffer) free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); From f134bd1ebc28d40010328e5b314e10575e3550e0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:32:16 +0100 Subject: [PATCH 124/768] MIPS: sgi-ip27: Fix missing prototypes Fix missing prototypes by making not shared functions static and adding others to ip27-common.h. Also drop ip27-hubio.c as it's not used for a long time. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/sgi-ip27/Makefile | 2 +- arch/mips/sgi-ip27/ip27-berr.c | 4 +- arch/mips/sgi-ip27/ip27-common.h | 2 + arch/mips/sgi-ip27/ip27-hubio.c | 185 ------------------------------- arch/mips/sgi-ip27/ip27-irq.c | 2 + arch/mips/sgi-ip27/ip27-memory.c | 1 + arch/mips/sgi-ip27/ip27-nmi.c | 25 ++--- 7 files changed, 17 insertions(+), 204 deletions(-) delete mode 100644 arch/mips/sgi-ip27/ip27-hubio.c diff --git a/arch/mips/sgi-ip27/Makefile b/arch/mips/sgi-ip27/Makefile index 27c14ede191e..9877fcc512b1 100644 --- a/arch/mips/sgi-ip27/Makefile +++ b/arch/mips/sgi-ip27/Makefile @@ -5,7 +5,7 @@ obj-y := ip27-berr.o ip27-irq.o ip27-init.o ip27-klconfig.o \ ip27-klnuma.o ip27-memory.o ip27-nmi.o ip27-reset.o ip27-timer.o \ - ip27-hubio.o ip27-xtalk.o + ip27-xtalk.o obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o obj-$(CONFIG_SMP) += ip27-smp.o diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 923a63a51cda..9eb497cb5d52 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -22,6 +22,8 @@ #include #include +#include "ip27-common.h" + static void dump_hub_information(unsigned long errst0, unsigned long errst1) { static char *err_type[2][8] = { @@ -57,7 +59,7 @@ static void dump_hub_information(unsigned long errst0, unsigned long errst1) [st0.pi_stat0_fmt.s0_err_type] ? : "invalid"); } -int ip27_be_handler(struct pt_regs *regs, int is_fixup) +static int ip27_be_handler(struct pt_regs *regs, int is_fixup) { unsigned long errst0, errst1; int data = regs->cp0_cause & 4; diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h index ed008a08464c..a0059fa13934 100644 --- a/arch/mips/sgi-ip27/ip27-common.h +++ b/arch/mips/sgi-ip27/ip27-common.h @@ -10,6 +10,7 @@ extern void hub_rt_clock_event_init(void); extern void hub_rtc_init(nasid_t nasid); extern void install_cpu_nmi_handler(int slice); extern void install_ipi(void); +extern void ip27_be_init(void); extern void ip27_reboot_setup(void); extern const struct plat_smp_ops ip27_smp_ops; extern unsigned long node_getfirstfree(nasid_t nasid); @@ -17,4 +18,5 @@ extern void per_cpu_init(void); extern void replicate_kernel_text(void); extern void setup_replication_mask(void); + #endif /* __IP27_COMMON_H */ diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c deleted file mode 100644 index c57f0d8f3218..000000000000 --- a/arch/mips/sgi-ip27/ip27-hubio.c +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 1992-1997, 2000-2003 Silicon Graphics, Inc. - * Copyright (C) 2004 Christoph Hellwig. - * - * Support functions for the HUB ASIC - mostly PIO mapping related. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -static int force_fire_and_forget = 1; - -/** - * hub_pio_map - establish a HUB PIO mapping - * - * @nasid: nasid to perform PIO mapping on - * @widget: widget ID to perform PIO mapping for - * @xtalk_addr: xtalk_address that needs to be mapped - * @size: size of the PIO mapping - * - **/ -unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget, - unsigned long xtalk_addr, size_t size) -{ - unsigned i; - - /* use small-window mapping if possible */ - if ((xtalk_addr % SWIN_SIZE) + size <= SWIN_SIZE) - return NODE_SWIN_BASE(nasid, widget) + (xtalk_addr % SWIN_SIZE); - - if ((xtalk_addr % BWIN_SIZE) + size > BWIN_SIZE) { - printk(KERN_WARNING "PIO mapping at hub %d widget %d addr 0x%lx" - " too big (%ld)\n", - nasid, widget, xtalk_addr, size); - return 0; - } - - xtalk_addr &= ~(BWIN_SIZE-1); - for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) { - if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used)) - continue; - - /* - * The code below does a PIO write to setup an ITTE entry. - * - * We need to prevent other CPUs from seeing our updated - * memory shadow of the ITTE (in the piomap) until the ITTE - * entry is actually set up; otherwise, another CPU might - * attempt a PIO prematurely. - * - * Also, the only way we can know that an entry has been - * received by the hub and can be used by future PIO reads/ - * writes is by reading back the ITTE entry after writing it. - * - * For these two reasons, we PIO read back the ITTE entry - * after we write it. - */ - IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr); - __raw_readq(IIO_ITTE_GET(nasid, i)); - - return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE); - } - - printk(KERN_WARNING "unable to establish PIO mapping for at" - " hub %d widget %d addr 0x%lx\n", - nasid, widget, xtalk_addr); - return 0; -} - - -/* - * hub_setup_prb(nasid, prbnum, credits, conveyor) - * - * Put a PRB into fire-and-forget mode if conveyor isn't set. Otherwise, - * put it into conveyor belt mode with the specified number of credits. - */ -static void hub_setup_prb(nasid_t nasid, int prbnum, int credits) -{ - union iprb_u prb; - int prb_offset; - - /* - * Get the current register value. - */ - prb_offset = IIO_IOPRB(prbnum); - prb.iprb_regval = REMOTE_HUB_L(nasid, prb_offset); - - /* - * Clear out some fields. - */ - prb.iprb_ovflow = 1; - prb.iprb_bnakctr = 0; - prb.iprb_anakctr = 0; - - /* - * Enable or disable fire-and-forget mode. - */ - prb.iprb_ff = force_fire_and_forget ? 1 : 0; - - /* - * Set the appropriate number of PIO credits for the widget. - */ - prb.iprb_xtalkctr = credits; - - /* - * Store the new value to the register. - */ - REMOTE_HUB_S(nasid, prb_offset, prb.iprb_regval); -} - -/** - * hub_set_piomode - set pio mode for a given hub - * - * @nasid: physical node ID for the hub in question - * - * Put the hub into either "PIO conveyor belt" mode or "fire-and-forget" mode. - * To do this, we have to make absolutely sure that no PIOs are in progress - * so we turn off access to all widgets for the duration of the function. - * - * XXX - This code should really check what kind of widget we're talking - * to. Bridges can only handle three requests, but XG will do more. - * How many can crossbow handle to widget 0? We're assuming 1. - * - * XXX - There is a bug in the crossbow that link reset PIOs do not - * return write responses. The easiest solution to this problem is to - * leave widget 0 (xbow) in fire-and-forget mode at all times. This - * only affects pio's to xbow registers, which should be rare. - **/ -static void hub_set_piomode(nasid_t nasid) -{ - u64 ii_iowa; - union hubii_wcr_u ii_wcr; - unsigned i; - - ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS); - REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, 0); - - ii_wcr.wcr_reg_value = REMOTE_HUB_L(nasid, IIO_WCR); - - if (ii_wcr.iwcr_dir_con) { - /* - * Assume a bridge here. - */ - hub_setup_prb(nasid, 0, 3); - } else { - /* - * Assume a crossbow here. - */ - hub_setup_prb(nasid, 0, 1); - } - - /* - * XXX - Here's where we should take the widget type into - * when account assigning credits. - */ - for (i = HUB_WIDGET_ID_MIN; i <= HUB_WIDGET_ID_MAX; i++) - hub_setup_prb(nasid, i, 3); - - REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, ii_iowa); -} - -/* - * hub_pio_init - PIO-related hub initialization - * - * @hub: hubinfo structure for our hub - */ -void hub_pio_init(nasid_t nasid) -{ - unsigned i; - - /* initialize big window piomaps for this hub */ - bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW); - for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) - IIO_ITTE_DISABLE(nasid, i); - - hub_set_piomode(nasid); -} diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index a0dd3bd2b81b..8f5299b269e7 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -23,6 +23,8 @@ #include #include +#include "ip27-common.h" + struct hub_irq_data { u64 *irq_mask[2]; cpuid_t cpu; diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f79c48393716..b8ca94cfb4fe 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c index 84889b57d5ff..fc2816398d0c 100644 --- a/arch/mips/sgi-ip27/ip27-nmi.c +++ b/arch/mips/sgi-ip27/ip27-nmi.c @@ -11,6 +11,8 @@ #include #include +#include "ip27-common.h" + #if 0 #define NODE_NUM_CPUS(n) CNODE_NUM_CPUS(n) #else @@ -23,16 +25,7 @@ typedef unsigned long machreg_t; static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED; - -/* - * Let's see what else we need to do here. Set up sp, gp? - */ -void nmi_dump(void) -{ - void cont_nmi_dump(void); - - cont_nmi_dump(); -} +static void nmi_dump(void); void install_cpu_nmi_handler(int slice) { @@ -53,7 +46,7 @@ void install_cpu_nmi_handler(int slice) * into the eframe format for the node under consideration. */ -void nmi_cpu_eframe_save(nasid_t nasid, int slice) +static void nmi_cpu_eframe_save(nasid_t nasid, int slice) { struct reg_struct *nr; int i; @@ -129,7 +122,7 @@ void nmi_cpu_eframe_save(nasid_t nasid, int slice) pr_emerg("\n"); } -void nmi_dump_hub_irq(nasid_t nasid, int slice) +static void nmi_dump_hub_irq(nasid_t nasid, int slice) { u64 mask0, mask1, pend0, pend1; @@ -153,7 +146,7 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice) * Copy the cpu registers which have been saved in the IP27prom format * into the eframe format for the node under consideration. */ -void nmi_node_eframe_save(nasid_t nasid) +static void nmi_node_eframe_save(nasid_t nasid) { int slice; @@ -170,8 +163,7 @@ void nmi_node_eframe_save(nasid_t nasid) /* * Save the nmi cpu registers for all cpus in the system. */ -void -nmi_eframes_save(void) +static void nmi_eframes_save(void) { nasid_t nasid; @@ -179,8 +171,7 @@ nmi_eframes_save(void) nmi_node_eframe_save(nasid); } -void -cont_nmi_dump(void) +static void nmi_dump(void) { #ifndef REAL_NMI_SIGNAL static atomic_t nmied_cpus = ATOMIC_INIT(0); From e3a4f1b7ada8360c1838ac3aad9837749a698c7a Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:36:34 +0100 Subject: [PATCH 125/768] MIPS: fw arc: Fix missing prototypes Make ArcGetMemoryDescriptor() static since it's only needed internally. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/fw/arc/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index 66188739f54d..fb78e6fd5de4 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -37,7 +37,7 @@ static unsigned int nr_prom_mem __initdata; */ #define ARC_PAGE_SHIFT 12 -struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current) +static struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current) { return (struct linux_mdesc *) ARC_CALL1(get_mdesc, Current); } From ab58a2f319de945f631150a190653a90e307df8e Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:37:57 +0100 Subject: [PATCH 126/768] MIPS: sgi-ip30: Fix missing prototypes Include needed header files. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/sgi-ip30/ip30-console.c | 1 + arch/mips/sgi-ip30/ip30-setup.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c index b91f8c4fdc78..7c6dcf6e73f7 100644 --- a/arch/mips/sgi-ip30/ip30-console.c +++ b/arch/mips/sgi-ip30/ip30-console.c @@ -3,6 +3,7 @@ #include #include +#include static inline struct ioc3_uartregs *console_uart(void) { diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c index 75a34684e704..e8547636a748 100644 --- a/arch/mips/sgi-ip30/ip30-setup.c +++ b/arch/mips/sgi-ip30/ip30-setup.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include From f64fdde9bc771d7d790e8bcc30a7e03bbe0b1a9f Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 19 Jan 2024 14:52:51 +0100 Subject: [PATCH 127/768] MIPS: sgi-ip32: Fix missing prototypes Fix interrupt function prototypes, move all prototypes into a new file ip32-common.h and include it where needed. Signed-off-by: Thomas Bogendoerfer Reviewed-by: Florian Fainelli --- arch/mips/sgi-ip32/crime.c | 6 ++++-- arch/mips/sgi-ip32/ip32-berr.c | 2 ++ arch/mips/sgi-ip32/ip32-common.h | 15 +++++++++++++++ arch/mips/sgi-ip32/ip32-irq.c | 6 ++---- arch/mips/sgi-ip32/ip32-memory.c | 1 + arch/mips/sgi-ip32/ip32-reset.c | 2 ++ arch/mips/sgi-ip32/ip32-setup.c | 3 +-- 7 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 arch/mips/sgi-ip32/ip32-common.h diff --git a/arch/mips/sgi-ip32/crime.c b/arch/mips/sgi-ip32/crime.c index a8e0c776ca6c..b8a0e4cfa9ce 100644 --- a/arch/mips/sgi-ip32/crime.c +++ b/arch/mips/sgi-ip32/crime.c @@ -18,6 +18,8 @@ #include #include +#include "ip32-common.h" + struct sgi_crime __iomem *crime; struct sgi_mace __iomem *mace; @@ -39,7 +41,7 @@ void __init crime_init(void) id, rev, field, (unsigned long) CRIME_BASE); } -irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id) +irqreturn_t crime_memerr_intr(int irq, void *dev_id) { unsigned long stat, addr; int fatal = 0; @@ -90,7 +92,7 @@ irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id) return IRQ_HANDLED; } -irqreturn_t crime_cpuerr_intr(unsigned int irq, void *dev_id) +irqreturn_t crime_cpuerr_intr(int irq, void *dev_id) { unsigned long stat = crime->cpu_error_stat & CRIME_CPU_ERROR_MASK; unsigned long addr = crime->cpu_error_addr & CRIME_CPU_ERROR_ADDR_MASK; diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index 478b63b4c808..7cbc27941f92 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -18,6 +18,8 @@ #include #include +#include "ip32-common.h" + static int ip32_be_handler(struct pt_regs *regs, int is_fixup) { int data = regs->cp0_cause & 4; diff --git a/arch/mips/sgi-ip32/ip32-common.h b/arch/mips/sgi-ip32/ip32-common.h new file mode 100644 index 000000000000..cfc0225b1419 --- /dev/null +++ b/arch/mips/sgi-ip32/ip32-common.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IP32_COMMON_H +#define __IP32_COMMON_H + +#include +#include + +void __init crime_init(void); +irqreturn_t crime_memerr_intr(int irq, void *dev_id); +irqreturn_t crime_cpuerr_intr(int irq, void *dev_id); +void __init ip32_be_init(void); +void ip32_prepare_poweroff(void); + +#endif /* __IP32_COMMON_H */ diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c index e21ea1de05e3..29d04468a06b 100644 --- a/arch/mips/sgi-ip32/ip32-irq.c +++ b/arch/mips/sgi-ip32/ip32-irq.c @@ -28,6 +28,8 @@ #include #include +#include "ip32-common.h" + /* issue a PIO read to make sure no PIO writes are pending */ static inline void flush_crime_bus(void) { @@ -107,10 +109,6 @@ static inline void flush_mace_bus(void) * is quite different anyway. */ -/* Some initial interrupts to set up */ -extern irqreturn_t crime_memerr_intr(int irq, void *dev_id); -extern irqreturn_t crime_cpuerr_intr(int irq, void *dev_id); - /* * This is for pure CRIME interrupts - ie not MACE. The advantage? * We get to split the register in half and do faster lookups. diff --git a/arch/mips/sgi-ip32/ip32-memory.c b/arch/mips/sgi-ip32/ip32-memory.c index 3fc8d0a0bdfa..5fee33744f67 100644 --- a/arch/mips/sgi-ip32/ip32-memory.c +++ b/arch/mips/sgi-ip32/ip32-memory.c @@ -15,6 +15,7 @@ #include #include #include +#include extern void crime_init(void); diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 18d1c115cd53..6bdc1421cda4 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -29,6 +29,8 @@ #include #include +#include "ip32-common.h" + #define POWERDOWN_TIMEOUT 120 /* * Blink frequency during reboot grace period and when panicked. diff --git a/arch/mips/sgi-ip32/ip32-setup.c b/arch/mips/sgi-ip32/ip32-setup.c index 8019dae1721a..aeb0805aae57 100644 --- a/arch/mips/sgi-ip32/ip32-setup.c +++ b/arch/mips/sgi-ip32/ip32-setup.c @@ -26,8 +26,7 @@ #include #include -extern void ip32_be_init(void); -extern void crime_init(void); +#include "ip32-common.h" #ifdef CONFIG_SGI_O2MACE_ETH /* From e4cec073b7755a78030f30cf627141c759035b50 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 20 Jan 2024 23:00:21 -0800 Subject: [PATCH 128/768] dmaengine: at_hdmac: fix some kernel-doc warnings Fix some kernel-doc format warnings: at_hdmac.c:243: warning: Excess struct member 'sg_len' description in 'at_desc' at_hdmac.c:252: warning: cannot understand function prototype: 'enum atc_status ' ez at_hdmac.c:351: warning: Excess struct member 'atdma_devtype' description in 'at_dma' at_hdmac.c:351: warning: Excess struct member 'ch_regs' description in 'at_dma' at_hdmac.c:664: warning: contents before sections Signed-off-by: Randy Dunlap Cc: Ludovic Desroches Cc: Tudor Ambarus Cc: linux-arm-kernel@lists.infradead.org Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Link: https://lore.kernel.org/r/20240121070021.25365-1-rdunlap@infradead.org Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index fb89ecbf0cc5..6bad536e0492 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -222,7 +222,7 @@ struct atdma_sg { * @vd: pointer to the virtual dma descriptor. * @atchan: pointer to the atmel dma channel. * @total_len: total transaction byte count - * @sg_len: number of sg entries. + * @sglen: number of sg entries. * @sg: array of sgs. */ struct at_desc { @@ -245,7 +245,7 @@ struct at_desc { /*-- Channels --------------------------------------------------------*/ /** - * atc_status - information bits stored in channel status flag + * enum atc_status - information bits stored in channel status flag * * Manipulated with atomic operations. */ @@ -328,8 +328,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) /** * struct at_dma - internal representation of an Atmel HDMA Controller * @dma_device: dmaengine dma_device object members - * @atdma_devtype: identifier of DMA controller compatibility - * @ch_regs: memory mapped register base + * @regs: memory mapped register base * @clk: dma controller clock * @save_imr: interrupt mask register that is saved on suspend/resume cycle * @all_chan_mask: all channels availlable in a mask @@ -626,6 +625,9 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) /** * atc_get_llis_residue - Get residue for a hardware linked list transfer + * @atchan: pointer to an atmel hdmac channel. + * @desc: pointer to the descriptor for which the residue is calculated. + * @residue: residue to be set to dma_tx_state. * * Calculate the residue by removing the length of the Linked List Item (LLI) * already transferred from the total length. To get the current LLI we can use @@ -661,10 +663,8 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) * two DSCR values are different, we read again the CTRLA then the DSCR till two * consecutive read values from DSCR are equal or till the maximum trials is * reach. This algorithm is very unlikely not to find a stable value for DSCR. - * @atchan: pointer to an atmel hdmac channel. - * @desc: pointer to the descriptor for which the residue is calculated. - * @residue: residue to be set to dma_tx_state. - * Returns 0 on success, -errno otherwise. + * + * Returns: %0 on success, -errno otherwise. */ static int atc_get_llis_residue(struct at_dma_chan *atchan, struct at_desc *desc, u32 *residue) @@ -731,7 +731,8 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan, * @chan: DMA channel * @cookie: transaction identifier to check status of * @residue: residue to be updated. - * Return 0 on success, -errono otherwise. + * + * Return: %0 on success, -errno otherwise. */ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, u32 *residue) @@ -1710,7 +1711,7 @@ static void atc_issue_pending(struct dma_chan *chan) * atc_alloc_chan_resources - allocate resources for DMA channel * @chan: allocate descriptor resources for this channel * - * return - the number of allocated descriptors + * Return: the number of allocated descriptors */ static int atc_alloc_chan_resources(struct dma_chan *chan) { From 6ba7843b59b77360812617d071313c7f35f3757a Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:04 +0100 Subject: [PATCH 129/768] platform/x86: wmi: Fix error handling in legacy WMI notify handler functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When wmi_install_notify_handler()/wmi_remove_notify_handler() are unable to enable/disable the WMI device, they unconditionally return an error to the caller. When registering legacy WMI notify handlers, this means that the callback remains registered despite wmi_install_notify_handler() having returned an error. When removing legacy WMI notify handlers, this means that the callback is removed despite wmi_remove_notify_handler() having returned an error. Fix this by only warning when the WMI device could not be enabled. This behaviour matches the bus-based WMI interface. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Fixes: 58f6425eb92f ("WMI: Cater for multiple events with same GUID") Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-2-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index bd271a5730aa..23b4043c0fe7 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -593,9 +593,10 @@ acpi_status wmi_install_notify_handler(const char *guid, block->handler_data = data; wmi_status = wmi_method_enable(block, true); - if ((wmi_status != AE_OK) || - ((wmi_status == AE_OK) && (status == AE_NOT_EXIST))) - status = wmi_status; + if (ACPI_FAILURE(wmi_status)) + dev_warn(&block->dev.dev, "Failed to enable device\n"); + + status = AE_OK; } } @@ -631,10 +632,13 @@ acpi_status wmi_remove_notify_handler(const char *guid) return AE_NULL_ENTRY; wmi_status = wmi_method_enable(block, false); + if (ACPI_FAILURE(wmi_status)) + dev_warn(&block->dev.dev, "Failed to disable device\n"); + block->handler = NULL; block->handler_data = NULL; - if (wmi_status != AE_OK || (wmi_status == AE_OK && status == AE_NOT_EXIST)) - status = wmi_status; + + status = AE_OK; } } From 3d8a29fec2cb96b3aa75a595f20c4b73ff294a97 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:05 +0100 Subject: [PATCH 130/768] platform/x86: wmi: Return immediately if an suitable WMI event is found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 58f6425eb92f ("WMI: Cater for multiple events with same GUID") allowed legacy WMI notify handlers to be installed for multiple WMI devices with the same GUID. However this is useless since the legacy GUID-based interface is blacklisted from seeing WMI devices with duplicated GUIDs. Return immediately if a suitable WMI event is found in wmi_install/remove_notify_handler() since searching for other suitable events is pointless. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-3-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 23b4043c0fe7..4e2d0e15b155 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -573,7 +573,6 @@ acpi_status wmi_install_notify_handler(const char *guid, void *data) { struct wmi_block *block; - acpi_status status = AE_NOT_EXIST; guid_t guid_input; if (!guid || !handler) @@ -596,11 +595,11 @@ acpi_status wmi_install_notify_handler(const char *guid, if (ACPI_FAILURE(wmi_status)) dev_warn(&block->dev.dev, "Failed to enable device\n"); - status = AE_OK; + return AE_OK; } } - return status; + return AE_NOT_EXIST; } EXPORT_SYMBOL_GPL(wmi_install_notify_handler); @@ -615,7 +614,6 @@ EXPORT_SYMBOL_GPL(wmi_install_notify_handler); acpi_status wmi_remove_notify_handler(const char *guid) { struct wmi_block *block; - acpi_status status = AE_NOT_EXIST; guid_t guid_input; if (!guid) @@ -638,11 +636,11 @@ acpi_status wmi_remove_notify_handler(const char *guid) block->handler = NULL; block->handler_data = NULL; - status = AE_OK; + return AE_OK; } } - return status; + return AE_NOT_EXIST; } EXPORT_SYMBOL_GPL(wmi_remove_notify_handler); From 3ea7f59af8ffa17ce5f5173d6f4bfbc73334187d Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:06 +0100 Subject: [PATCH 131/768] platform/x86: wmi: Decouple legacy WMI notify handlers from wmi_block_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, legacy WMI notify handler functions where using the wmi_block_list, which did no refcounting on the returned WMI device. This meant that the WMI device could disappear at any moment, potentially leading to various errors. Fix this by using bus_find_device() which returns an actual reference to the found WMI device. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-4-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 118 +++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 50 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 4e2d0e15b155..699157ce347c 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -219,6 +219,17 @@ static int wmidev_match_guid(struct device *dev, const void *data) return 0; } +static int wmidev_match_notify_id(struct device *dev, const void *data) +{ + struct wmi_block *wblock = dev_to_wblock(dev); + const u32 *notify_id = data; + + if (wblock->gblock.flags & ACPI_WMI_EVENT && wblock->gblock.notify_id == *notify_id) + return 1; + + return 0; +} + static struct bus_type wmi_bus_type; static struct wmi_device *wmi_find_device_by_guid(const char *guid_string) @@ -238,6 +249,17 @@ static struct wmi_device *wmi_find_device_by_guid(const char *guid_string) return dev_to_wdev(dev); } +static struct wmi_device *wmi_find_event_by_notify_id(const u32 notify_id) +{ + struct device *dev; + + dev = bus_find_device(&wmi_bus_type, NULL, ¬ify_id, wmidev_match_notify_id); + if (!dev) + return ERR_PTR(-ENODEV); + + return to_wmi_device(dev); +} + static void wmi_device_put(struct wmi_device *wdev) { put_device(&wdev->dev); @@ -572,34 +594,30 @@ acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data) { - struct wmi_block *block; - guid_t guid_input; + struct wmi_block *wblock; + struct wmi_device *wdev; + acpi_status status; - if (!guid || !handler) - return AE_BAD_PARAMETER; + wdev = wmi_find_device_by_guid(guid); + if (IS_ERR(wdev)) + return AE_ERROR; - if (guid_parse(guid, &guid_input)) - return AE_BAD_PARAMETER; + wblock = container_of(wdev, struct wmi_block, dev); + if (wblock->handler) { + status = AE_ALREADY_ACQUIRED; + } else { + wblock->handler = handler; + wblock->handler_data = data; - list_for_each_entry(block, &wmi_block_list, list) { - acpi_status wmi_status; + if (ACPI_FAILURE(wmi_method_enable(wblock, true))) + dev_warn(&wblock->dev.dev, "Failed to enable device\n"); - if (guid_equal(&block->gblock.guid, &guid_input)) { - if (block->handler) - return AE_ALREADY_ACQUIRED; - - block->handler = handler; - block->handler_data = data; - - wmi_status = wmi_method_enable(block, true); - if (ACPI_FAILURE(wmi_status)) - dev_warn(&block->dev.dev, "Failed to enable device\n"); - - return AE_OK; - } + status = AE_OK; } - return AE_NOT_EXIST; + wmi_device_put(wdev); + + return status; } EXPORT_SYMBOL_GPL(wmi_install_notify_handler); @@ -613,34 +631,30 @@ EXPORT_SYMBOL_GPL(wmi_install_notify_handler); */ acpi_status wmi_remove_notify_handler(const char *guid) { - struct wmi_block *block; - guid_t guid_input; + struct wmi_block *wblock; + struct wmi_device *wdev; + acpi_status status; - if (!guid) - return AE_BAD_PARAMETER; + wdev = wmi_find_device_by_guid(guid); + if (IS_ERR(wdev)) + return AE_ERROR; - if (guid_parse(guid, &guid_input)) - return AE_BAD_PARAMETER; + wblock = container_of(wdev, struct wmi_block, dev); + if (!wblock->handler) { + status = AE_NULL_ENTRY; + } else { + if (ACPI_FAILURE(wmi_method_enable(wblock, false))) + dev_warn(&wblock->dev.dev, "Failed to disable device\n"); - list_for_each_entry(block, &wmi_block_list, list) { - acpi_status wmi_status; + wblock->handler = NULL; + wblock->handler_data = NULL; - if (guid_equal(&block->gblock.guid, &guid_input)) { - if (!block->handler) - return AE_NULL_ENTRY; - - wmi_status = wmi_method_enable(block, false); - if (ACPI_FAILURE(wmi_status)) - dev_warn(&block->dev.dev, "Failed to disable device\n"); - - block->handler = NULL; - block->handler_data = NULL; - - return AE_OK; - } + status = AE_OK; } - return AE_NOT_EXIST; + wmi_device_put(wdev); + + return status; } EXPORT_SYMBOL_GPL(wmi_remove_notify_handler); @@ -657,15 +671,19 @@ EXPORT_SYMBOL_GPL(wmi_remove_notify_handler); acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out) { struct wmi_block *wblock; + struct wmi_device *wdev; + acpi_status status; - list_for_each_entry(wblock, &wmi_block_list, list) { - struct guid_block *gblock = &wblock->gblock; + wdev = wmi_find_event_by_notify_id(event); + if (IS_ERR(wdev)) + return AE_NOT_FOUND; - if ((gblock->flags & ACPI_WMI_EVENT) && gblock->notify_id == event) - return get_event_data(wblock, out); - } + wblock = container_of(wdev, struct wmi_block, dev); + status = get_event_data(wblock, out); - return AE_NOT_FOUND; + wmi_device_put(wdev); + + return status; } EXPORT_SYMBOL_GPL(wmi_get_event_data); From 29e473f4b51ee56b5808323e274a8369b4d181cb Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 3 Jan 2024 20:27:07 +0100 Subject: [PATCH 132/768] platform/x86: wmi: Fix notify callback locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an legacy WMI event handler is removed, an WMI event could have called the handler just before it was removed, meaning the handler could still be running after wmi_remove_notify_handler() returns. Something similar could also happens when using the WMI bus, as the WMI core might still call the notify() callback from an WMI driver even if its remove() callback was just called. Fix this by introducing a rw semaphore which ensures that the event state of a WMI device does not change while the WMI core is handling an event for it. Tested on a Dell Inspiron 3505 and a Acer Aspire E1-731. Fixes: 1686f5444546 ("platform/x86: wmi: Incorporate acpi_install_notify_handler") Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20240103192707.115512-5-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 71 +++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 699157ce347c..d1df1a31de00 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,6 @@ static_assert(__alignof__(struct guid_block) == 1); enum { /* wmi_block flags */ WMI_READ_TAKES_NO_ARGS, - WMI_PROBED, }; struct wmi_block { @@ -64,8 +64,10 @@ struct wmi_block { struct list_head list; struct guid_block gblock; struct acpi_device *acpi_device; + struct rw_semaphore notify_lock; /* Protects notify callback add/remove */ wmi_notify_handler handler; void *handler_data; + bool driver_ready; unsigned long flags; }; @@ -603,6 +605,8 @@ acpi_status wmi_install_notify_handler(const char *guid, return AE_ERROR; wblock = container_of(wdev, struct wmi_block, dev); + + down_write(&wblock->notify_lock); if (wblock->handler) { status = AE_ALREADY_ACQUIRED; } else { @@ -614,6 +618,7 @@ acpi_status wmi_install_notify_handler(const char *guid, status = AE_OK; } + up_write(&wblock->notify_lock); wmi_device_put(wdev); @@ -640,6 +645,8 @@ acpi_status wmi_remove_notify_handler(const char *guid) return AE_ERROR; wblock = container_of(wdev, struct wmi_block, dev); + + down_write(&wblock->notify_lock); if (!wblock->handler) { status = AE_NULL_ENTRY; } else { @@ -651,6 +658,7 @@ acpi_status wmi_remove_notify_handler(const char *guid) status = AE_OK; } + up_write(&wblock->notify_lock); wmi_device_put(wdev); @@ -896,7 +904,9 @@ static int wmi_dev_probe(struct device *dev) } } - set_bit(WMI_PROBED, &wblock->flags); + down_write(&wblock->notify_lock); + wblock->driver_ready = true; + up_write(&wblock->notify_lock); return 0; } @@ -906,7 +916,9 @@ static void wmi_dev_remove(struct device *dev) struct wmi_block *wblock = dev_to_wblock(dev); struct wmi_driver *wdriver = drv_to_wdrv(dev->driver); - clear_bit(WMI_PROBED, &wblock->flags); + down_write(&wblock->notify_lock); + wblock->driver_ready = false; + up_write(&wblock->notify_lock); if (wdriver->remove) wdriver->remove(dev_to_wdev(dev)); @@ -1019,6 +1031,8 @@ static int wmi_create_device(struct device *wmi_bus_dev, wblock->dev.setable = true; out_init: + init_rwsem(&wblock->notify_lock); + wblock->driver_ready = false; wblock->dev.dev.bus = &wmi_bus_type; wblock->dev.dev.parent = wmi_bus_dev; @@ -1191,6 +1205,26 @@ acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address, } } +static void wmi_notify_driver(struct wmi_block *wblock) +{ + struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver); + struct acpi_buffer data = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + + if (!driver->no_notify_data) { + status = get_event_data(wblock, &data); + if (ACPI_FAILURE(status)) { + dev_warn(&wblock->dev.dev, "Failed to get event data\n"); + return; + } + } + + if (driver->notify) + driver->notify(&wblock->dev, data.pointer); + + kfree(data.pointer); +} + static int wmi_notify_device(struct device *dev, void *data) { struct wmi_block *wblock = dev_to_wblock(dev); @@ -1199,28 +1233,17 @@ static int wmi_notify_device(struct device *dev, void *data) if (!(wblock->gblock.flags & ACPI_WMI_EVENT && wblock->gblock.notify_id == *event)) return 0; - /* If a driver is bound, then notify the driver. */ - if (test_bit(WMI_PROBED, &wblock->flags) && wblock->dev.dev.driver) { - struct wmi_driver *driver = drv_to_wdrv(wblock->dev.dev.driver); - struct acpi_buffer evdata = { ACPI_ALLOCATE_BUFFER, NULL }; - acpi_status status; - - if (!driver->no_notify_data) { - status = get_event_data(wblock, &evdata); - if (ACPI_FAILURE(status)) { - dev_warn(&wblock->dev.dev, "failed to get event data\n"); - return -EIO; - } - } - - if (driver->notify) - driver->notify(&wblock->dev, evdata.pointer); - - kfree(evdata.pointer); - } else if (wblock->handler) { - /* Legacy handler */ - wblock->handler(*event, wblock->handler_data); + down_read(&wblock->notify_lock); + /* The WMI driver notify handler conflicts with the legacy WMI handler. + * Because of this the WMI driver notify handler takes precedence. + */ + if (wblock->dev.dev.driver && wblock->driver_ready) { + wmi_notify_driver(wblock); + } else { + if (wblock->handler) + wblock->handler(*event, wblock->handler_data); } + up_read(&wblock->notify_lock); acpi_bus_generate_netlink_event(wblock->acpi_device->pnp.device_class, dev_name(&wblock->dev.dev), *event, 0); From b73e43dcd7a8be26880ef8ff336053b29e79dbc5 Mon Sep 17 00:00:00 2001 From: Guanhua Gao Date: Thu, 18 Jan 2024 11:29:16 -0500 Subject: [PATCH 133/768] dmaengine: fsl-dpaa2-qdma: Fix the size of dma pools In case of long format of qDMA command descriptor, there are one frame descriptor, three entries in the frame list and two data entries. So the size of dma_pool_create for these three fields should be the same with the total size of entries respectively, or the contents may be overwritten by the next allocated descriptor. Fixes: 7fdf9b05c73b ("dmaengine: fsl-dpaa2-qdma: Add NXP dpaa2 qDMA controller driver for Layerscape SoCs") Signed-off-by: Guanhua Gao Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240118162917.2951450-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 7958ac33e36c..5a8061a307cd 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -38,15 +38,17 @@ static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan) if (!dpaa2_chan->fd_pool) goto err; - dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev, - sizeof(struct dpaa2_fl_entry), - sizeof(struct dpaa2_fl_entry), 0); + dpaa2_chan->fl_pool = + dma_pool_create("fl_pool", dev, + sizeof(struct dpaa2_fl_entry) * 3, + sizeof(struct dpaa2_fl_entry), 0); + if (!dpaa2_chan->fl_pool) goto err_fd; dpaa2_chan->sdd_pool = dma_pool_create("sdd_pool", dev, - sizeof(struct dpaa2_qdma_sd_d), + sizeof(struct dpaa2_qdma_sd_d) * 2, sizeof(struct dpaa2_qdma_sd_d), 0); if (!dpaa2_chan->sdd_pool) goto err_fl; From 8446f9d11678bc268897882e86733d73204f83c4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jan 2024 16:47:54 +0300 Subject: [PATCH 134/768] platform/x86: wmi: Fix wmi_dev_probe() This has a reversed if statement so it accidentally disables the wmi method before returning. Fixes: 704af3a40747 ("platform/x86: wmi: Remove chardev interface") Signed-off-by: Dan Carpenter Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/9c81251b-bc87-4ca3-bb86-843dc85e5145@moroto.mountain Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index d1df1a31de00..3c288e8f404b 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -896,7 +896,7 @@ static int wmi_dev_probe(struct device *dev) if (wdriver->probe) { ret = wdriver->probe(dev_to_wdev(dev), find_guid_context(wblock, wdriver)); - if (!ret) { + if (ret) { if (ACPI_FAILURE(wmi_method_enable(wblock, false))) dev_warn(dev, "Failed to disable device\n"); From 416de0246f35f43d871a57939671fe814f4455ee Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 4 Jan 2024 15:59:03 -0700 Subject: [PATCH 135/768] platform/x86: intel-uncore-freq: Fix types in sysfs callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When booting a kernel with CONFIG_CFI_CLANG, there is a CFI failure when accessing any of the values under /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00: $ cat /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz fish: Job 1, 'cat /sys/devices/system/cpu/int…' terminated by signal SIGSEGV (Address boundary error) $ sudo dmesg &| grep 'CFI failure' [ 170.953925] CFI failure at kobj_attr_show+0x19/0x30 (target: show_max_freq_khz+0x0/0xc0 [intel_uncore_frequency_common]; expected type: 0xd34078c5 The sysfs callback functions such as show_domain_id() are written as if they are going to be called by dev_attr_show() but as the above message shows, they are instead called by kobj_attr_show(). kCFI checks that the destination of an indirect jump has the exact same type as the prototype of the function pointer it is called through and fails when they do not. These callbacks are called through kobj_attr_show() because uncore_root_kobj was initialized with kobject_create_and_add(), which means uncore_root_kobj has a ->sysfs_ops of kobj_sysfs_ops from kobject_create(), which uses kobj_attr_show() as its ->show() value. The only reason there has not been a more noticeable problem until this point is that 'struct kobj_attribute' and 'struct device_attribute' have the same layout, so getting the callback from container_of() works the same with either value. Change all the callbacks and their uses to be compatible with kobj_attr_show() and kobj_attr_store(), which resolves the kCFI failure and allows the sysfs files to work properly. Closes: https://github.com/ClangBuiltLinux/linux/issues/1974 Fixes: ae7b2ce57851 ("platform/x86/intel/uncore-freq: Use sysfs API to create attributes") Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20240104-intel-uncore-freq-kcfi-fix-v1-1-bf1e8939af40@kernel.org Signed-off-by: Hans de Goede --- .../uncore-frequency-common.c | 82 +++++++++---------- .../uncore-frequency-common.h | 32 ++++---- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 33ab207493e3..33bb58dc3f78 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -23,23 +23,23 @@ static int (*uncore_read)(struct uncore_data *data, unsigned int *min, unsigned static int (*uncore_write)(struct uncore_data *data, unsigned int input, unsigned int min_max); static int (*uncore_read_freq)(struct uncore_data *data, unsigned int *freq); -static ssize_t show_domain_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_domain_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, domain_id_kobj_attr); return sprintf(buf, "%u\n", data->domain_id); } -static ssize_t show_fabric_cluster_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_fabric_cluster_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_kobj_attr); return sprintf(buf, "%u\n", data->cluster_id); } -static ssize_t show_package_id(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t show_package_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - struct uncore_data *data = container_of(attr, struct uncore_data, package_id_dev_attr); + struct uncore_data *data = container_of(attr, struct uncore_data, package_id_kobj_attr); return sprintf(buf, "%u\n", data->package_id); } @@ -97,30 +97,30 @@ static ssize_t show_perf_status_freq_khz(struct uncore_data *data, char *buf) } #define store_uncore_min_max(name, min_max) \ - static ssize_t store_##name(struct device *dev, \ - struct device_attribute *attr, \ + static ssize_t store_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, \ const char *buf, size_t count) \ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return store_min_max_freq_khz(data, buf, count, \ min_max); \ } #define show_uncore_min_max(name, min_max) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_min_max_freq_khz(data, buf, min_max); \ } #define show_uncore_perf_status(name) \ - static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ - struct uncore_data *data = container_of(attr, struct uncore_data, name##_dev_attr);\ + struct uncore_data *data = container_of(attr, struct uncore_data, name##_kobj_attr);\ \ return show_perf_status_freq_khz(data, buf); \ } @@ -134,11 +134,11 @@ show_uncore_min_max(max_freq_khz, 1); show_uncore_perf_status(current_freq_khz); #define show_uncore_data(member_name) \ - static ssize_t show_##member_name(struct device *dev, \ - struct device_attribute *attr, char *buf)\ + static ssize_t show_##member_name(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf)\ { \ struct uncore_data *data = container_of(attr, struct uncore_data,\ - member_name##_dev_attr);\ + member_name##_kobj_attr);\ \ return sysfs_emit(buf, "%u\n", \ data->member_name); \ @@ -149,29 +149,29 @@ show_uncore_data(initial_max_freq_khz); #define init_attribute_rw(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = store_##_name; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0644; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = store_##_name; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0644; \ } while (0) #define init_attribute_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0444; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0444; \ } while (0) #define init_attribute_root_ro(_name) \ do { \ - sysfs_attr_init(&data->_name##_dev_attr.attr); \ - data->_name##_dev_attr.show = show_##_name; \ - data->_name##_dev_attr.store = NULL; \ - data->_name##_dev_attr.attr.name = #_name; \ - data->_name##_dev_attr.attr.mode = 0400; \ + sysfs_attr_init(&data->_name##_kobj_attr.attr); \ + data->_name##_kobj_attr.show = show_##_name; \ + data->_name##_kobj_attr.store = NULL; \ + data->_name##_kobj_attr.attr.name = #_name; \ + data->_name##_kobj_attr.attr.mode = 0400; \ } while (0) static int create_attr_group(struct uncore_data *data, char *name) @@ -186,21 +186,21 @@ static int create_attr_group(struct uncore_data *data, char *name) if (data->domain_id != UNCORE_DOMAIN_ID_INVALID) { init_attribute_root_ro(domain_id); - data->uncore_attrs[index++] = &data->domain_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->domain_id_kobj_attr.attr; init_attribute_root_ro(fabric_cluster_id); - data->uncore_attrs[index++] = &data->fabric_cluster_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->fabric_cluster_id_kobj_attr.attr; init_attribute_root_ro(package_id); - data->uncore_attrs[index++] = &data->package_id_dev_attr.attr; + data->uncore_attrs[index++] = &data->package_id_kobj_attr.attr; } - data->uncore_attrs[index++] = &data->max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->max_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_min_freq_khz_kobj_attr.attr; + data->uncore_attrs[index++] = &data->initial_max_freq_khz_kobj_attr.attr; ret = uncore_read_freq(data, &freq); if (!ret) - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index++] = &data->current_freq_khz_kobj_attr.attr; data->uncore_attrs[index] = NULL; diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h index 7afb69977c7e..0e5bf507e555 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h @@ -26,14 +26,14 @@ * @instance_id: Unique instance id to append to directory name * @name: Sysfs entry name for this instance * @uncore_attr_group: Attribute group storage - * @max_freq_khz_dev_attr: Storage for device attribute max_freq_khz - * @mix_freq_khz_dev_attr: Storage for device attribute min_freq_khz - * @initial_max_freq_khz_dev_attr: Storage for device attribute initial_max_freq_khz - * @initial_min_freq_khz_dev_attr: Storage for device attribute initial_min_freq_khz - * @current_freq_khz_dev_attr: Storage for device attribute current_freq_khz - * @domain_id_dev_attr: Storage for device attribute domain_id - * @fabric_cluster_id_dev_attr: Storage for device attribute fabric_cluster_id - * @package_id_dev_attr: Storage for device attribute package_id + * @max_freq_khz_kobj_attr: Storage for kobject attribute max_freq_khz + * @mix_freq_khz_kobj_attr: Storage for kobject attribute min_freq_khz + * @initial_max_freq_khz_kobj_attr: Storage for kobject attribute initial_max_freq_khz + * @initial_min_freq_khz_kobj_attr: Storage for kobject attribute initial_min_freq_khz + * @current_freq_khz_kobj_attr: Storage for kobject attribute current_freq_khz + * @domain_id_kobj_attr: Storage for kobject attribute domain_id + * @fabric_cluster_id_kobj_attr: Storage for kobject attribute fabric_cluster_id + * @package_id_kobj_attr: Storage for kobject attribute package_id * @uncore_attrs: Attribute storage for group creation * * This structure is used to encapsulate all data related to uncore sysfs @@ -53,14 +53,14 @@ struct uncore_data { char name[32]; struct attribute_group uncore_attr_group; - struct device_attribute max_freq_khz_dev_attr; - struct device_attribute min_freq_khz_dev_attr; - struct device_attribute initial_max_freq_khz_dev_attr; - struct device_attribute initial_min_freq_khz_dev_attr; - struct device_attribute current_freq_khz_dev_attr; - struct device_attribute domain_id_dev_attr; - struct device_attribute fabric_cluster_id_dev_attr; - struct device_attribute package_id_dev_attr; + struct kobj_attribute max_freq_khz_kobj_attr; + struct kobj_attribute min_freq_khz_kobj_attr; + struct kobj_attribute initial_max_freq_khz_kobj_attr; + struct kobj_attribute initial_min_freq_khz_kobj_attr; + struct kobj_attribute current_freq_khz_kobj_attr; + struct kobj_attribute domain_id_kobj_attr; + struct kobj_attribute fabric_cluster_id_kobj_attr; + struct kobj_attribute package_id_kobj_attr; struct attribute *uncore_attrs[9]; }; From 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 8 Jan 2024 15:20:58 +0900 Subject: [PATCH 136/768] platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit p2sb_bar() unhides P2SB device to get resources from the device. It guards the operation by locking pci_rescan_remove_lock so that parallel rescans do not find the P2SB device. However, this lock causes deadlock when PCI bus rescan is triggered by /sys/bus/pci/rescan. The rescan locks pci_rescan_remove_lock and probes PCI devices. When PCI devices call p2sb_bar() during probe, it locks pci_rescan_remove_lock again. Hence the deadlock. To avoid the deadlock, do not lock pci_rescan_remove_lock in p2sb_bar(). Instead, do the lock at fs_initcall. Introduce p2sb_cache_resources() for fs_initcall which gets and caches the P2SB resources. At p2sb_bar(), refer the cache and return to the caller. Before operating the device at P2SB DEVFN for resource cache, check that its device class is PCI_CLASS_MEMORY_OTHER 0x0580 that PCH specifications define. This avoids unexpected operation to other devices at the same DEVFN. Link: https://lore.kernel.org/linux-pci/6xb24fjmptxxn5js2fjrrddjae6twex5bjaftwqsuawuqqqydx@7cl3uik5ef6j/ Fixes: 9745fb07474f ("platform/x86/intel: Add Primary to Sideband (P2SB) bridge support") Cc: stable@vger.kernel.org Suggested-by: Andy Shevchenko Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240108062059.3583028-2-shinichiro.kawasaki@wdc.com Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Tested-by Klara Modin Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/p2sb.c | 202 ++++++++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 52 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 1cf2471d54dd..17cc4b45e023 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = { {} }; +/* + * Cache BAR0 of P2SB device functions 0 to 7. + * TODO: The constant 8 is the number of functions that PCI specification + * defines. Same definitions exist tree-wide. Unify this definition and + * the other definitions then move to include/uapi/linux/pci.h. + */ +#define NR_P2SB_RES_CACHE 8 + +struct p2sb_res_cache { + u32 bus_dev_id; + struct resource res; +}; + +static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; + static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } +static bool p2sb_valid_resource(struct resource *res) +{ + if (res->flags) + return true; + + return false; +} + /* Copy resource from the first BAR of the device in question */ -static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -56,22 +79,108 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; +} + +static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) +{ + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; + struct pci_dev *pdev; + + pdev = pci_scan_single_device(bus, devfn); + if (!pdev) + return; + + p2sb_read_bar0(pdev, &cache->res); + cache->bus_dev_id = bus->dev.id; + + pci_stop_and_remove_bus_device(pdev); +} + +static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) +{ + unsigned int slot, fn; + + if (PCI_FUNC(devfn) == 0) { + /* + * When function number of the P2SB device is zero, scan it and + * other function numbers, and if devices are available, cache + * their BAR0s. + */ + slot = PCI_SLOT(devfn); + for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) + p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); + } else { + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + } + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; return 0; } -static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) { - struct pci_dev *pdev; + static struct pci_bus *p2sb_bus; + + bus = bus ?: p2sb_bus; + if (bus) + return bus; + + /* Assume P2SB is on the bus 0 in domain 0 */ + p2sb_bus = pci_find_bus(0, 0); + return p2sb_bus; +} + +static int p2sb_cache_resources(void) +{ + unsigned int devfn_p2sb; + u32 value = P2SBC_HIDE; + struct pci_bus *bus; + u16 class; int ret; - pdev = pci_scan_single_device(bus, devfn); - if (!pdev) + /* Get devfn for P2SB device itself */ + ret = p2sb_get_devfn(&devfn_p2sb); + if (ret) + return ret; + + bus = p2sb_get_bus(NULL); + if (!bus) return -ENODEV; - ret = p2sb_read_bar0(pdev, mem); + /* + * When a device with same devfn exists and its device class is not + * PCI_CLASS_MEMORY_OTHER for P2SB, do not touch it. + */ + pci_bus_read_config_word(bus, devfn_p2sb, PCI_CLASS_DEVICE, &class); + if (!PCI_POSSIBLE_ERROR(class) && class != PCI_CLASS_MEMORY_OTHER) + return -ENODEV; + + /* + * Prevent concurrent PCI bus scan from seeing the P2SB device and + * removing via sysfs while it is temporarily exposed. + */ + pci_lock_rescan_remove(); + + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * Unhide the P2SB device here, if needed. + */ + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + + ret = p2sb_scan_and_cache(bus, devfn_p2sb); + + /* Hide the P2SB device, if it was hidden */ + if (value & P2SBC_HIDE) + pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); + + pci_unlock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); return ret; } @@ -81,64 +190,53 @@ static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct re * @devfn: PCI slot and function to communicate with * @mem: memory resource to be filled in * - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * - * if @bus is NULL, the bus 0 in domain 0 will be used. + * If @bus is NULL, the bus 0 in domain 0 will be used. * If @devfn is 0, it will be replaced by devfn of the P2SB device. * * Caller must provide a valid pointer to @mem. * - * Locking is handled by pci_rescan_remove_lock mutex. - * * Return: * 0 on success or appropriate errno value on error. */ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) { - struct pci_dev *pdev_p2sb; - unsigned int devfn_p2sb; - u32 value = P2SBC_HIDE; + struct p2sb_res_cache *cache; int ret; - /* Get devfn for P2SB device itself */ - ret = p2sb_get_devfn(&devfn_p2sb); - if (ret) - return ret; - - /* if @bus is NULL, use bus 0 in domain 0 */ - bus = bus ?: pci_find_bus(0, 0); - - /* - * Prevent concurrent PCI bus scan from seeing the P2SB device and - * removing via sysfs while it is temporarily exposed. - */ - pci_lock_rescan_remove(); - - /* Unhide the P2SB device, if needed */ - pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - - pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); - if (devfn) - ret = p2sb_scan_and_read(bus, devfn, mem); - else - ret = p2sb_read_bar0(pdev_p2sb, mem); - pci_stop_and_remove_bus_device(pdev_p2sb); - - /* Hide the P2SB device, if it was hidden */ - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); - - pci_unlock_rescan_remove(); - - if (ret) - return ret; - - if (mem->flags == 0) + bus = p2sb_get_bus(bus); + if (!bus) return -ENODEV; + if (!devfn) { + ret = p2sb_get_devfn(&devfn); + if (ret) + return ret; + } + + cache = &p2sb_resources[PCI_FUNC(devfn)]; + if (cache->bus_dev_id != bus->dev.id) + return -ENODEV; + + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); + +static int __init p2sb_fs_init(void) +{ + p2sb_cache_resources(); + return 0; +} + +/* + * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can + * not be locked in sysfs pci bus rescan path because of deadlock. To + * avoid the deadlock, access to P2SB devices with the lock at an early + * step in kernel initialization and cache required resources. This + * should happen after subsys_initcall which initializes PCI subsystem + * and before device_initcall which requires P2SB resources. + */ +fs_initcall(p2sb_fs_init); From 9e054ed05ddae2c1b4b2bcb5dfcae49c5ac7637e Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 8 Jan 2024 15:20:59 +0900 Subject: [PATCH 137/768] platform/x86: p2sb: Use pci_resource_n() in p2sb_read_bar0() Accesses to resource[] member of struct pci_dev shall be wrapped with pci_resource_n() for future compatibility. Call the helper function in p2sb_read_bar0(). Suggested-by: Andy Shevchenko Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240108062059.3583028-3-shinichiro.kawasaki@wdc.com Tested-by Klara Modin Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/p2sb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 17cc4b45e023..6bd14d0132db 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -65,7 +65,7 @@ static bool p2sb_valid_resource(struct resource *res) /* Copy resource from the first BAR of the device in question */ static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { - struct resource *bar0 = &pdev->resource[0]; + struct resource *bar0 = pci_resource_n(pdev, 0); /* Make sure we have no dangling pointers in the output */ memset(mem, 0, sizeof(*mem)); From 348d9cc7bde30852aa4f54aa70a22c3ad5dd081a Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 6 Jan 2024 23:41:26 +0100 Subject: [PATCH 138/768] platform/x86: intel-wmi-sbl-fw-update: Fix function name in error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since when the driver was converted to use the bus-based WMI interface, the old GUID-based WMI functions are not used anymore. Update the error message to avoid confusing users. Compile-tested only. Fixes: 75c487fcb69c ("platform/x86: intel-wmi-sbl-fw-update: Use bus-based WMI interface") Signed-off-by: Armin Wolf Acked-by: Randy Dunlap Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240106224126.13803-1-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/wmi/sbl-fw-update.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/wmi/sbl-fw-update.c b/drivers/platform/x86/intel/wmi/sbl-fw-update.c index 9cf5ed0f8dc2..040153ad67c1 100644 --- a/drivers/platform/x86/intel/wmi/sbl-fw-update.c +++ b/drivers/platform/x86/intel/wmi/sbl-fw-update.c @@ -32,7 +32,7 @@ static int get_fwu_request(struct device *dev, u32 *out) return -ENODEV; if (obj->type != ACPI_TYPE_INTEGER) { - dev_warn(dev, "wmi_query_block returned invalid value\n"); + dev_warn(dev, "wmidev_block_query returned invalid value\n"); kfree(obj); return -EINVAL; } @@ -55,7 +55,7 @@ static int set_fwu_request(struct device *dev, u32 in) status = wmidev_block_set(to_wmi_device(dev), 0, &input); if (ACPI_FAILURE(status)) { - dev_err(dev, "wmi_set_block failed\n"); + dev_err(dev, "wmidev_block_set failed\n"); return -ENODEV; } From 41237735ccde2cc3fe1d83ae0b776a085be6a22f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 8 Jan 2024 15:06:55 +0100 Subject: [PATCH 139/768] platform/x86: silicom-platform: Add missing "Description:" for power_cycle sysfs attr The Documentation/ABI/testing/sysfs-platform-silicom entry for the power_cycle sysfs attr is missing the "Description:" keyword, add this. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240108140655.547261-1-hdegoede@redhat.com --- Documentation/ABI/testing/sysfs-platform-silicom | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ABI/testing/sysfs-platform-silicom b/Documentation/ABI/testing/sysfs-platform-silicom index 2288b3665d16..4d1cc5bdbcc5 100644 --- a/Documentation/ABI/testing/sysfs-platform-silicom +++ b/Documentation/ABI/testing/sysfs-platform-silicom @@ -10,6 +10,7 @@ What: /sys/devices/platform/silicom-platform/power_cycle Date: November 2023 KernelVersion: 6.7 Contact: Henry Shi +Description: This file allow user to power cycle the platform. Default value is 0; when set to 1, it powers down the platform, waits 5 seconds, then powers on the From 452c314988dbccc491a32b674a3945d93a23c87c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 14 Jan 2024 17:53:16 +0100 Subject: [PATCH 140/768] MAINTAINERS: Remove Perry Yuan as DELL WMI HARDWARE PRIVACY SUPPORT maintainer Recent mails to his Dell address bounced with "user unknown". So remove him as maintainer. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/c9757d0a-2046-464b-93e1-a2d9ab0ce36b@gmail.com Signed-off-by: Hans de Goede --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..30e231c8da37 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5958,7 +5958,6 @@ S: Maintained F: drivers/platform/x86/dell/dell-wmi-descriptor.c DELL WMI HARDWARE PRIVACY SUPPORT -M: Perry Yuan L: Dell.Client.Kernel@dell.com L: platform-driver-x86@vger.kernel.org S: Maintained From 20fe5e9be47efc952c66374334f7bb94e4a51d90 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Tue, 16 Jan 2024 10:18:29 +1300 Subject: [PATCH 141/768] MAINTAINERS: add Luke Jones as maintainer for asus notebooks Add myself as maintainer for "ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS" as suggested by Hans de Goede based on my history of contributions. Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20240115211829.48251-1-luke@ljones.dev Signed-off-by: Hans de Goede --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 30e231c8da37..8be1d172cc06 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3168,6 +3168,7 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary +M: Luke D. Jones L: acpi4asus-user@lists.sourceforge.net L: platform-driver-x86@vger.kernel.org S: Maintained From 8530ecaf35f23d02fdce49aded7227fc8f14ebcc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jan 2024 11:39:21 +0100 Subject: [PATCH 142/768] MAINTAINERS: remove defunct acpi4asus project info from asus notebooks section The acpi4asus project appears to be defunct, according to: https://sourceforge.net/p/acpi4asus/mailman/acpi4asus-user/ the last posts to the list were done in May 2020 and even then they were mostly spam. And the http://acpi4asus.sf.net website still talks about 2.6.x kernels. Drop the defunct mailing-list and update the W: entry to point to the new up2date https://asus-linux.org/ site. Cc: Corentin Chary Cc: Luke D. Jones Signed-off-by: Hans de Goede --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8be1d172cc06..d9f66b5dec6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3169,10 +3169,9 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary M: Luke D. Jones -L: acpi4asus-user@lists.sourceforge.net L: platform-driver-x86@vger.kernel.org S: Maintained -W: http://acpi4asus.sf.net +W: https://asus-linux.org/ F: drivers/platform/x86/asus*.c F: drivers/platform/x86/eeepc*.c From 84aef4ed59705585d629e81d633a83b7d416f5fb Mon Sep 17 00:00:00 2001 From: Wenhua Lin Date: Tue, 9 Jan 2024 15:38:48 +0800 Subject: [PATCH 143/768] gpio: eic-sprd: Clear interrupt after set the interrupt type The raw interrupt status of eic maybe set before the interrupt is enabled, since the eic interrupt has a latch function, which would trigger the interrupt event once enabled it from user side. To solve this problem, interrupts generated before setting the interrupt trigger type are ignored. Fixes: 25518e024e3a ("gpio: Add Spreadtrum EIC driver support") Acked-by: Chunyan Zhang Signed-off-by: Wenhua Lin Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-eic-sprd.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index be7f2fa5aa7b..806b88d8dfb7 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -330,20 +330,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); + sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IC, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_DBNC_IEV, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_DBNC_IC, 1); + } break; default: return -ENOTSUPP; @@ -355,20 +362,27 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) switch (flow_type) { case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTCLR, 1); break; case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_EDGE_FALLING: case IRQ_TYPE_EDGE_BOTH: state = sprd_eic_get(chip, offset); - if (state) + if (state) { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 0); - else + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } else { sprd_eic_update(chip, offset, SPRD_EIC_LATCH_INTPOL, 1); + sprd_eic_update(chip, offset, + SPRD_EIC_LATCH_INTCLR, 1); + } break; default: return -ENOTSUPP; @@ -382,29 +396,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_ASYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: @@ -417,29 +436,34 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type) sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 1); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; case IRQ_TYPE_LEVEL_LOW: sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 0); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 1); sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTPOL, 0); + sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTCLR, 1); irq_set_handler_locked(data, handle_level_irq); break; default: From 80c86ff6800b857c8008cebe7b8d22a6e574e68d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 9 Jan 2024 11:49:07 +0000 Subject: [PATCH 144/768] arm64: dts: exynos: gs101: comply with the new cmu_misc clock names The cmu_misc clock-names were renamed to just "bus" and "sss" because naming is local to the module, so cmu_misc is implied. As the bindings and the device tree have not made a release yet, comply with the renamed clocks. Suggested-by: Rob Herring Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240109114908.3623645-3-tudor.ambarus@linaro.org Signed-off-by: Krzysztof Kozlowski --- arch/arm64/boot/dts/exynos/google/gs101.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/exynos/google/gs101.dtsi b/arch/arm64/boot/dts/exynos/google/gs101.dtsi index 9747cb3fa03a..d838e3a7af6e 100644 --- a/arch/arm64/boot/dts/exynos/google/gs101.dtsi +++ b/arch/arm64/boot/dts/exynos/google/gs101.dtsi @@ -289,7 +289,7 @@ #clock-cells = <1>; clocks = <&cmu_top CLK_DOUT_CMU_MISC_BUS>, <&cmu_top CLK_DOUT_CMU_MISC_SSS>; - clock-names = "dout_cmu_misc_bus", "dout_cmu_misc_sss"; + clock-names = "bus", "sss"; }; watchdog_cl0: watchdog@10060000 { From eab4f56d3e75dad697acf8dc2c8be3c341d6c63e Mon Sep 17 00:00:00 2001 From: Artur Weber Date: Fri, 5 Jan 2024 07:53:01 +0100 Subject: [PATCH 145/768] ARM: dts: exynos4212-tab3: add samsung,invert-vclk flag to fimd After more investigation, I've found that it's not the panel driver config that needs to be modified to invert the data polarity, but the FIMD config. Add the missing invert-vclk option that is required to get the display to work correctly. Fixes: ee37a457af1d ("ARM: dts: exynos: Add Samsung Galaxy Tab 3 8.0 boards") Signed-off-by: Artur Weber Link: https://lore.kernel.org/r/20240105-tab3-display-fixes-v2-1-904d1207bf6f@gmail.com Signed-off-by: Krzysztof Kozlowski --- arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi index d7954ff466b4..e5254e32aa8f 100644 --- a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi +++ b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi @@ -434,6 +434,7 @@ }; &fimd { + samsung,invert-vclk; status = "okay"; }; From 5744ba05e7c4bff8fec133dd0f9e51ddffba92f5 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 19 Jan 2024 18:22:35 +0800 Subject: [PATCH 146/768] tun: fix missing dropped counter in tun_xdp_act The commit 8ae1aff0b331 ("tuntap: split out XDP logic") includes dropped counter for XDP_DROP, XDP_ABORTED, and invalid XDP actions. Unfortunately, that commit missed the dropped counter when error occurs during XDP_TX and XDP_REDIRECT actions. This patch fixes this issue. Fixes: 8ae1aff0b331 ("tuntap: split out XDP logic") Signed-off-by: Yunjian Wang Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index afa5497f7c35..237fef557ba5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1630,13 +1630,17 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, switch (act) { case XDP_REDIRECT: err = xdp_do_redirect(tun->dev, xdp, xdp_prog); - if (err) + if (err) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } break; case XDP_TX: err = tun_xdp_tx(tun->dev, xdp); - if (err < 0) + if (err < 0) { + dev_core_stats_rx_dropped_inc(tun->dev); return err; + } break; case XDP_PASS: break; From f1084c427f55d573fcd5688d9ba7b31b78019716 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 19 Jan 2024 18:22:56 +0800 Subject: [PATCH 147/768] tun: add missing rx stats accounting in tun_xdp_act The TUN can be used as vhost-net backend, and it is necessary to count the packets transmitted from TUN to vhost-net/virtio-net. However, there are some places in the receive path that were not taken into account when using XDP. It would be beneficial to also include new accounting for successfully received bytes using dev_sw_netstats_rx_add. Fixes: 761876c857cb ("tap: XDP support") Signed-off-by: Yunjian Wang Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 237fef557ba5..4a4f8c8e79fa 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1634,6 +1634,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, dev_core_stats_rx_dropped_inc(tun->dev); return err; } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_TX: err = tun_xdp_tx(tun->dev, xdp); @@ -1641,6 +1642,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, dev_core_stats_rx_dropped_inc(tun->dev); return err; } + dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_PASS: break; From c6a783be82c893c6f124a5853bef2edeaf26dadf Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 18 Jan 2024 04:23:40 -0800 Subject: [PATCH 148/768] thermal: intel: powerclamp: Remove dead code for target mwait value After conversion of this driver to use powercap idle_inject core, this driver doesn't use target_mwait value. So remove dead code. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 32 ------------------------ 1 file changed, 32 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5ac5cb60bae6..bc6eb0dd66a4 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -49,7 +49,6 @@ */ #define DEFAULT_DURATION_JIFFIES (6) -static unsigned int target_mwait; static struct dentry *debug_dir; static bool poll_pkg_cstate_enable; @@ -312,34 +311,6 @@ MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n" "\twindow size results in slower response time but more smooth\n" "\tclamping results. default to 2."); -static void find_target_mwait(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); - - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) - return; - - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - -} - struct pkg_cstate_info { bool skip; int msr_index; @@ -759,9 +730,6 @@ static int __init powerclamp_probe(void) return -ENODEV; } - /* find the deepest mwait value */ - find_target_mwait(); - return 0; } From b6a11a7fc4d6337f7ea720b9287d1b9749c4eae0 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 19 Jan 2024 14:43:01 +0100 Subject: [PATCH 149/768] dpll: fix broken error path in dpll_pin_alloc(..) If pin type is not expected, or pin properities failed to allocate memory, the unwind error path shall not destroy pin's xarrays, which were not yet initialized. Add new goto label and use it to fix broken error path. Reviewed-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: David S. Miller --- drivers/dpll/dpll_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 1eca8cc271f8..c08772ee9fd6 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -441,7 +441,7 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, if (WARN_ON(prop->type < DPLL_PIN_TYPE_MUX || prop->type > DPLL_PIN_TYPE_MAX)) { ret = -EINVAL; - goto err; + goto err_pin_prop; } pin->prop = prop; refcount_set(&pin->refcount, 1); @@ -450,11 +450,12 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, ret = xa_alloc_cyclic(&dpll_pin_xa, &pin->id, pin, xa_limit_32b, &dpll_pin_xa_id, GFP_KERNEL); if (ret) - goto err; + goto err_xa_alloc; return pin; -err: +err_xa_alloc: xa_destroy(&pin->dpll_refs); xa_destroy(&pin->parent_refs); +err_pin_prop: kfree(pin); return ERR_PTR(ret); } From 830ead5fb0c5855ce4d70ba2ed4a673b5f1e7d9b Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 19 Jan 2024 14:43:02 +0100 Subject: [PATCH 150/768] dpll: fix pin dump crash for rebound module When a kernel module is unbound but the pin resources were not entirely freed (other kernel module instance of the same PCI device have had kept the reference to that pin), and kernel module is again bound, the pin properties would not be updated (the properties are only assigned when memory for the pin is allocated), prop pointer still points to the kernel module memory of the kernel module which was deallocated on the unbind. If the pin dump is invoked in this state, the result is a kernel crash. Prevent the crash by storing persistent pin properties in dpll subsystem, copy the content from the kernel module when pin is allocated, instead of using memory of the kernel module. Fixes: 9431063ad323 ("dpll: core: Add DPLL framework base functions") Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Reviewed-by: Jan Glaza Reviewed-by: Przemek Kitszel Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/dpll/dpll_core.c | 55 +++++++++++++++++++++++++++++++++++-- drivers/dpll/dpll_core.h | 4 +-- drivers/dpll/dpll_netlink.c | 28 +++++++++---------- 3 files changed, 69 insertions(+), 18 deletions(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index c08772ee9fd6..cb62696467d1 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -425,6 +425,53 @@ void dpll_device_unregister(struct dpll_device *dpll, } EXPORT_SYMBOL_GPL(dpll_device_unregister); +static void dpll_pin_prop_free(struct dpll_pin_properties *prop) +{ + kfree(prop->package_label); + kfree(prop->panel_label); + kfree(prop->board_label); + kfree(prop->freq_supported); +} + +static int dpll_pin_prop_dup(const struct dpll_pin_properties *src, + struct dpll_pin_properties *dst) +{ + memcpy(dst, src, sizeof(*dst)); + if (src->freq_supported && src->freq_supported_num) { + size_t freq_size = src->freq_supported_num * + sizeof(*src->freq_supported); + dst->freq_supported = kmemdup(src->freq_supported, + freq_size, GFP_KERNEL); + if (!src->freq_supported) + return -ENOMEM; + } + if (src->board_label) { + dst->board_label = kstrdup(src->board_label, GFP_KERNEL); + if (!dst->board_label) + goto err_board_label; + } + if (src->panel_label) { + dst->panel_label = kstrdup(src->panel_label, GFP_KERNEL); + if (!dst->panel_label) + goto err_panel_label; + } + if (src->package_label) { + dst->package_label = kstrdup(src->package_label, GFP_KERNEL); + if (!dst->package_label) + goto err_package_label; + } + + return 0; + +err_package_label: + kfree(dst->panel_label); +err_panel_label: + kfree(dst->board_label); +err_board_label: + kfree(dst->freq_supported); + return -ENOMEM; +} + static struct dpll_pin * dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, const struct dpll_pin_properties *prop) @@ -443,7 +490,9 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, ret = -EINVAL; goto err_pin_prop; } - pin->prop = prop; + ret = dpll_pin_prop_dup(prop, &pin->prop); + if (ret) + goto err_pin_prop; refcount_set(&pin->refcount, 1); xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC); xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC); @@ -455,6 +504,7 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, err_xa_alloc: xa_destroy(&pin->dpll_refs); xa_destroy(&pin->parent_refs); + dpll_pin_prop_free(&pin->prop); err_pin_prop: kfree(pin); return ERR_PTR(ret); @@ -515,6 +565,7 @@ void dpll_pin_put(struct dpll_pin *pin) xa_destroy(&pin->dpll_refs); xa_destroy(&pin->parent_refs); xa_erase(&dpll_pin_xa, pin->id); + dpll_pin_prop_free(&pin->prop); kfree(pin); } mutex_unlock(&dpll_lock); @@ -637,7 +688,7 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, unsigned long i, stop; int ret; - if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX)) + if (WARN_ON(parent->prop.type != DPLL_PIN_TYPE_MUX)) return -EINVAL; if (WARN_ON(!ops) || diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h index 5585873c5c1b..717f715015c7 100644 --- a/drivers/dpll/dpll_core.h +++ b/drivers/dpll/dpll_core.h @@ -44,7 +44,7 @@ struct dpll_device { * @module: module of creator * @dpll_refs: hold referencees to dplls pin was registered with * @parent_refs: hold references to parent pins pin was registered with - * @prop: pointer to pin properties given by registerer + * @prop: pin properties copied from the registerer * @rclk_dev_name: holds name of device when pin can recover clock from it * @refcount: refcount **/ @@ -55,7 +55,7 @@ struct dpll_pin { struct module *module; struct xarray dpll_refs; struct xarray parent_refs; - const struct dpll_pin_properties *prop; + struct dpll_pin_properties prop; refcount_t refcount; }; diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 3370dbddb86b..30f5be020862 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -303,17 +303,17 @@ dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin, if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), &freq, DPLL_A_PIN_PAD)) return -EMSGSIZE; - for (fs = 0; fs < pin->prop->freq_supported_num; fs++) { + for (fs = 0; fs < pin->prop.freq_supported_num; fs++) { nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED); if (!nest) return -EMSGSIZE; - freq = pin->prop->freq_supported[fs].min; + freq = pin->prop.freq_supported[fs].min; if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq), &freq, DPLL_A_PIN_PAD)) { nla_nest_cancel(msg, nest); return -EMSGSIZE; } - freq = pin->prop->freq_supported[fs].max; + freq = pin->prop.freq_supported[fs].max; if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq), &freq, DPLL_A_PIN_PAD)) { nla_nest_cancel(msg, nest); @@ -329,9 +329,9 @@ static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq) { int fs; - for (fs = 0; fs < pin->prop->freq_supported_num; fs++) - if (freq >= pin->prop->freq_supported[fs].min && - freq <= pin->prop->freq_supported[fs].max) + for (fs = 0; fs < pin->prop.freq_supported_num; fs++) + if (freq >= pin->prop.freq_supported[fs].min && + freq <= pin->prop.freq_supported[fs].max) return true; return false; } @@ -421,7 +421,7 @@ static int dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, struct netlink_ext_ack *extack) { - const struct dpll_pin_properties *prop = pin->prop; + const struct dpll_pin_properties *prop = &pin->prop; struct dpll_pin_ref *ref; int ret; @@ -717,7 +717,7 @@ dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx, int ret; if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE & - pin->prop->capabilities)) { + pin->prop.capabilities)) { NL_SET_ERR_MSG(extack, "state changing is not allowed"); return -EOPNOTSUPP; } @@ -753,7 +753,7 @@ dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin, int ret; if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE & - pin->prop->capabilities)) { + pin->prop.capabilities)) { NL_SET_ERR_MSG(extack, "state changing is not allowed"); return -EOPNOTSUPP; } @@ -780,7 +780,7 @@ dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin, int ret; if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE & - pin->prop->capabilities)) { + pin->prop.capabilities)) { NL_SET_ERR_MSG(extack, "prio changing is not allowed"); return -EOPNOTSUPP; } @@ -808,7 +808,7 @@ dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll, int ret; if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE & - pin->prop->capabilities)) { + pin->prop.capabilities)) { NL_SET_ERR_MSG(extack, "direction changing is not allowed"); return -EOPNOTSUPP; } @@ -838,8 +838,8 @@ dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr, int ret; phase_adj = nla_get_s32(phase_adj_attr); - if (phase_adj > pin->prop->phase_range.max || - phase_adj < pin->prop->phase_range.min) { + if (phase_adj > pin->prop.phase_range.max || + phase_adj < pin->prop.phase_range.min) { NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr, "phase adjust value not supported"); return -EINVAL; @@ -1023,7 +1023,7 @@ dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr, unsigned long i; xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) { - prop = pin->prop; + prop = &pin->prop; cid_match = clock_id ? pin->clock_id == clock_id : true; mod_match = mod_name_attr && module_name(pin->module) ? !nla_strcmp(mod_name_attr, From db2ec3c94667eaeecc6a74d96594fab6baf80fdc Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 19 Jan 2024 14:43:03 +0100 Subject: [PATCH 151/768] dpll: fix userspace availability of pins If parent pin was unregistered but child pin was not, the userspace would see the "zombie" pins - the ones that were registered with a parent pin (dpll_pin_on_pin_register(..)). Technically those are not available - as there is no dpll device in the system. Do not dump those pins and prevent userspace from any interaction with them. Provide a unified function to determine if the pin is available and use it before acting/responding for user requests. Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Reviewed-by: Jan Glaza Reviewed-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: David S. Miller --- drivers/dpll/dpll_netlink.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 30f5be020862..314bb3775465 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -553,6 +553,24 @@ __dpll_device_change_ntf(struct dpll_device *dpll) return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll); } +static bool dpll_pin_available(struct dpll_pin *pin) +{ + struct dpll_pin_ref *par_ref; + unsigned long i; + + if (!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED)) + return false; + xa_for_each(&pin->parent_refs, i, par_ref) + if (xa_get_mark(&dpll_pin_xa, par_ref->pin->id, + DPLL_REGISTERED)) + return true; + xa_for_each(&pin->dpll_refs, i, par_ref) + if (xa_get_mark(&dpll_device_xa, par_ref->dpll->id, + DPLL_REGISTERED)) + return true; + return false; +} + /** * dpll_device_change_ntf - notify that the dpll device has been changed * @dpll: registered dpll pointer @@ -579,7 +597,7 @@ dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin) int ret = -ENOMEM; void *hdr; - if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED))) + if (!dpll_pin_available(pin)) return -ENODEV; msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); @@ -1130,6 +1148,10 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) } pin = dpll_pin_find_from_nlattr(info); if (!IS_ERR(pin)) { + if (!dpll_pin_available(pin)) { + nlmsg_free(msg); + return -ENODEV; + } ret = dpll_msg_add_pin_handle(msg, pin); if (ret) { nlmsg_free(msg); @@ -1179,6 +1201,8 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED, ctx->idx) { + if (!dpll_pin_available(pin)) + continue; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &dpll_nl_family, NLM_F_MULTI, @@ -1441,7 +1465,8 @@ int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, } info->user_ptr[0] = xa_load(&dpll_pin_xa, nla_get_u32(info->attrs[DPLL_A_PIN_ID])); - if (!info->user_ptr[0]) { + if (!info->user_ptr[0] || + !dpll_pin_available(info->user_ptr[0])) { NL_SET_ERR_MSG(info->extack, "pin not found"); ret = -ENODEV; goto unlock_dev; From 7dc5b18ff71bd6f948810ab8a08b6a6ff8b315c5 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 19 Jan 2024 14:43:04 +0100 Subject: [PATCH 152/768] dpll: fix register pin with unregistered parent pin In case of multiple kernel module instances using the same dpll device: if only one registers dpll device, then only that one can register directly connected pins with a dpll device. When unregistered parent is responsible for determining if the muxed pin can be registered with it or not, the drivers need to be loaded in serialized order to work correctly - first the driver instance which registers the direct pins needs to be loaded, then the other instances could register muxed type pins. Allow registration of a pin with a parent even if the parent was not yet registered, thus allow ability for unserialized driver instance load order. Do not WARN_ON notification for unregistered pin, which can be invoked for described case, instead just return error. Fixes: 9431063ad323 ("dpll: core: Add DPLL framework base functions") Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Reviewed-by: Jan Glaza Reviewed-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: David S. Miller --- drivers/dpll/dpll_core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index cb62696467d1..5152bd1b0daf 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -29,8 +29,6 @@ static u32 dpll_pin_xa_id; WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED)) #define ASSERT_DPLL_NOT_REGISTERED(d) \ WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED)) -#define ASSERT_PIN_REGISTERED(p) \ - WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED)) struct dpll_device_registration { struct list_head list; @@ -616,8 +614,6 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, WARN_ON(!ops->state_on_dpll_get) || WARN_ON(!ops->direction_get)) return -EINVAL; - if (ASSERT_DPLL_REGISTERED(dpll)) - return -EINVAL; mutex_lock(&dpll_lock); if (WARN_ON(!(dpll->module == pin->module && @@ -695,8 +691,6 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, WARN_ON(!ops->state_on_pin_get) || WARN_ON(!ops->direction_get)) return -EINVAL; - if (ASSERT_PIN_REGISTERED(parent)) - return -EINVAL; mutex_lock(&dpll_lock); ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv); From aaf632f7ab6dec57bc9329a438f94504fe8034b9 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 19 Jan 2024 11:47:50 +0100 Subject: [PATCH 153/768] net: micrel: Fix PTP frame parsing for lan8814 The HW has the capability to check each frame if it is a PTP frame, which domain it is, which ptp frame type it is, different ip address in the frame. And if one of these checks fail then the frame is not timestamp. Most of these checks were disabled except checking the field minorVersionPTP inside the PTP header. Meaning that once a partner sends a frame compliant to 8021AS which has minorVersionPTP set to 1, then the frame was not timestamp because the HW expected by default a value of 0 in minorVersionPTP. This is exactly the same issue as on lan8841. Fix this issue by removing this check so the userspace can decide on this. Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Reviewed-by: Maxime Chevallier Reviewed-by: Divya Koppera Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 81c20eb4b54b..dad720138baa 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -120,6 +120,11 @@ */ #define LAN8814_1PPM_FORMAT 17179 +#define PTP_RX_VERSION 0x0248 +#define PTP_TX_VERSION 0x0288 +#define PTP_MAX_VERSION(x) (((x) & GENMASK(7, 0)) << 8) +#define PTP_MIN_VERSION(x) ((x) & GENMASK(7, 0)) + #define PTP_RX_MOD 0x024F #define PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3) #define PTP_RX_TIMESTAMP_EN 0x024D @@ -3150,6 +3155,12 @@ static void lan8814_ptp_init(struct phy_device *phydev) lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0); lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0); + /* Disable checking for minorVersionPTP field */ + lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION, + PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0)); + skb_queue_head_init(&ptp_priv->tx_queue); skb_queue_head_init(&ptp_priv->rx_queue); INIT_LIST_HEAD(&ptp_priv->rx_ts_list); From 8cbc756b802605dee3dd40019bd75960772bacf5 Mon Sep 17 00:00:00 2001 From: Liming Sun Date: Thu, 11 Jan 2024 12:31:06 -0500 Subject: [PATCH 154/768] platform/mellanox: mlxbf-tmfifo: Drop Tx network packet when Tx TmFIFO is full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from Linux 5.16 kernel, Tx timeout mechanism was added in the virtio_net driver which prints the "Tx timeout" warning message when a packet stays in Tx queue for too long. Below is an example of the reported message: "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since last trans: 3079892256". This issue could happen when external host driver which drains the FIFO is restared, stopped or upgraded. To avoid such confusing "Tx timeout" messages, this commit adds logic to drop the outstanding Tx packet if it's not able to transmit in two seconds due to Tx FIFO full, which can be considered as congestion or out-of-resource drop. This commit also handles the special case that the packet is half- transmitted into the Tx FIFO. In such case, the packet is discarded with remaining length stored in vring->rem_padding. So paddings with zeros can be sent out when Tx space is available to maintain the integrity of the packet format. The padded packet will be dropped on the receiving side. Signed-off-by: Liming Sun Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240111173106.96958-1-limings@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index ed16ec422a7b..b8d1e32e97eb 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -47,6 +47,9 @@ /* Message with data needs at least two words (for header & data). */ #define MLXBF_TMFIFO_DATA_MIN_WORDS 2 +/* Tx timeout in milliseconds. */ +#define TMFIFO_TX_TIMEOUT 2000 + /* ACPI UID for BlueField-3. */ #define TMFIFO_BF3_UID 1 @@ -62,12 +65,14 @@ struct mlxbf_tmfifo; * @drop_desc: dummy desc for packet dropping * @cur_len: processed length of the current descriptor * @rem_len: remaining length of the pending packet + * @rem_padding: remaining bytes to send as paddings * @pkt_len: total length of the pending packet * @next_avail: next avail descriptor id * @num: vring size (number of descriptors) * @align: vring alignment size * @index: vring index * @vdev_id: vring virtio id (VIRTIO_ID_xxx) + * @tx_timeout: expire time of last tx packet * @fifo: pointer to the tmfifo structure */ struct mlxbf_tmfifo_vring { @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring { struct vring_desc drop_desc; int cur_len; int rem_len; + int rem_padding; u32 pkt_len; u16 next_avail; int num; int align; int index; int vdev_id; + unsigned long tx_timeout; struct mlxbf_tmfifo *fifo; }; @@ -819,6 +826,50 @@ mlxbf_tmfifo_desc_done: return true; } +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring) +{ + unsigned long flags; + + /* Only handle Tx timeout for network vdev. */ + if (vring->vdev_id != VIRTIO_ID_NET) + return; + + /* Initialize the timeout or return if not expired. */ + if (!vring->tx_timeout) { + /* Initialize the timeout. */ + vring->tx_timeout = jiffies + + msecs_to_jiffies(TMFIFO_TX_TIMEOUT); + return; + } else if (time_before(jiffies, vring->tx_timeout)) { + /* Return if not timeout yet. */ + return; + } + + /* + * Drop the packet after timeout. The outstanding packet is + * released and the remaining bytes will be sent with padding byte 0x00 + * as a recovery. On the peer(host) side, the padding bytes 0x00 will be + * either dropped directly, or appended into existing outstanding packet + * thus dropped as corrupted network packet. + */ + vring->rem_padding = round_up(vring->rem_len, sizeof(u64)); + mlxbf_tmfifo_release_pkt(vring); + vring->cur_len = 0; + vring->rem_len = 0; + vring->fifo->vring[0] = NULL; + + /* + * Make sure the load/store are in order before + * returning back to virtio. + */ + virtio_mb(false); + + /* Notify upper layer. */ + spin_lock_irqsave(&vring->fifo->spin_lock[0], flags); + vring_interrupt(0, vring->vq); + spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags); +} + /* Rx & Tx processing of a queue. */ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) { @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) return; do { +retry: /* Get available FIFO space. */ if (avail == 0) { if (is_rx) @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) break; } + /* Insert paddings for discarded Tx packet. */ + if (!is_rx) { + vring->tx_timeout = 0; + while (vring->rem_padding >= sizeof(u64)) { + writeq(0, vring->fifo->tx.data); + vring->rem_padding -= sizeof(u64); + if (--avail == 0) + goto retry; + } + } + /* Console output always comes from the Tx buffer. */ if (!is_rx && devid == VIRTIO_ID_CONSOLE) { mlxbf_tmfifo_console_tx(fifo, avail); @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) /* Handle one descriptor. */ more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail); } while (more); + + /* Check Tx timeout. */ + if (avail <= 0 && !is_rx) + mlxbf_tmfifo_check_tx_timeout(vring); } /* Handle Rx or Tx queues. */ From 732c35ce6d4892f7b07cc9aca61a6ad0fd400a26 Mon Sep 17 00:00:00 2001 From: Shravan Kumar Ramani Date: Wed, 17 Jan 2024 05:01:34 -0500 Subject: [PATCH 155/768] platform/mellanox: mlxbf-pmc: Fix offset calculation for crspace events The event selector fields for 2 counters are contained in one 32-bit register and the current logic does not account for this. Fixes: 423c3361855c ("platform/mellanox: mlxbf-pmc: Add support for BlueField-3") Signed-off-by: Shravan Kumar Ramani Reviewed-by: David Thompson Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/8834cfa496c97c7c2fcebcfca5a2aa007e20ae96.1705485095.git.shravankr@nvidia.com Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 1dd84c7a79de..b1995ac268d7 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1170,7 +1170,7 @@ static int mlxbf_pmc_program_crspace_counter(int blk_num, uint32_t cnt_num, int ret; addr = pmc->block[blk_num].mmio_base + - (rounddown(cnt_num, 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); + ((cnt_num / 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); ret = mlxbf_pmc_readl(addr, &word); if (ret) return ret; @@ -1413,7 +1413,7 @@ static int mlxbf_pmc_read_crspace_event(int blk_num, uint32_t cnt_num, int ret; addr = pmc->block[blk_num].mmio_base + - (rounddown(cnt_num, 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); + ((cnt_num / 2) * MLXBF_PMC_CRSPACE_PERFSEL_SZ); ret = mlxbf_pmc_readl(addr, &word); if (ret) return ret; From 6e2276203ac9ff10fc76917ec9813c660f627369 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 11:19:29 +0800 Subject: [PATCH 156/768] dmaengine: ti: edma: Add some null pointer checks to the edma_probe devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240118031929.192192-1-chentao@kylinos.cn Signed-off-by: Vinod Koul --- drivers/dma/ti/edma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index f1f920861fa9..5f8d2e93ff3f 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2404,6 +2404,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, ecc); if (ret) { @@ -2420,6 +2425,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, ecc); if (ret) { From 13e788deb7348cc88df34bed736c3b3b9927ea52 Mon Sep 17 00:00:00 2001 From: Sharath Srinivasan Date: Fri, 19 Jan 2024 17:48:39 -0800 Subject: [PATCH 157/768] net/rds: Fix UBSAN: array-index-out-of-bounds in rds_cmsg_recv Syzcaller UBSAN crash occurs in rds_cmsg_recv(), which reads inc->i_rx_lat_trace[j + 1] with index 4 (3 + 1), but with array size of 4 (RDS_RX_MAX_TRACES). Here 'j' is assigned from rs->rs_rx_trace[i] and in-turn from trace.rx_trace_pos[i] in rds_recv_track_latency(), with both arrays sized 3 (RDS_MSG_RX_DGRAM_TRACE_MAX). So fix the off-by-one bounds check in rds_recv_track_latency() to prevent a potential crash in rds_cmsg_recv(). Found by syzcaller: ================================================================= UBSAN: array-index-out-of-bounds in net/rds/recv.c:585:39 index 4 is out of range for type 'u64 [4]' CPU: 1 PID: 8058 Comm: syz-executor228 Not tainted 6.6.0-gd2f51b3516da #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x136/0x150 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:217 [inline] __ubsan_handle_out_of_bounds+0xd5/0x130 lib/ubsan.c:348 rds_cmsg_recv+0x60d/0x700 net/rds/recv.c:585 rds_recvmsg+0x3fb/0x1610 net/rds/recv.c:716 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0xe2/0x160 net/socket.c:1066 __sys_recvfrom+0x1b6/0x2f0 net/socket.c:2246 __do_sys_recvfrom net/socket.c:2264 [inline] __se_sys_recvfrom net/socket.c:2260 [inline] __x64_sys_recvfrom+0xe0/0x1b0 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b ================================================================== Fixes: 3289025aedc0 ("RDS: add receive message trace used by application") Reported-by: Chenyuan Yang Closes: https://lore.kernel.org/linux-rdma/CALGdzuoVdq-wtQ4Az9iottBqC5cv9ZhcE5q8N7LfYFvkRsOVcw@mail.gmail.com/ Signed-off-by: Sharath Srinivasan Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/rds/af_rds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 01c4cdfef45d..8435a20968ef 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { - if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { + if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } From bc9847c9ba134cfe3398011e343dcf6588c1c902 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Wed, 3 Jan 2024 14:37:55 +0530 Subject: [PATCH 158/768] dmaengine: ti: k3-udma: Report short packet errors Propagate the TR response status to the device using BCDMA split-channels. For example CSI-RX driver should be able to check if a frame was not transferred completely (short packet) and needs to be discarded. Fixes: 25dcb5dd7b7c ("dmaengine: ti: New driver for K3 UDMA") Signed-off-by: Jai Luthra Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20240103-tr_resp_err-v1-1-2fdf6d48ab92@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 2841a539c264..6400d06588a2 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -3968,6 +3968,7 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, { struct udma_chan *uc = to_udma_chan(&vc->chan); struct udma_desc *d; + u8 status; if (!vd) return; @@ -3977,12 +3978,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, if (d->metadata_size) udma_fetch_epib(uc, d); - /* Provide residue information for the client */ if (result) { void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx); if (cppi5_desc_get_type(desc_vaddr) == CPPI5_INFO0_DESC_TYPE_VAL_HOST) { + /* Provide residue information for the client */ result->residue = d->residue - cppi5_hdesc_get_pktlen(desc_vaddr); if (result->residue) @@ -3991,7 +3992,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, result->result = DMA_TRANS_NOERROR; } else { result->residue = 0; - result->result = DMA_TRANS_NOERROR; + /* Propagate TR Response errors to the client */ + status = d->hwdesc[0].tr_resp_base->status; + if (status) + result->result = DMA_TRANS_ABORTED; + else + result->result = DMA_TRANS_NOERROR; } } } From 968bc1d7203d384e72afe34124a1801b7af76514 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:03 +0100 Subject: [PATCH 159/768] dmaengine: fsl-qdma: Fix a memory leak related to the status queue DMA This dma_alloc_coherent() is undone in the remove function, but not in the error handling path of fsl_qdma_probe(). Switch to the managed version to fix the issue in the probe and simplify the remove function. Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/a0ef5d0f5a47381617ef339df776ddc68ce48173.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index a1d0aa63142a..28c4e86bbcd2 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -563,11 +563,11 @@ static struct fsl_qdma_queue /* * Buffer for queue command */ - status_head->cq = dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - status_size, - &status_head->bus_addr, - GFP_KERNEL); + status_head->cq = dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + status_size, + &status_head->bus_addr, + GFP_KERNEL); if (!status_head->cq) { devm_kfree(&pdev->dev, status_head); return NULL; @@ -1268,8 +1268,6 @@ static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev) static void fsl_qdma_remove(struct platform_device *pdev) { - int i; - struct fsl_qdma_queue *status; struct device_node *np = pdev->dev.of_node; struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev); @@ -1277,12 +1275,6 @@ static void fsl_qdma_remove(struct platform_device *pdev) fsl_qdma_cleanup_vchan(&fsl_qdma->dma_dev); of_dma_controller_free(np); dma_async_device_unregister(&fsl_qdma->dma_dev); - - for (i = 0; i < fsl_qdma->block_number; i++) { - status = fsl_qdma->status[i]; - dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) * - status->n_cq, status->cq, status->bus_addr); - } } static const struct of_device_id fsl_qdma_dt_ids[] = { From 3aa58cb51318e329d203857f7a191678e60bb714 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:04 +0100 Subject: [PATCH 160/768] dmaengine: fsl-qdma: Fix a memory leak related to the queue command DMA This dma_alloc_coherent() is undone neither in the remove function, nor in the error handling path of fsl_qdma_probe(). Switch to the managed version to fix both issues. Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/7f66aa14f59d32b13672dde28602b47deb294e1f.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 28c4e86bbcd2..9b141369bea5 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -514,11 +514,11 @@ static struct fsl_qdma_queue queue_temp = queue_head + i + (j * queue_num); queue_temp->cq = - dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - queue_size[i], - &queue_temp->bus_addr, - GFP_KERNEL); + dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + queue_size[i], + &queue_temp->bus_addr, + GFP_KERNEL); if (!queue_temp->cq) return NULL; queue_temp->block_base = fsl_qdma->block_base + From 0650006a93a2ce3b57f86e7f000347d9ae7737ef Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:05 +0100 Subject: [PATCH 161/768] dmaengine: fsl-qdma: Remove a useless devm_kfree() 'status_head' is a managed resource. It will be freed automatically if fsl_qdma_prep_status_queue(), and so fsl_qdma_probe(), fails. Remove the redundant (and harmless) devm_kfree() call. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6b7f60aa2b92f73b35c586886daffc1a5ac58697.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 9b141369bea5..f405c77060ad 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -568,10 +568,9 @@ static struct fsl_qdma_queue status_size, &status_head->bus_addr, GFP_KERNEL); - if (!status_head->cq) { - devm_kfree(&pdev->dev, status_head); + if (!status_head->cq) return NULL; - } + status_head->n_cq = status_size; status_head->virt_head = status_head->cq; status_head->virt_tail = status_head->cq; From 1513664f340289cf10402753110f3cff12a738aa Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Mon, 22 Jan 2024 15:48:24 +0800 Subject: [PATCH 162/768] ALSA: hda/realtek: fix mute/micmute LEDs for HP ZBook Power The HP ZBook Power using ALC236 codec which using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20240122074826.1020964-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f6f16622f9cc..19f2eb26659d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9957,6 +9957,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), From 4b5581f112075e46d73b34b9848be041e6c1e489 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 24 Oct 2023 18:53:53 +0200 Subject: [PATCH 163/768] accel/ivpu: Disable PLL after VPU IP reset during FLR IP reset has to followed by ivpu_pll_disable() to properly enter reset state. Fixes: 828d63042aec ("accel/ivpu: Don't enter d0i3 during FLR") Signed-off-by: Jacek Lawrynowicz Reviewed-by: Stanislaw Gruszka Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20231024165353.761507-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index eba2fdef2ace..c02061f299e2 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -746,7 +746,7 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) return 0; } -static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) +static int ivpu_hw_40xx_ip_reset(struct ivpu_device *vdev) { int ret; u32 val; @@ -768,6 +768,23 @@ static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) return ret; } +static int ivpu_hw_40xx_reset(struct ivpu_device *vdev) +{ + int ret = 0; + + if (ivpu_hw_40xx_ip_reset(vdev)) { + ivpu_err(vdev, "Failed to reset VPU IP\n"); + ret = -EIO; + } + + if (ivpu_pll_disable(vdev)) { + ivpu_err(vdev, "Failed to disable PLL\n"); + ret = -EIO; + } + + return ret; +} + static int ivpu_hw_40xx_d0i3_enable(struct ivpu_device *vdev) { int ret; @@ -913,7 +930,7 @@ static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev) ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev); - if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev)) + if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev)) ivpu_warn(vdev, "Failed to reset the VPU\n"); if (ivpu_pll_disable(vdev)) { From 192cdb1c907fd8df2d764c5bb17496e415e59391 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 22 Jan 2024 15:18:11 +0100 Subject: [PATCH 164/768] cpufreq: intel_pstate: Refine computation of P-state for given frequency On systems using HWP, if a given frequency is equal to the maximum turbo frequency or the maximum non-turbo frequency, the HWP performance level corresponding to it is already known and can be used directly without any computation. Accordingly, adjust the code to use the known HWP performance levels in the cases mentioned above. This also helps to avoid limiting CPU capacity artificially in some cases when the BIOS produces the HWP_CAP numbers using a different E-core-to-P-core performance scaling factor than expected by the kernel. Fixes: f5c8cf2a4992 ("cpufreq: intel_pstate: hybrid: Use known scaling factor for P-cores") Cc: 6.1+ # 6.1+ Tested-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 55 +++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2ca70b0b5fdc..ca94e60e705a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -529,6 +529,30 @@ static int intel_pstate_cppc_get_scaling(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ +static int intel_pstate_freq_to_hwp_rel(struct cpudata *cpu, int freq, + unsigned int relation) +{ + if (freq == cpu->pstate.turbo_freq) + return cpu->pstate.turbo_pstate; + + if (freq == cpu->pstate.max_freq) + return cpu->pstate.max_pstate; + + switch (relation) { + case CPUFREQ_RELATION_H: + return freq / cpu->pstate.scaling; + case CPUFREQ_RELATION_C: + return DIV_ROUND_CLOSEST(freq, cpu->pstate.scaling); + } + + return DIV_ROUND_UP(freq, cpu->pstate.scaling); +} + +static int intel_pstate_freq_to_hwp(struct cpudata *cpu, int freq) +{ + return intel_pstate_freq_to_hwp_rel(cpu, freq, CPUFREQ_RELATION_L); +} + /** * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. * @cpu: Target CPU. @@ -546,6 +570,7 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); int scaling = cpu->pstate.scaling; + int freq; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); @@ -559,16 +584,16 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); - cpu->pstate.max_pstate_physical = - DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, - scaling); + freq = perf_ctl_max_phys * perf_ctl_scaling; + cpu->pstate.max_pstate_physical = intel_pstate_freq_to_hwp(cpu, freq); - cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; + freq = cpu->pstate.min_pstate * perf_ctl_scaling; + cpu->pstate.min_freq = freq; /* * Cast the min P-state value retrieved via pstate_funcs.get_min() to * the effective range of HWP performance levels. */ - cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling); + cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq); } static inline void update_turbo_state(void) @@ -2528,13 +2553,12 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * abstract values to represent performance rather than pure ratios. */ if (hwp_active && cpu->pstate.scaling != perf_ctl_scaling) { - int scaling = cpu->pstate.scaling; int freq; freq = max_policy_perf * perf_ctl_scaling; - max_policy_perf = DIV_ROUND_UP(freq, scaling); + max_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); freq = min_policy_perf * perf_ctl_scaling; - min_policy_perf = DIV_ROUND_UP(freq, scaling); + min_policy_perf = intel_pstate_freq_to_hwp(cpu, freq); } pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", @@ -2908,18 +2932,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, cpufreq_freq_transition_begin(policy, &freqs); - switch (relation) { - case CPUFREQ_RELATION_L: - target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling); - break; - case CPUFREQ_RELATION_H: - target_pstate = freqs.new / cpu->pstate.scaling; - break; - default: - target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling); - break; - } - + target_pstate = intel_pstate_freq_to_hwp_rel(cpu, freqs.new, relation); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, false); freqs.new = target_pstate * cpu->pstate.scaling; @@ -2937,7 +2950,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, update_turbo_state(); - target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_freq_to_hwp(cpu, target_freq); target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); From 3c18703079b6c7149d037b71d685d6fcaf6c4cd0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 22 Jan 2024 11:50:00 +0000 Subject: [PATCH 165/768] netfs, cachefiles: Change mailing list The publicly accessible archives for Red Hat mailing lists stop at Oct 2023; messages sent after that time are in internal-only archives. Change the netfs and cachefiles mailing list to one that has publicly accessible archives: netfs@lists.linux.dev Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240122115007.3820330-2-dhowells@redhat.com cc: Jeff Layton cc: Matthew Wilcox cc: cc: cc: cc: cc: cc: cc: cc: cc: Signed-off-by: Christian Brauner --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..ab5858d24ffc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4547,7 +4547,7 @@ F: drivers/net/ieee802154/ca8210.c CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS M: David Howells -L: linux-cachefs@redhat.com (moderated for non-subscribers) +L: netfs@lists.linux.dev S: Supported F: Documentation/filesystems/caching/cachefiles.rst F: fs/cachefiles/ @@ -8223,7 +8223,7 @@ F: include/linux/iomap.h FILESYSTEMS [NETFS LIBRARY] M: David Howells -L: linux-cachefs@redhat.com (moderated for non-subscribers) +L: netfs@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported F: Documentation/filesystems/caching/ From d59da02d1ab690b81a3dd2493112fc6878198f60 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 22 Jan 2024 11:50:01 +0000 Subject: [PATCH 166/768] netfs: Add Jeff Layton as reviewer Add Jeff Layton as a reviewer in the MAINTAINERS file. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240122115007.3820330-3-dhowells@redhat.com Acked-by: Jeff Layton cc: cc: Signed-off-by: Christian Brauner --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ab5858d24ffc..2f4f4bf2e7f8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8223,6 +8223,7 @@ F: include/linux/iomap.h FILESYSTEMS [NETFS LIBRARY] M: David Howells +R: Jeff Layton L: netfs@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported From f7cfe7017b531e08c108ac6615b1ddedcc892428 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 9 Jan 2024 09:22:32 +0100 Subject: [PATCH 167/768] x86/paravirt: Make BUG_func() usable by non-GPL modules Several inlined functions subject to paravirt patching are referencing BUG_func() after the recent switch to the alternative patching mechanism. As those functions can legally be used by non-GPL modules, BUG_func() must be usable by those modules, too. So use EXPORT_SYMBOL() when exporting BUG_func(). Fixes: 9824b00c2b58 ("x86/paravirt: Move some functions and defines to alternative.c") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240109082232.22657-1-jgross@suse.com --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index cc130b57542a..1d85cb7071cb 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -403,7 +403,7 @@ noinstr void BUG_func(void) { BUG(); } -EXPORT_SYMBOL_GPL(BUG_func); +EXPORT_SYMBOL(BUG_func); #define CALL_RIP_REL_OPCODE 0xff #define CALL_RIP_REL_MODRM 0x15 From ed1a72fb0d646c983c85b62144fb1d134a8edb71 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:55:22 +0300 Subject: [PATCH 168/768] kunit: Fix a NULL vs IS_ERR() bug The kunit_device_register() function doesn't return NULL, it returns error pointers. Change the KUNIT_ASSERT_NOT_NULL() to check for ERR_OR_NULL(). Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Signed-off-by: Dan Carpenter Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/kunit-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index c4259d910356..f7980ef236a3 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -720,7 +720,7 @@ static void kunit_device_cleanup_test(struct kunit *test) long action_was_run = 0; test_device = kunit_device_register(test, "my_device"); - KUNIT_ASSERT_NOT_NULL(test, test_device); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_device); /* Add an action to verify cleanup. */ devm_add_action(test_device, test_dev_action, &action_was_run); From 083974ebb8fc65978d6cacd1bcfe9158d6234b98 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:55:14 +0300 Subject: [PATCH 169/768] kunit: device: Fix a NULL vs IS_ERR() check in init() The root_device_register() function does not return NULL, it returns error pointers. Fix the check to match. Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Signed-off-by: Dan Carpenter Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kunit/device.c b/lib/kunit/device.c index f5371287b375..074c6dd2e36a 100644 --- a/lib/kunit/device.c +++ b/lib/kunit/device.c @@ -45,8 +45,8 @@ int kunit_bus_init(void) int error; kunit_bus_device = root_device_register("kunit"); - if (!kunit_bus_device) - return -ENOMEM; + if (IS_ERR(kunit_bus_device)) + return PTR_ERR(kunit_bus_device); error = bus_register(&kunit_bus_type); if (error) From 57e39086fb868a84f772cf097004f4715d8aaccb Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 12 Jan 2024 07:49:47 +0800 Subject: [PATCH 170/768] MAINTAINERS: kunit: Add Rae Moar as a reviewer Rae has been shouldering a lot of the KUnit review burden for the last year, and will continue to do so in the future. Thanks! Signed-off-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Shuah Khan --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..354d993bc2cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11724,6 +11724,7 @@ F: fs/smb/server/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins M: David Gow +R: Rae Moar L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained From a1af6a2bfa0cb46d70b7df5352993e750da6c79b Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Wed, 10 Jan 2024 16:59:47 +0100 Subject: [PATCH 171/768] kunit: run test suites only after module initialization completes Commit 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") fixed a wild-memory-access bug that could have happened during the loading phase of test suites built and executed as loadable modules. However, it also introduced a problematic side effect that causes test suites modules to crash when they attempt to register fake devices. When a module is loaded, it traverses the MODULE_STATE_UNFORMED and MODULE_STATE_COMING states before reaching the normal operating state MODULE_STATE_LIVE. Finally, when the module is removed, it moves to MODULE_STATE_GOING before being released. However, if the loading function load_module() fails between complete_formation() and do_init_module(), the module goes directly from MODULE_STATE_COMING to MODULE_STATE_GOING without passing through MODULE_STATE_LIVE. This behavior was causing kunit_module_exit() to be called without having first executed kunit_module_init(). Since kunit_module_exit() is responsible for freeing the memory allocated by kunit_module_init() through kunit_filter_suites(), this behavior was resulting in a wild-memory-access bug. Commit 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") fixed this issue by running the tests when the module is still in MODULE_STATE_COMING. However, modules in that state are not fully initialized, lacking sysfs kobjects. Therefore, if a test module attempts to register a fake device, it will inevitably crash. This patch proposes a different approach to fix the original wild-memory-access bug while restoring the normal module execution flow by making kunit_module_exit() able to detect if kunit_module_init() has previously initialized the tests suite set. In this way, test modules can once again register fake devices without crashing. This behavior is achieved by checking whether mod->kunit_suites is a virtual or direct mapping address. If it is a virtual address, then kunit_module_init() has allocated the suite_set in kunit_filter_suites() using kmalloc_array(). On the contrary, if mod->kunit_suites is still pointing to the original address that was set when looking up the .kunit_test_suites section of the module, then the loading phase has failed and there's no memory to be freed. v4: - rebased on 6.8 - noted that kunit_filter_suites() must return a virtual address v3: - add a comment to clarify why the start address is checked v2: - add include Fixes: 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") Reviewed-by: David Gow Tested-by: Rae Moar Tested-by: Richard Fitzgerald Reviewed-by: Javier Martinez Canillas Signed-off-by: Marco Pagani Signed-off-by: Shuah Khan --- lib/kunit/executor.c | 4 ++++ lib/kunit/test.c | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 717b9599036b..689fff2b2b10 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -146,6 +146,10 @@ void kunit_free_suite_set(struct kunit_suite_set suite_set) kfree(suite_set.start); } +/* + * Filter and reallocate test suites. Must return the filtered test suites set + * allocated at a valid virtual address or NULL in case of error. + */ struct kunit_suite_set kunit_filter_suites(const struct kunit_suite_set *suite_set, const char *filter_glob, diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f95d2093a0aa..31a5a992e646 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "debugfs.h" #include "device-impl.h" @@ -801,12 +802,19 @@ static void kunit_module_exit(struct module *mod) }; const char *action = kunit_action(); + /* + * Check if the start address is a valid virtual address to detect + * if the module load sequence has failed and the suite set has not + * been initialized and filtered. + */ + if (!suite_set.start || !virt_addr_valid(suite_set.start)) + return; + if (!action) __kunit_test_suites_exit(mod->kunit_suites, mod->num_kunit_suites); - if (suite_set.start) - kunit_free_suite_set(suite_set); + kunit_free_suite_set(suite_set); } static int kunit_module_notify(struct notifier_block *nb, unsigned long val, @@ -816,12 +824,12 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val, switch (val) { case MODULE_STATE_LIVE: + kunit_module_init(mod); break; case MODULE_STATE_GOING: kunit_module_exit(mod); break; case MODULE_STATE_COMING: - kunit_module_init(mod); break; case MODULE_STATE_UNFORMED: break; From 1a9f2c776d1416c4ea6cb0d0b9917778c41a1a7d Mon Sep 17 00:00:00 2001 From: Arthur Grillo Date: Wed, 10 Jan 2024 14:39:28 -0300 Subject: [PATCH 172/768] Documentation: KUnit: Update the instructions on how to test static functions Now that we have the VISIBLE_IF_KUNIT and EXPORT_SYMBOL_IF_KUNIT macros, update the instructions to recommend this way of testing static functions. Signed-off-by: Arthur Grillo Reviewed-by: David Gow Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/usage.rst | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index a9efab50eed8..22955d56b379 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -671,8 +671,23 @@ Testing Static Functions ------------------------ If we do not want to expose functions or variables for testing, one option is to -conditionally ``#include`` the test file at the end of your .c file. For -example: +conditionally export the used symbol. For example: + +.. code-block:: c + + /* In my_file.c */ + + VISIBLE_IF_KUNIT int do_interesting_thing(); + EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing); + + /* In my_file.h */ + + #if IS_ENABLED(CONFIG_KUNIT) + int do_interesting_thing(void); + #endif + +Alternatively, you could conditionally ``#include`` the test file at the end of +your .c file. For example: .. code-block:: c From 523d242d4309797e6b27c708fbd1463f301c199a Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:36 +0800 Subject: [PATCH 173/768] ASoC: codecs: ES8326: improving crosstalk performance We change the crosstalk parameter in es8326_resume function to improve crosstalk performance. Adding crosstalk kcontrol to enhance the flexibility of crosstalk debugging in machine. Adding ES8326_DAC_CROSSTALK macro to declare the crosstalk register. Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-2-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 82 +++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/es8326.h | 1 + 2 files changed, 83 insertions(+) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index fa890f6205e2..82d1c4f8324c 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -45,6 +45,82 @@ struct es8326_priv { int jack_remove_retry; }; +static int es8326_crosstalk1_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h); + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h &= 0x20; + crosstalk_l &= 0xf0; + crosstalk = crosstalk_h >> 1 | crosstalk_l >> 4; + ucontrol->value.integer.value[0] = crosstalk; + + return 0; +} + +static int es8326_crosstalk1_set(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + crosstalk = ucontrol->value.integer.value[0]; + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h = (crosstalk & 0x10) << 1; + crosstalk_l &= 0x0f; + crosstalk_l |= (crosstalk & 0x0f) << 4; + regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE, + 0x20, crosstalk_h); + regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l); + + return 0; +} + +static int es8326_crosstalk2_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h); + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h &= 0x10; + crosstalk_l &= 0x0f; + crosstalk = crosstalk_h | crosstalk_l; + ucontrol->value.integer.value[0] = crosstalk; + + return 0; +} + +static int es8326_crosstalk2_set(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + crosstalk = ucontrol->value.integer.value[0]; + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h = crosstalk & 0x10; + crosstalk_l &= 0xf0; + crosstalk_l |= crosstalk & 0x0f; + regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE, + 0x10, crosstalk_h); + regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l); + + return 0; +} + static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(dac_vol_tlv, -9550, 50, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_vol_tlv, -9550, 50, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_analog_pga_tlv, 0, 300, 0); @@ -102,6 +178,10 @@ static const struct snd_kcontrol_new es8326_snd_controls[] = { SOC_SINGLE_TLV("ALC Capture Target Level", ES8326_ALC_LEVEL, 0, 0x0f, 0, drc_target_tlv), + SOC_SINGLE_EXT("CROSSTALK1", SND_SOC_NOPM, 0, 31, 0, + es8326_crosstalk1_get, es8326_crosstalk1_set), + SOC_SINGLE_EXT("CROSSTALK2", SND_SOC_NOPM, 0, 31, 0, + es8326_crosstalk2_get, es8326_crosstalk2_set), }; static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = { @@ -844,6 +924,8 @@ static int es8326_resume(struct snd_soc_component *component) regmap_write(es8326->regmap, ES8326_CLK_CAL_TIME, 0x00); /* calibrate for B version */ es8326_calibrate(component); + regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, 0xaa); + regmap_write(es8326->regmap, ES8326_DAC_RAMPRATE, 0x00); /* turn off headphone out */ regmap_write(es8326->regmap, ES8326_HP_CAL, 0x00); /* set ADC and DAC in low power mode */ diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h index 90a08351d6ac..dfef808673f4 100644 --- a/sound/soc/codecs/es8326.h +++ b/sound/soc/codecs/es8326.h @@ -72,6 +72,7 @@ #define ES8326_DAC_VOL 0x50 #define ES8326_DRC_RECOVERY 0x53 #define ES8326_DRC_WINSIZE 0x54 +#define ES8326_DAC_CROSSTALK 0x55 #define ES8326_HPJACK_TIMER 0x56 #define ES8326_HPDET_TYPE 0x57 #define ES8326_INT_SOURCE 0x58 From 14a0a1ec3335ac3945a96437c35465e4a9616b88 Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:37 +0800 Subject: [PATCH 174/768] ASoC: codecs: ES8326: Improving the THD+N performance We update the values of some registers in the initialization sequence in es8326_resume function to improve THD+N performance. THD+N performance decreases if the output level on headphone is close to full scale. So we change the register setting in es8326_jack_detect_handler function to improve THD+N performance if headphone pulgged. Also, the register setting should be restored when the headset is unplugged Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-3-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 21 +++++++++++++-------- sound/soc/codecs/es8326.h | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 82d1c4f8324c..10157a4bd500 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -752,6 +752,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) es8326->hp = 0; } regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); + regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03); /* * Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event */ @@ -777,6 +779,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); usleep_range(50000, 70000); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); + regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08); queue_delayed_work(system_wq, &es8326->jack_detect_work, msecs_to_jiffies(400)); es8326->hp = 1; @@ -846,14 +850,14 @@ static int es8326_calibrate(struct snd_soc_component *component) if ((es8326->version == ES8326_VERSION_B) && (es8326->calibrated == false)) { dev_dbg(component->dev, "ES8326_VERSION_B, calibrating\n"); regmap_write(es8326->regmap, ES8326_CLK_INV, 0xc0); - regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x01); + regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x03); regmap_write(es8326->regmap, ES8326_CLK_DLL, 0x30); regmap_write(es8326->regmap, ES8326_CLK_MUX, 0xed); regmap_write(es8326->regmap, ES8326_CLK_DAC_SEL, 0x08); regmap_write(es8326->regmap, ES8326_CLK_TRI, 0xc1); regmap_write(es8326->regmap, ES8326_DAC_MUTE, 0x03); regmap_write(es8326->regmap, ES8326_ANA_VSEL, 0x7f); - regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x03); + regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x23); regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x88); usleep_range(15000, 20000); regmap_write(es8326->regmap, ES8326_HP_OFFSET_CAL, 0x8c); @@ -894,13 +898,13 @@ static int es8326_resume(struct snd_soc_component *component) /* reset internal clock state */ regmap_write(es8326->regmap, ES8326_RESET, 0x1f); regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); + regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0); usleep_range(10000, 15000); regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9); - regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0x4b); + regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb); /* set headphone default type and detect pin */ regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83); regmap_write(es8326->regmap, ES8326_CLK_RESAMPLE, 0x05); - regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30); /* set internal oscillator as clock source of headpone cp */ regmap_write(es8326->regmap, ES8326_CLK_DIV_CPC, 0x89); @@ -908,14 +912,15 @@ static int es8326_resume(struct snd_soc_component *component) /* clock manager reset release */ regmap_write(es8326->regmap, ES8326_RESET, 0x17); /* set headphone detection as half scan mode */ - regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30); + regmap_write(es8326->regmap, ES8326_HP_MISC, 0x3d); regmap_write(es8326->regmap, ES8326_PULLUP_CTL, 0x00); /* enable headphone driver */ + regmap_write(es8326->regmap, ES8326_HP_VOL, 0xc4); regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa7); usleep_range(2000, 5000); - regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xa3); - regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xb3); + regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x23); + regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x33); regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); regmap_write(es8326->regmap, ES8326_CLK_INV, 0x00); @@ -946,7 +951,7 @@ static int es8326_resume(struct snd_soc_component *component) (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT)); regmap_write(es8326->regmap, ES8326_SDINOUT23_IO, ES8326_IO_INPUT); - regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b); + regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00); regmap_write(es8326->regmap, ES8326_RESET, ES8326_CSM_ON); regmap_update_bits(es8326->regmap, ES8326_PGAGAIN, ES8326_MIC_SEL_MASK, ES8326_MIC1_SEL); diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h index dfef808673f4..4234bbb900c4 100644 --- a/sound/soc/codecs/es8326.h +++ b/sound/soc/codecs/es8326.h @@ -101,7 +101,7 @@ #define ES8326_MUTE (3 << 0) /* ES8326_CLK_CTL */ -#define ES8326_CLK_ON (0x7f << 0) +#define ES8326_CLK_ON (0x7e << 0) #define ES8326_CLK_OFF (0 << 0) /* ES8326_CLK_INV */ From a3aa9255d6ccb1bff13c7c98e5d3bf10ba67f92e Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:39 +0800 Subject: [PATCH 175/768] ASoC: codecs: ES8326: Minimize the pop noise on headphone We modify the register settings to minimize headphone pop noise during ES8326 power-up and music start/stop. Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-5-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 10157a4bd500..1ed068c417d6 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -523,7 +523,8 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF); regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ES8326_MUTE); - regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xf0); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, + 0x30, 0x00); } else { if (!es8326->calibrated) { regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_FORCE_CAL); @@ -536,8 +537,13 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) regmap_write(es8326->regmap, ES8326_HPR_OFFSET_INI, offset_r); es8326->calibrated = true; } + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30); regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); - regmap_write(es8326->regmap, ES8326_HP_VOL, 0x91); regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON); regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ~(ES8326_MUTE)); @@ -557,23 +563,20 @@ static int es8326_set_bias_level(struct snd_soc_component *codec, if (ret) return ret; - regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); + regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x02); + usleep_range(5000, 10000); regmap_write(es8326->regmap, ES8326_INTOUT_IO, es8326->interrupt_clk); regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT)); - regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); regmap_write(es8326->regmap, ES8326_PGA_PDN, 0x40); regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00); regmap_update_bits(es8326->regmap, ES8326_CLK_CTL, 0x20, 0x20); - - regmap_update_bits(es8326->regmap, ES8326_RESET, - ES8326_CSM_ON, ES8326_CSM_ON); + regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x00); break; case SND_SOC_BIAS_PREPARE: break; case SND_SOC_BIAS_STANDBY: regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b); - regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x00); regmap_update_bits(es8326->regmap, ES8326_CLK_CTL, 0x20, 0x00); regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, ES8326_IO_INPUT); break; @@ -777,6 +780,7 @@ static void es8326_jack_detect_handler(struct work_struct *work) * Don't report jack status. */ regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); + es8326_enable_micbias(es8326->component); usleep_range(50000, 70000); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); @@ -820,13 +824,10 @@ exit: static irqreturn_t es8326_irq(int irq, void *dev_id) { struct es8326_priv *es8326 = dev_id; - struct snd_soc_component *comp = es8326->component; if (!es8326->jack) goto out; - es8326_enable_micbias(comp); - if (es8326->jack->status & SND_JACK_HEADSET) queue_delayed_work(system_wq, &es8326->jack_detect_work, msecs_to_jiffies(10)); @@ -943,6 +944,14 @@ static int es8326_resume(struct snd_soc_component *component) regmap_write(es8326->regmap, ES8326_DAC_DSM, 0x08); regmap_write(es8326->regmap, ES8326_DAC_VPPSCALE, 0x15); + regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 | + ((es8326->version == ES8326_VERSION_B) ? + (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) : + (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04))); + usleep_range(5000, 10000); + es8326_enable_micbias(es8326->component); + usleep_range(50000, 70000); + regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); regmap_write(es8326->regmap, ES8326_INT_SOURCE, (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_write(es8326->regmap, ES8326_INTOUT_IO, @@ -959,11 +968,6 @@ static int es8326_resume(struct snd_soc_component *component) regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ES8326_MUTE); - regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 | - ((es8326->version == ES8326_VERSION_B) ? - (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) : - (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04))); - regmap_write(es8326->regmap, ES8326_HP_VOL, 0x11); es8326->jack_remove_retry = 0; es8326->hp = 0; From 8c99a0a607b5e0cf6b79b283d7bb2c2b84e01da5 Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:40 +0800 Subject: [PATCH 176/768] ASoC: codecs: ES8326: fix the capture noise issue We get a noise issue during the startup of recording. We update the register setting and dapm widgets to fix this issue. we change callback type of es8326_mute function to mute_stream. ES8326_ADC_MUTE is moved to es8326_mute function so it can be turned on at last and turned off at first. Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-6-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 63 ++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 1ed068c417d6..cbcd02ec6ba4 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -197,12 +197,6 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = { SND_SOC_DAPM_AIF_OUT("I2S OUT", "I2S1 Capture", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("I2S IN", "I2S1 Playback", 0, SND_SOC_NOPM, 0, 0), - /* ADC Digital Mute */ - SND_SOC_DAPM_PGA("ADC L1", ES8326_ADC_MUTE, 0, 1, NULL, 0), - SND_SOC_DAPM_PGA("ADC R1", ES8326_ADC_MUTE, 1, 1, NULL, 0), - SND_SOC_DAPM_PGA("ADC L2", ES8326_ADC_MUTE, 2, 1, NULL, 0), - SND_SOC_DAPM_PGA("ADC R2", ES8326_ADC_MUTE, 3, 1, NULL, 0), - /* Analog Power Supply*/ SND_SOC_DAPM_DAC("Right DAC", NULL, ES8326_ANA_PDN, 0, 1), SND_SOC_DAPM_DAC("Left DAC", NULL, ES8326_ANA_PDN, 1, 1), @@ -222,15 +216,10 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = { }; static const struct snd_soc_dapm_route es8326_dapm_routes[] = { - {"ADC L1", NULL, "MIC1"}, - {"ADC R1", NULL, "MIC2"}, - {"ADC L2", NULL, "MIC3"}, - {"ADC R2", NULL, "MIC4"}, - - {"ADC L", NULL, "ADC L1"}, - {"ADC R", NULL, "ADC R1"}, - {"ADC L", NULL, "ADC L2"}, - {"ADC R", NULL, "ADC R2"}, + {"ADC L", NULL, "MIC1"}, + {"ADC R", NULL, "MIC2"}, + {"ADC L", NULL, "MIC3"}, + {"ADC R", NULL, "MIC4"}, {"I2S OUT", NULL, "ADC L"}, {"I2S OUT", NULL, "ADC R"}, @@ -520,11 +509,16 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) unsigned int offset_l, offset_r; if (mute) { - regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF); - regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, - ES8326_MUTE_MASK, ES8326_MUTE); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, - 0x30, 0x00); + if (direction == SNDRV_PCM_STREAM_PLAYBACK) { + regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF); + regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, + ES8326_MUTE_MASK, ES8326_MUTE); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, + 0x30, 0x00); + } else { + regmap_update_bits(es8326->regmap, ES8326_ADC_MUTE, + 0x0F, 0x0F); + } } else { if (!es8326->calibrated) { regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_FORCE_CAL); @@ -537,16 +531,22 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) regmap_write(es8326->regmap, ES8326_HPR_OFFSET_INI, offset_r); es8326->calibrated = true; } - regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01); - usleep_range(1000, 5000); - regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); - usleep_range(1000, 5000); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30); - regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); - regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON); - regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, - ES8326_MUTE_MASK, ~(ES8326_MUTE)); + if (direction == SNDRV_PCM_STREAM_PLAYBACK) { + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30); + regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); + regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON); + regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, + ES8326_MUTE_MASK, ~(ES8326_MUTE)); + } else { + msleep(300); + regmap_update_bits(es8326->regmap, ES8326_ADC_MUTE, + 0x0F, 0x00); + } } return 0; } @@ -596,7 +596,7 @@ static const struct snd_soc_dai_ops es8326_ops = { .set_fmt = es8326_set_dai_fmt, .set_sysclk = es8326_set_dai_sysclk, .mute_stream = es8326_mute, - .no_capture_mute = 1, + .no_capture_mute = 0, }; static struct snd_soc_dai_driver es8326_dai = { @@ -968,6 +968,7 @@ static int es8326_resume(struct snd_soc_component *component) regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ES8326_MUTE); + regmap_write(es8326->regmap, ES8326_ADC_MUTE, 0x0f); es8326->jack_remove_retry = 0; es8326->hp = 0; From b53cc6144a3f6c8b56afcdec89d81195c9b0dc69 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 19 Jan 2024 12:24:17 +0100 Subject: [PATCH 177/768] ASoC: codecs: wsa883x: fix PA volume control The PA gain can be set in steps of 1.5 dB from -3 dB to 18 dB, that is, in 15 levels. Fix the dB values for the PA volume control as experiments using wsa8835 show that the first 16 levels all map to the same lowest gain while the last three map to the highest gain. These values specifically need to be correct for the sound server to provide proper volume control. Note that level 0 (-3 dB) does not mute the PA so the mute flag should also not be set. Fixes: cdb09e623143 ("ASoC: codecs: wsa883x: add control, dapm widgets and map") Cc: stable@vger.kernel.org # 6.0 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240119112420.7446-2-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa883x.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index cb83c569e18d..a2e86ef7d18f 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1098,7 +1098,11 @@ static int wsa_dev_mode_put(struct snd_kcontrol *kcontrol, return 1; } -static const DECLARE_TLV_DB_SCALE(pa_gain, -300, 150, -300); +static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(pa_gain, + 0, 14, TLV_DB_SCALE_ITEM(-300, 0, 0), + 15, 29, TLV_DB_SCALE_ITEM(-300, 150, 0), + 30, 31, TLV_DB_SCALE_ITEM(1800, 0, 0), +); static int wsa883x_get_swr_port(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) From 46188db080bd1df7d2d28031b89e56f2fdbabd67 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 19 Jan 2024 12:24:19 +0100 Subject: [PATCH 178/768] ASoC: codecs: lpass-wsa-macro: fix compander volume hack The LPASS WSA macro codec driver is updating the digital gain settings behind the back of user space on DAPM events if companding has been enabled. As compander control is exported to user space, this can result in the digital gain setting being incremented (or decremented) every time the sound server is started and the codec suspended depending on what the UCM configuration looks like. Soon enough playback will become distorted (or too quiet). This is specifically a problem on the Lenovo ThinkPad X13s as this bypasses the limit for the digital gain setting that has been set by the machine driver. Fix this by simply dropping the compander gain offset hack. If someone cares about modelling the impact of the compander setting this can possibly be done by exporting it as a volume control later. Note that the volume registers still need to be written after enabling clocks in order for any prior updates to take effect. Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route") Cc: stable@vger.kernel.org # 5.11 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240119112420.7446-4-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 7e21cec3c2fb..6ce309980cd1 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1584,7 +1584,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, u16 gain_reg; u16 reg; int val; - int offset_val = 0; struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); if (w->shift == WSA_MACRO_COMP1) { @@ -1623,10 +1622,8 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_ENABLE); - offset_val = -2; } val = snd_soc_component_read(component, gain_reg); - val += offset_val; snd_soc_component_write(component, gain_reg, val); wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); @@ -1654,10 +1651,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_DISABLE); - offset_val = 2; - val = snd_soc_component_read(component, gain_reg); - val += offset_val; - snd_soc_component_write(component, gain_reg, val); } wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); From aafa3acf62f1f63e620753fb9fd75095325617b2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 19 Jan 2024 12:24:20 +0100 Subject: [PATCH 179/768] ASoC: codecs: wcd9335: drop unused gain hack remnant The vendor driver appears to be modifying the gain settings behind the back of user space but these hacks never made it upstream except for some essentially dead code that adds a constant zero to the current gain setting on DAPM events. Note that the volume registers still need to be written after enabling clocks in order for any prior updates to take effect. Reviewed-by: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240119112420.7446-5-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd9335.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 43c648efd0d9..deb15b95992d 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -3033,7 +3033,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w, { struct snd_soc_component *comp = snd_soc_dapm_to_component(w->dapm); u16 gain_reg; - int offset_val = 0; int val = 0; switch (w->reg) { @@ -3073,7 +3072,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: val = snd_soc_component_read(comp, gain_reg); - val += offset_val; snd_soc_component_write(comp, gain_reg, val); break; case SND_SOC_DAPM_POST_PMD: @@ -3294,7 +3292,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w, u16 gain_reg; u16 reg; int val; - int offset_val = 0; if (!(snd_soc_dapm_widget_name_cmp(w, "RX INT0 INTERP"))) { reg = WCD9335_CDC_RX0_RX_PATH_CTL; @@ -3337,7 +3334,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w, case SND_SOC_DAPM_POST_PMU: wcd9335_config_compander(comp, w->shift, event); val = snd_soc_component_read(comp, gain_reg); - val += offset_val; snd_soc_component_write(comp, gain_reg, val); break; case SND_SOC_DAPM_POST_PMD: From 4d0e8bdfa4a57099dc7230952a460903f2e2f8de Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jan 2024 10:11:30 +0100 Subject: [PATCH 180/768] ASoC: codecs: wcd938x: fix headphones volume controls The lowest headphones volume setting does not mute so the leave the TLV mute flag unset. This is specifically needed to let the sound server use the lowest gain setting. Fixes: c03226ba15fe ("ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR") Cc: # 6.5 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240122091130.27463-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index faf8d3f9b3c5..98055dd39b78 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -210,7 +210,7 @@ struct wcd938x_priv { }; static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); -static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); +static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); struct wcd938x_mbhc_zdet_param { From 1d565de8d53cfa823576abac84e82ab1561f04eb Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Thu, 18 Jan 2024 20:00:19 +0530 Subject: [PATCH 181/768] ASoC: amd: acp: Enable rt5682s clocks in acp slave mode Set and enable rt5682s codec bclk and lrclk rates when acp is in slave mode. Signed-off-by: Venkata Prasad Potturu Link: https://msgid.link/r/20240118143023.1903984-1-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-mach-common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index c90ec3419247..a224043ccd42 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -505,6 +505,13 @@ static int acp_card_rt5682s_hw_params(struct snd_pcm_substream *substream, clk_set_rate(drvdata->wclk, srate); clk_set_rate(drvdata->bclk, srate * ch * format); + if (!drvdata->soc_mclk) { + ret = acp_clk_enable(drvdata, srate, ch * format); + if (ret < 0) { + dev_err(rtd->card->dev, "Failed to enable HS clk: %d\n", ret); + return ret; + } + } return 0; } From 4bae2029ffcccfbefb8f31563556494464e7bf2d Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Thu, 18 Jan 2024 20:00:20 +0530 Subject: [PATCH 182/768] ASoC: amd: acp: Update platform name for different boards Update platform name for various boards based on rembrandt and renoir platforms. Signed-off-by: Venkata Prasad Potturu Link: https://msgid.link/r/20240118143023.1903984-2-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sof-mach.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index 2a9fd3275e42..20b94814a046 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -48,6 +48,7 @@ static struct acp_card_drvdata sof_rt5682s_rt1019_data = { .hs_codec_id = RT5682S, .amp_codec_id = RT1019, .dmic_codec_id = DMIC, + .platform = RENOIR, .tdm_mode = false, }; @@ -58,6 +59,7 @@ static struct acp_card_drvdata sof_rt5682s_max_data = { .hs_codec_id = RT5682S, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, + .platform = RENOIR, .tdm_mode = false, }; @@ -68,6 +70,7 @@ static struct acp_card_drvdata sof_nau8825_data = { .hs_codec_id = NAU8825, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, + .platform = REMBRANDT, .soc_mclk = true, .tdm_mode = false, }; @@ -79,6 +82,7 @@ static struct acp_card_drvdata sof_rt5682s_hs_rt1019_data = { .hs_codec_id = RT5682S, .amp_codec_id = RT1019, .dmic_codec_id = DMIC, + .platform = REMBRANDT, .soc_mclk = true, .tdm_mode = false, }; From 6cc2aa9a75f2397d42b78d4c159bc06722183c78 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Thu, 18 Jan 2024 20:00:21 +0530 Subject: [PATCH 183/768] ASoC: amd: acp: Add check for cpu dai link initialization Add condition check for cpu dai link initialization for amplifier codec path, as same pcm id uses for both headset and speaker path for RENOIR platforms. Signed-off-by: Venkata Prasad Potturu Link: https://msgid.link/r/20240118143023.1903984-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-mach-common.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index a224043ccd42..504d1b8c4cbb 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -1471,8 +1471,13 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) if (drv_data->amp_cpu_id == I2S_SP) { links[i].name = "acp-amp-codec"; links[i].id = AMP_BE_ID; - links[i].cpus = sof_sp_virtual; - links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual); + if (drv_data->platform == RENOIR) { + links[i].cpus = sof_sp; + links[i].num_cpus = ARRAY_SIZE(sof_sp); + } else { + links[i].cpus = sof_sp_virtual; + links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual); + } links[i].platforms = sof_component; links[i].num_platforms = ARRAY_SIZE(sof_component); links[i].dpcm_playback = 1; From 086df711d9b886194481b4fbe525eb43e9ae7403 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:06 +0100 Subject: [PATCH 184/768] ASoC: codecs: wcd938x: handle deferred probe WCD938x sound codec driver ignores return status of getting regulators and returns EINVAL instead of EPROBE_DEFER. If regulator provider probes after the codec, system is left without probed audio: wcd938x_codec audio-codec: wcd938x_probe: Fail to obtain platform data wcd938x_codec: probe of audio-codec failed with error -22 Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 98055dd39b78..75834ed0365d 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3589,7 +3589,7 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_populate_dt_data(wcd938x, dev); if (ret) { dev_err(dev, "%s: Fail to obtain platform data\n", __func__); - return -EINVAL; + return ret; } ret = wcd938x_add_slave_components(wcd938x, dev, &match); From 22221b13d0c20a9791dec33121df73fe0b2ac226 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:07 +0100 Subject: [PATCH 185/768] ASoC: codecs: wcd938x: skip printing deferred probe failuers Probe calls wcd938x_populate_dt_data() which already prints all the error cases with dev_err_probe(), so skip the additional dev_err(). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 75834ed0365d..6021aa5a5689 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3587,10 +3587,8 @@ static int wcd938x_probe(struct platform_device *pdev) mutex_init(&wcd938x->micb_lock); ret = wcd938x_populate_dt_data(wcd938x, dev); - if (ret) { - dev_err(dev, "%s: Fail to obtain platform data\n", __func__); + if (ret) return ret; - } ret = wcd938x_add_slave_components(wcd938x, dev, &match); if (ret) From 35314e39dabcfb256832654ad0e856a9fba744bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:08 +0100 Subject: [PATCH 186/768] ASoC: codecs: wcd934x: drop unneeded regulator include Driver does not use any regulator code, so drop redundant include of regulator/consumer.h header. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-3-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd934x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 1b6e376f3833..6813268e6a19 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include From 70b4769956651e986591dd94b3ff9649122b1513 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 17:01:44 +0100 Subject: [PATCH 187/768] ASoC: allow up to eight CPU/codec DAIs Sound card on Qualcomm X1E80100 CRD board will use eight DAIs in one DAI link, so increase the limit. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117160144.1305127-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index f8524b5bfb33..516350533e73 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1037,7 +1037,7 @@ component_dai_empty: return -EINVAL; } -#define MAX_DEFAULT_CH_MAP_SIZE 7 +#define MAX_DEFAULT_CH_MAP_SIZE 8 static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZE] = { { .cpu = 0, .codec = 0 }, { .cpu = 1, .codec = 1 }, @@ -1046,6 +1046,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZ { .cpu = 4, .codec = 4 }, { .cpu = 5, .codec = 5 }, { .cpu = 6, .codec = 6 }, + { .cpu = 7, .codec = 7 }, }; static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZE] = { { .cpu = 0, .codec = 0 }, @@ -1055,6 +1056,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZ { .cpu = 0, .codec = 4 }, { .cpu = 0, .codec = 5 }, { .cpu = 0, .codec = 6 }, + { .cpu = 0, .codec = 7 }, }; static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_SIZE] = { { .cpu = 0, .codec = 0 }, @@ -1064,6 +1066,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_S { .cpu = 4, .codec = 0 }, { .cpu = 5, .codec = 0 }, { .cpu = 6, .codec = 0 }, + { .cpu = 7, .codec = 0 }, }; static int snd_soc_compensate_channel_connection_map(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link) From c92688cac239794e4a1d976afa5203a4d3a2ac0e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:26 +0100 Subject: [PATCH 188/768] regulator: pwm-regulator: Add validity checks in continuous .get_voltage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continuous regulators can be configured to operate only in a certain duty cycle range (for example from 0..91%). Add a check to error out if the duty cycle translates to an unsupported (or out of range) voltage. Suggested-by: Uwe Kleine-König Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 698c420e0869..226ca4c62673 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -158,6 +158,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. From 6a7d11efd6915d80a025f2a0be4ae09d797b91ec Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:27 +0100 Subject: [PATCH 189/768] regulator: pwm-regulator: Calculate the output voltage for disabled PWMs If a PWM output is disabled then it's voltage has to be calculated based on a zero duty cycle (for normal polarity) or duty cycle being equal to the PWM period (for inverted polarity). Add support for this to pwm_regulator_get_voltage(). Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-3-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 226ca4c62673..d27b9a7a30c9 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -157,6 +157,13 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); + if (!pstate.enabled) { + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + } + voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); if (voltage < min(max_uV_duty, min_uV_duty) || voltage > max(max_uV_duty, min_uV_duty)) From b3cbdcc191819b75c04178142e2d0d4c668f68c0 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:28 +0100 Subject: [PATCH 190/768] regulator: pwm-regulator: Manage boot-on with disabled PWM channels Odroid-C1 uses a Monolithic Power Systems MP2161 controlled via PWM for the VDDEE voltage supply of the Meson8b SoC. Commit 6b9352f3f8a1 ("pwm: meson: modify and simplify calculation in meson_pwm_get_state") results in my Odroid-C1 crashing with memory corruption in many different places (seemingly at random). It turns out that this is due to a currently not supported corner case. The VDDEE regulator can generate between 860mV (duty cycle of ~91%) and 1140mV (duty cycle of 0%). We consider it to be enabled by the bootloader (which is why it has the regulator-boot-on flag in .dts) as well as being always-on (which is why it has the regulator-always-on flag in .dts) because the VDDEE voltage is generally required for the Meson8b SoC to work. The public S805 datasheet [0] states on page 17 (where "A5" refers to the Cortex-A5 CPU cores): [...] So if EE domains is shut off, A5 memory is also shut off. That does not matter. Before EE power domain is shut off, A5 should be shut off at first. It turns out that at least some bootloader versions are keeping the PWM output disabled. This is not a problem due to the specific design of the regulator: when the PWM output is disabled the output pin is pulled LOW, effectively achieving a 0% duty cycle (which in return means that VDDEE voltage is at 1140mV). The problem comes when the pwm-regulator driver tries to initialize the PWM output. To do so it reads the current state from the hardware, which is: period: 3666ns duty cycle: 3333ns (= ~91%) enabled: false Then those values are translated using the continuous voltage range to 860mV. Later, when the regulator is being enabled (either by the regulator core due to the always-on flag or first consumer - in this case the lima driver for the Mali-450 GPU) the pwm-regulator driver tries to keep the voltage (at 860mV) and just enable the PWM output. This is when things start to go wrong as the typical voltage used for VDDEE is 1100mV. Commit 6b9352f3f8a1 ("pwm: meson: modify and simplify calculation in meson_pwm_get_state") triggers above condition as before that change period and duty cycle were both at 0. Since the change to the pwm-meson driver is considered correct the solution is to be found in the pwm-regulator driver. Update the duty cycle during driver probe if the regulator is flagged as boot-on so that a call to pwm_regulator_enable() (by the regulator core during initialization of a regulator flagged with boot-on) without any preceding call to pwm_regulator_set_voltage() does not change the output voltage. [0] https://dn.odroid.com/S805/Datasheet/S805_Datasheet%20V0.8%2020150126.pdf Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-4-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index d27b9a7a30c9..60cfcd741c2a 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -323,6 +323,32 @@ static int pwm_regulator_init_continuous(struct platform_device *pdev, return 0; } +static int pwm_regulator_init_boot_on(struct platform_device *pdev, + struct pwm_regulator_data *drvdata, + const struct regulator_init_data *init_data) +{ + struct pwm_state pstate; + + if (!init_data->constraints.boot_on || drvdata->enb_gpio) + return 0; + + pwm_get_state(drvdata->pwm, &pstate); + if (pstate.enabled) + return 0; + + /* + * Update the duty cycle so the output does not change + * when the regulator core enables the regulator (and + * thus the PWM channel). + */ + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + + return pwm_apply_might_sleep(drvdata->pwm, &pstate); +} + static int pwm_regulator_probe(struct platform_device *pdev) { const struct regulator_init_data *init_data; @@ -382,6 +408,13 @@ static int pwm_regulator_probe(struct platform_device *pdev) if (ret) return ret; + ret = pwm_regulator_init_boot_on(pdev, drvdata, init_data); + if (ret) { + dev_err(&pdev->dev, "Failed to apply boot_on settings: %d\n", + ret); + return ret; + } + regulator = devm_regulator_register(&pdev->dev, &drvdata->desc, &config); if (IS_ERR(regulator)) { From 6c314425b9ef6b247cefd0903e287eb072580c3b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Jan 2024 14:00:33 +0200 Subject: [PATCH 191/768] spi: intel-pci: Remove Meteor Lake-S SoC PCI ID from the list Turns out this "SoC" side controller does not support certain commands, such as reading chip JEDEC ID, so the controller is pretty much unusable in Linux. We should be using the "PCH" side controller instead. For this reason remove this PCI ID from the list. Fixes: c2912d42e86e ("spi: intel-pci: Add support for Meteor Lake-S SPI serial flash") Signed-off-by: Mika Westerberg Link: https://msgid.link/r/20240122120034.2664812-2-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 57d767a68e7b..b9918dcc3802 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -84,7 +84,6 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa2a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info }, - { PCI_VDEVICE(INTEL, 0xae23), (unsigned long)&cnl_info }, { }, }; MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids); From 8afe3c7fcaf72fca1e7d3dab16a5b7f4201ece17 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Jan 2024 14:00:34 +0200 Subject: [PATCH 192/768] spi: intel-pci: Add support for Arrow Lake SPI serial flash This adds the PCI ID of the Arrow Lake and Meteor Lake-S PCH SPI serial flash controller. This one supports all the necessary commands Linux SPI-NOR stack requires. Signed-off-by: Mika Westerberg Link: https://msgid.link/r/20240122120034.2664812-3-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index b9918dcc3802..07d20ca1164c 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -76,6 +76,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x7f24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x9d24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x9da4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&cnl_info }, From 7777f47f2ea64efd1016262e7b59fab34adfb869 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 18 Jan 2024 21:04:01 +0800 Subject: [PATCH 193/768] block: Move checking GENHD_FL_NO_PART to bdev_add_partition() Commit 1a721de8489f ("block: don't add or resize partition on the disk with GENHD_FL_NO_PART") prevented all operations about partitions on disks with GENHD_FL_NO_PART in blkpg_do_ioctl() since they are meaningless. However, it changed error code in some scenarios. So move checking GENHD_FL_NO_PART to bdev_add_partition() to eliminate impact. Fixes: 1a721de8489f ("block: don't add or resize partition on the disk with GENHD_FL_NO_PART") Reported-by: Allison Karlitskaya Closes: https://lore.kernel.org/all/CAOYeF9VsmqKMcQjo1k6YkGNujwN-nzfxY17N3F-CMikE1tYp+w@mail.gmail.com/ Signed-off-by: Li Lingfeng Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20240118130401.792757-1-lilingfeng@huaweicloud.com Signed-off-by: Jens Axboe --- block/ioctl.c | 2 -- block/partitions/core.c | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index 9c73a763ef88..438f79c564cf 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -20,8 +20,6 @@ static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition p; sector_t start, length; - if (disk->flags & GENHD_FL_NO_PART) - return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) diff --git a/block/partitions/core.c b/block/partitions/core.c index cab0d76a828e..5f5ed5c75f04 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -439,6 +439,11 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, goto out; } + if (disk->flags & GENHD_FL_NO_PART) { + ret = -EINVAL; + goto out; + } + if (partition_overlaps(disk, start, length, -1)) { ret = -EBUSY; goto out; From 4d5b7daa3c610af3f322ad1e91fc0c752ff32f0e Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 17 Jan 2024 17:58:14 -0800 Subject: [PATCH 194/768] drm/bridge: anx7625: Ensure bridge is suspended in disable() Similar to commit 26db46bc9c67 ("drm/bridge: parade-ps8640: Ensure bridge is suspended in .post_disable()"). Add a mutex to ensure that aux transfer won't race with atomic_disable by holding the PM reference and prevent the bridge from suspend. Also we need to use pm_runtime_put_sync_suspend() to suspend the bridge instead of idle with pm_runtime_put_sync(). Fixes: 3203e497eb76 ("drm/bridge: anx7625: Synchronously run runtime suspend.") Fixes: adca62ec370c ("drm/bridge: anx7625: Support reading edid through aux channel") Signed-off-by: Hsin-Yi Wang Tested-by: Xuxin Xiong Reviewed-by: Pin-yen Lin Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240118015916.2296741-1-hsinyi@chromium.org --- drivers/gpu/drm/bridge/analogix/anx7625.c | 7 ++++++- drivers/gpu/drm/bridge/analogix/anx7625.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index ef31033439bc..29d91493b101 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1762,6 +1762,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, u8 request = msg->request & ~DP_AUX_I2C_MOT; int ret = 0; + mutex_lock(&ctx->aux_lock); pm_runtime_get_sync(dev); msg->reply = 0; switch (request) { @@ -1778,6 +1779,7 @@ static ssize_t anx7625_aux_transfer(struct drm_dp_aux *aux, msg->size, msg->buffer); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); + mutex_unlock(&ctx->aux_lock); return ret; } @@ -2474,7 +2476,9 @@ static void anx7625_bridge_atomic_disable(struct drm_bridge *bridge, ctx->connector = NULL; anx7625_dp_stop(ctx); - pm_runtime_put_sync(dev); + mutex_lock(&ctx->aux_lock); + pm_runtime_put_sync_suspend(dev); + mutex_unlock(&ctx->aux_lock); } static enum drm_connector_status @@ -2668,6 +2672,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) mutex_init(&platform->lock); mutex_init(&platform->hdcp_wq_lock); + mutex_init(&platform->aux_lock); INIT_DELAYED_WORK(&platform->hdcp_work, hdcp_check_work_func); platform->hdcp_workqueue = create_workqueue("hdcp workqueue"); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 66ebee7f3d83..39ed35d33836 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -475,6 +475,8 @@ struct anx7625_data { struct workqueue_struct *hdcp_workqueue; /* Lock for hdcp work queue */ struct mutex hdcp_wq_lock; + /* Lock for aux transfer and disable */ + struct mutex aux_lock; char edid_block; struct display_timing dt; u8 display_timing_valid; From 1a84c213146a06aca1fd0e5b376ab7d36d15e1b3 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 14 Nov 2023 15:10:33 +0700 Subject: [PATCH 195/768] drm/dp_mst: Separate @failing_port list in drm_dp_mst_atomic_check_mgr() comment Stephen Rothwell reported htmldocs warnings when merging drm-intel tree: Documentation/gpu/drm-kms-helpers:296: drivers/gpu/drm/display/drm_dp_mst_topology.c:5484: ERROR: Unexpected indentation. Documentation/gpu/drm-kms-helpers:296: drivers/gpu/drm/display/drm_dp_mst_topology.c:5488: WARNING: Block quote ends without a blank line; unexpected unindent. Separate @failing_port return value list by surrounding it with a blank line to fix above warnings. Fixes: 1cd0a5ea427931 ("drm/dp_mst: Factor out a helper to check the atomic state of a topology manager") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20231114141715.6f435118@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231114081033.27343-1-bagasdotme@gmail.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index bd6c24d4213c..f7c6b60629c2 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5491,6 +5491,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); * - 0 if the new state is valid * - %-ENOSPC, if the new state is invalid, because of BW limitation * @failing_port is set to: + * * - The non-root port where a BW limit check failed * with all the ports downstream of @failing_port passing * the BW limit check. @@ -5499,6 +5500,7 @@ EXPORT_SYMBOL(drm_dp_mst_atomic_enable_dsc); * - %NULL if the BW limit check failed at the root port * with all the ports downstream of the root port passing * the BW limit check. + * * - %-EINVAL, if the new state is invalid, because the root port has * too many payloads. */ From 612e1110d689387aab81b2727895cd307d3cbbfd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 22 Jan 2024 12:25:00 -0500 Subject: [PATCH 196/768] bcachefs: Add gfp flags param to bch2_prt_task_backtrace() Fixes: e6a2566f7a00 ("bcachefs: Better journal tracepoints") Signed-off-by: Kent Overstreet Reported-by: smatch --- fs/bcachefs/btree_locking.c | 4 ++-- fs/bcachefs/debug.c | 2 +- fs/bcachefs/journal.c | 2 +- fs/bcachefs/util.c | 10 +++++----- fs/bcachefs/util.h | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index bed75c93c069..684397442338 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -92,7 +92,7 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) continue; bch2_btree_trans_to_text(out, i->trans); - bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1); + bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1, GFP_NOWAIT); } } @@ -227,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) prt_printf(&buf, "backtrace:"); prt_newline(&buf); printbuf_indent_add(&buf, 2); - bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2); + bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT); printbuf_indent_sub(&buf, 2); prt_newline(&buf); } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index cadda9bbe4a4..7bdba8507fc9 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -627,7 +627,7 @@ restart: prt_printf(&i->buf, "backtrace:"); prt_newline(&i->buf); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, task, 0); + bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index d71d26e39521..bc890776eb57 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -233,7 +233,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t prt_str(&pbuf, "entry size: "); prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data)); prt_newline(&pbuf); - bch2_prt_task_backtrace(&pbuf, current, 1); + bch2_prt_task_backtrace(&pbuf, current, 1, GFP_NOWAIT); trace_journal_entry_close(c, pbuf.buf); printbuf_exit(&pbuf); } diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index a135136adeee..56b815fd9fc6 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -272,14 +272,14 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) console_unlock(); } -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr) +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, + gfp_t gfp) { #ifdef CONFIG_STACKTRACE unsigned nr_entries = 0; - int ret = 0; stack->nr = 0; - ret = darray_make_room(stack, 32); + int ret = darray_make_room_gfp(stack, 32, gfp); if (ret) return ret; @@ -308,10 +308,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) } } -int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr) +int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp) { bch_stacktrace stack = { 0 }; - int ret = bch2_save_backtrace(&stack, task, skipnr + 1); + int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp); bch2_prt_backtrace(out, &stack); darray_exit(&stack); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index df67bf55fe2b..b414736d59a5 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -348,9 +348,9 @@ void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *prefix, const char *lines); typedef DARRAY(unsigned long) bch_stacktrace; -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned); +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); -int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned); +int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned, gfp_t); static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev) { From 3e44f325f6f75078cdcd44cd337f517ba3650d05 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jan 2024 08:36:55 +0100 Subject: [PATCH 197/768] bcachefs: fix incorrect usage of REQ_OP_FLUSH REQ_OP_FLUSH is only for internal use in the blk-mq and request based drivers. File systems and other block layer consumers must use REQ_OP_WRITE | REQ_PREFLUSH as documented in Documentation/block/writeback_cache_control.rst. While REQ_OP_FLUSH appears to work for blk-mq drivers it does not get the proper flush state machine handling, and completely fails for any bio based drivers, including all the stacking drivers. The block layer will also get a check in 6.8 to reject this use case entirely. [Note: completely untested, but as this never got fixed since the original bug report in November: https://bugzilla.kernel.org/show_bug.cgi?id=218184 and the the discussion in December: https://lore.kernel.org/all/20231221053016.72cqcfg46vxwohcj@moria.home.lan/T/ this seems to be best way to force it] Signed-off-by: Christoph Hellwig Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 2 +- fs/bcachefs/journal_io.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index dc52918d06ef..8c70123b6a0c 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -79,7 +79,7 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, continue; bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, - REQ_OP_FLUSH, + REQ_OP_WRITE|REQ_PREFLUSH, GFP_KERNEL, &c->nocow_flush_bioset), struct nocow_flush, bio); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 04a1e79a5ed3..bfd6585e746d 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1988,7 +1988,8 @@ CLOSURE_CALLBACK(bch2_journal_write) percpu_ref_get(&ca->io_ref); bio = ca->journal.bio; - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH); + bio_reset(bio, ca->disk_sb.bdev, + REQ_OP_WRITE|REQ_PREFLUSH); bio->bi_end_io = journal_write_endio; bio->bi_private = ca; closure_bio_submit(bio, cl); From d53271c05965b4469c57a18c66585075df81c504 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 11 Jan 2024 10:49:22 -0500 Subject: [PATCH 198/768] selftests/rseq: Do not skip !allowed_cpus for mm_cid Indexing with mm_cid is incompatible with skipping disallowed cpumask, because concurrency IDs are based on a virtual ID allocation which is unrelated to the physical CPU mask. These issues can be reproduced by running the rseq selftests under a taskset which excludes CPU 0, e.g. taskset -c 10-20 ./run_param_test.sh Signed-off-by: Mathieu Desnoyers Cc: Shuah Khan Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Boqun Feng Signed-off-by: Shuah Khan --- .../selftests/rseq/basic_percpu_ops_test.c | 14 ++++++++++-- tools/testing/selftests/rseq/param_test.c | 22 ++++++++++++++----- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index 887542961968..2348d2c20d0a 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -24,6 +24,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} #else # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID static @@ -36,6 +41,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} #endif struct percpu_lock_entry { @@ -274,7 +284,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -299,7 +309,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 20403d58345c..2f37961240ca 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -288,6 +288,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} # ifdef TEST_MEMBARRIER /* * Membarrier does not currently support targeting a mm_cid, so @@ -312,6 +317,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} # ifdef TEST_MEMBARRIER static int rseq_membarrier_expedited(int cpu) @@ -715,7 +725,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -752,7 +762,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { @@ -902,7 +912,7 @@ void test_percpu_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -952,7 +962,7 @@ void test_percpu_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_buffer_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_buffer_pop(&buffer, i))) { @@ -1113,7 +1123,7 @@ void test_percpu_memcpy_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -1160,7 +1170,7 @@ void test_percpu_memcpy_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_memcpy_buffer_node item; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { From 68deb9972079c9904fe714c049a7f08bd997a9ee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 16 Jan 2024 13:17:17 -0800 Subject: [PATCH 199/768] tools/testing/cxl: Disable "missing prototypes / declarations" warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent warnings of the form: tools/testing/cxl/test/mock.c:44:6: error: no previous prototype for ‘__wrap_is_acpi_device_node’ [-Werror=missing-prototypes] tools/testing/cxl/test/mock.c:63:5: error: no previous prototype for ‘__wrap_acpi_table_parse_cedt’ [-Werror=missing-prototypes] tools/testing/cxl/test/mock.c:81:13: error: no previous prototype for ‘__wrap_acpi_evaluate_integer’ [-Werror=missing-prototypes] ...by locally disabling some warnings. It turns out that: Commit 0fcb70851fbf ("Makefile.extrawarn: turn on missing-prototypes globally") ...in addition to expanding in-tree coverage, also impacts out-of-tree module builds like those in tools/testing/cxl/. Filter out the warning options on unit test code that does not effect mainline builds. Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/170543983780.460832.10920261849128601697.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/Kbuild | 2 ++ tools/testing/cxl/test/Kbuild | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 0b12c36902d8..caff3834671f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -65,4 +65,6 @@ cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o +KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) + obj-m += test/ diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild index 61d5f7bcddf9..6b1927897856 100644 --- a/tools/testing/cxl/test/Kbuild +++ b/tools/testing/cxl/test/Kbuild @@ -8,3 +8,5 @@ obj-m += cxl_mock_mem.o cxl_test-y := cxl.o cxl_mock-y := mock.o cxl_mock_mem-y := mem.o + +KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) From c97dac57c804b53eab492ca0230d86356729d633 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 16 Jan 2024 13:17:23 -0800 Subject: [PATCH 200/768] tools/testing/nvdimm: Disable "missing prototypes / declarations" warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent warnings of the form: tools/testing/nvdimm/config_check.c:4:6: error: no previous prototype for ‘check’ [-Werror=missing-prototypes] ...by locally disabling some warnings. It turns out that: Commit 0fcb70851fbf ("Makefile.extrawarn: turn on missing-prototypes globally") ...in addition to expanding in-tree coverage, also impacts out-of-tree module builds like those in tools/testing/nvdimm/. Filter out the warning options on unit test code that does not effect mainline builds. Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/170543984331.460832.1780246477583036191.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8153251ea389..91a3627f301a 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -82,4 +82,6 @@ libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o +KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) + obj-m += test/ From d72a4caf685989e353dff0a97d7376ee10edbf87 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 17 Jan 2024 17:24:01 -0800 Subject: [PATCH 201/768] cxl/pci: Skip irq features if MSI/MSI-X are not supported CXL 3.1 Section 3.1.1 states: "A Function on a CXL device must not generate INTx messages if that Function participates in CXL.cache protocol or CXL.mem protocols." The generic CXL memory driver only supports devices which use the CXL.mem protocol. The current driver attempts to allocate MSI/MSI-X vectors in anticipation of their need for mailbox interrupts or event processing. However, the above requirement does not require a device to support interrupts, only that they use MSI/MSI-X. For example, a device may disable mailbox interrupts and either be configured for firmware first or skip event processing and function. Dave Larsen reported that the following Intel / Agilex card does not support interrupts on function 0. CXL: Intel Corporation Device 0ddb (rev 01) (prog-if 10 [CXL Memory Device (CXL 2.x)]) Rather than fail device probe if interrupts are not supported; flag that irqs are not enabled and avoid features which require interrupts. Emit messages appropriate for the situation to aid in debugging should device behavior be unexpected due to a failure to allocate vectors. Note that it is possible for a device to have host based event processing through polling. However, the driver does not support polling and it is not anticipated to be generally required. Leave that functionality to a future patch if such a device comes along. Reported-by: Dave Larsen Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Signed-off-by: Ira Weiny Reviewed-and-tested-by: Davidlohr Bueso Link: https://lore.kernel.org/r/20240117-dont-fail-irq-v2-1-f33f26b0e365@intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 4fd1f207c84e..233e7c42c161 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -382,7 +382,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds, return rc; } -static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) +static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) { struct cxl_dev_state *cxlds = &mds->cxlds; const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); @@ -441,7 +441,7 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); /* background command interrupts are optional */ - if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ)) + if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail) return 0; msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); @@ -588,7 +588,7 @@ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf); } -static int cxl_alloc_irq_vectors(struct pci_dev *pdev) +static bool cxl_alloc_irq_vectors(struct pci_dev *pdev) { int nvecs; @@ -605,9 +605,9 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev) PCI_IRQ_MSIX | PCI_IRQ_MSI); if (nvecs < 1) { dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs); - return -ENXIO; + return false; } - return 0; + return true; } static irqreturn_t cxl_event_thread(int irq, void *id) @@ -743,7 +743,7 @@ static bool cxl_event_int_is_fw(u8 setting) } static int cxl_event_config(struct pci_host_bridge *host_bridge, - struct cxl_memdev_state *mds) + struct cxl_memdev_state *mds, bool irq_avail) { struct cxl_event_interrupt_policy policy; int rc; @@ -755,6 +755,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge, if (!host_bridge->native_cxl_error) return 0; + if (!irq_avail) { + dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); + return 0; + } + rc = cxl_mem_alloc_event_buf(mds); if (rc) return rc; @@ -789,6 +794,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct cxl_register_map map; struct cxl_memdev *cxlmd; int i, rc, pmu_count; + bool irq_avail; /* * Double check the anonymous union trickery in struct cxl_regs @@ -846,11 +852,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) else dev_warn(&pdev->dev, "Media not active (%d)\n", rc); - rc = cxl_alloc_irq_vectors(pdev); - if (rc) - return rc; + irq_avail = cxl_alloc_irq_vectors(pdev); - rc = cxl_pci_setup_mailbox(mds); + rc = cxl_pci_setup_mailbox(mds, irq_avail); if (rc) return rc; @@ -909,7 +913,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } } - rc = cxl_event_config(host_bridge, mds); + rc = cxl_event_config(host_bridge, mds, irq_avail); if (rc) return rc; From 27daa514c48d5796d564ea5410cb72f78a521891 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 6 Dec 2023 12:21:42 +0300 Subject: [PATCH 202/768] ELF, MAINTAINERS: specifically mention ELF People complain when I miss people in Cc. [ kees: Also add the ELF uapi doc link ] Signed-off-by: Alexey Dobriyan Link: https://lore.kernel.org/r/2cb0891e-d7c0-4939-bb5f-282812de6078@p183 Signed-off-by: Kees Cook --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..39219b144c23 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7955,12 +7955,13 @@ L: rust-for-linux@vger.kernel.org S: Maintained F: rust/kernel/net/phy.rs -EXEC & BINFMT API +EXEC & BINFMT API, ELF R: Eric Biederman R: Kees Cook L: linux-mm@kvack.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve +F: Documentation/userspace-api/ELF.rst F: fs/*binfmt_*.c F: fs/exec.c F: include/linux/binfmts.h From 8788a17c2319f020ccdc3f2907179a5ae81b7ad6 Mon Sep 17 00:00:00 2001 From: Askar Safin Date: Tue, 9 Jan 2024 06:04:34 +0300 Subject: [PATCH 203/768] exec: remove useless comment Function name is wrong and the comment tells us nothing Signed-off-by: Askar Safin Link: https://lore.kernel.org/r/20240109030801.31827-1-safinaskar@zohomail.com Signed-off-by: Kees Cook --- fs/exec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 8cdd5b2dd09c..ba7d0548ac57 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1826,9 +1826,6 @@ static int exec_binprm(struct linux_binprm *bprm) return 0; } -/* - * sys_execve() executes a new program. - */ static int bprm_execve(struct linux_binprm *bprm) { int retval; From 22fb4f041999f5f16ecbda15a2859b4ef4cbf47e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 19 Jan 2024 05:33:19 -0600 Subject: [PATCH 204/768] cpufreq/amd-pstate: Fix setting scaling max/min freq values Scaling min/max freq values were being cached and lagging a setting each time. Fix the ordering of the clamp call to ensure they work. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217931 Fixes: febab20caeba ("cpufreq/amd-pstate: Fix scaling_min_freq and scaling_max_freq update") Signed-off-by: Mario Limonciello Reviewed-by: Wyes Karny Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1f6186475715..1791d37fbc53 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1232,14 +1232,13 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, cpudata->max_limit_perf); min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, cpudata->max_limit_perf); - - WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); - WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); - value = READ_ONCE(cpudata->cppc_req_cached); if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) From 92c02d74ba7b7cdf3887ed56bc9af38c3ee17a8b Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Mon, 22 Jan 2024 14:20:27 +0800 Subject: [PATCH 205/768] ASoC: codecs: ES8326: Remove executable bit Remove the executable bit that was unintentionally turned on. Fixes: ee09084fbf9f ("ASoC: codecs: ES8326: Add chip version flag") Signed-off-by: Fei Shao Link: https://msgid.link/r/20240122062055.1673597-1-fshao@chromium.org Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 sound/soc/codecs/es8326.c diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c old mode 100755 new mode 100644 From c481016bb4f8a9c059c39ac06e7b65e233a61f6a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jan 2024 19:18:17 +0100 Subject: [PATCH 206/768] ASoC: qcom: sc8280xp: limit speaker volumes The UCM configuration for the Lenovo ThinkPad X13s has up until now been setting the speaker PA volume to the minimum -3 dB when enabling the speakers, but this does not prevent the user from increasing the volume further. Limit the digital gain and PA volumes to a combined -3 dB in the machine driver to reduce the risk of speaker damage until we have active speaker protection in place (or higher safe levels have been established). Note that the PA volume limit cannot be set lower than 0 dB or PulseAudio gets confused when the first 16 levels all map to -3 dB. Also note that this will probably need to be generalised using machine-specific limits, but a common limit should do for now. Cc: # 6.5 Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240122181819.4038-3-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/qcom/sc8280xp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index ed4bb551bfbb..b7fd503a1666 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -32,12 +32,14 @@ static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd) case WSA_CODEC_DMA_RX_0: case WSA_CODEC_DMA_RX_1: /* - * set limit of 0dB on Digital Volume for Speakers, - * this can prevent damage of speakers to some extent without - * active speaker protection + * Set limit of -3 dB on Digital Volume and 0 dB on PA Volume + * to reduce the risk of speaker damage until we have active + * speaker protection in place. */ - snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84); - snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84); + snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 81); + snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 81); + snd_soc_limit_volume(card, "SpkrLeft PA Volume", 17); + snd_soc_limit_volume(card, "SpkrRight PA Volume", 17); break; default: break; From bdd8f62431ebcf15902a5fce3336388e436405c6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Sep 2022 17:11:18 -0700 Subject: [PATCH 207/768] exec: Add do_close_execat() helper Consolidate the calls to allow_write_access()/fput() into a single place, since we repeat this code pattern. Add comments around the callers for the details on it. Link: https://lore.kernel.org/r/202209161637.9EDAF6B18@keescook Signed-off-by: Kees Cook --- fs/exec.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index ba7d0548ac57..2037cc636036 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -904,6 +904,10 @@ EXPORT_SYMBOL(transfer_args_to_stack); #endif /* CONFIG_MMU */ +/* + * On success, caller must call do_close_execat() on the returned + * struct file to close it. + */ static struct file *do_open_execat(int fd, struct filename *name, int flags) { struct file *file; @@ -948,6 +952,17 @@ exit: return ERR_PTR(err); } +/** + * open_exec - Open a path name for execution + * + * @name: path name to open with the intent of executing it. + * + * Returns ERR_PTR on failure or allocated struct file on success. + * + * As this is a wrapper for the internal do_open_execat(), callers + * must call allow_write_access() before fput() on release. Also see + * do_close_execat(). + */ struct file *open_exec(const char *name) { struct filename *filename = getname_kernel(name); @@ -1484,6 +1499,15 @@ static int prepare_bprm_creds(struct linux_binprm *bprm) return -ENOMEM; } +/* Matches do_open_execat() */ +static void do_close_execat(struct file *file) +{ + if (!file) + return; + allow_write_access(file); + fput(file); +} + static void free_bprm(struct linux_binprm *bprm) { if (bprm->mm) { @@ -1495,10 +1519,7 @@ static void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } - if (bprm->file) { - allow_write_access(bprm->file); - fput(bprm->file); - } + do_close_execat(bprm->file); if (bprm->executable) fput(bprm->executable); /* If a binfmt changed the interp, free it. */ @@ -1520,8 +1541,7 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) { - allow_write_access(file); - fput(file); + do_close_execat(file); return ERR_PTR(-ENOMEM); } From afa6ac2690bb9904ff883c6e942281e1032a484d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 22 Jan 2024 21:32:01 +0100 Subject: [PATCH 208/768] HID: logitech-hidpp: add support for Logitech G Pro X Superlight 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let logitech-hidpp driver claim Logitech G Pro X Superlight 2. Reported-by: Marcus Rückert Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index fd6d8f1d9b8f..6ef0c88e3e60 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4610,6 +4610,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) }, { /* Logitech G Pro X Superlight Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) }, + { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) }, { /* G935 Gaming Headset */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87), From 84c39ec57d409e803a9bb6e4e85daf1243e0e80b Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Mon, 22 Jan 2024 19:34:21 +0100 Subject: [PATCH 209/768] exec: Fix error handling in begin_new_exec() If get_unused_fd_flags() fails, the error handling is incomplete because bprm->cred is already set to NULL, and therefore free_bprm will not unlock the cred_guard_mutex. Note there are two error conditions which end up here, one before and one after bprm->cred is cleared. Fixes: b8a61c9e7b4a ("exec: Generic execfd support") Signed-off-by: Bernd Edlinger Acked-by: Eric W. Biederman Link: https://lore.kernel.org/r/AS8P193MB128517ADB5EFF29E04389EDAE4752@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- fs/exec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/exec.c b/fs/exec.c index 2037cc636036..39d773021fff 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1424,6 +1424,9 @@ int begin_new_exec(struct linux_binprm * bprm) out_unlock: up_write(&me->signal->exec_update_lock); + if (!bprm->cred) + mutex_unlock(&me->signal->cred_guard_mutex); + out: return retval; } From 73ae7e1c7644a8c33ba526302a10267cdbc249f8 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 11 Jan 2024 17:57:44 +0100 Subject: [PATCH 210/768] ata: libata-sata: improve sysfs description for ATA_LPM_UNKNOWN Currently, both ATA_LPM_UNKNOWN (0) and ATA_LPM_MAX_POWER (1) displays as "max_performance" in sysfs. This is quite misleading as they are not the same. For ATA_LPM_UNKNOWN, ata_eh_set_lpm() will not be called at all, leaving the configuration in unknown state. For ATA_LPM_MAX_POWER, ata_eh_set_lpm() is called, and setting the policy to ATA_LPM_MAX_POWER. This also matches the description of the SATA_MOBILE_LPM_POLICY Kconfig: 0 => Keep firmware settings 1 => Maximum performance Thus, update the sysfs description for ATA_LPM_UNKNOWN to match reality. While at it, update libata.h to mention that the ascii descriptions are in libata-sata.c and not in libata-scsi.c. Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-sata.c | 2 +- include/linux/libata.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index b6656c287175..0fb1934875f2 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -784,7 +784,7 @@ bool sata_lpm_ignore_phy_events(struct ata_link *link) EXPORT_SYMBOL_GPL(sata_lpm_ignore_phy_events); static const char *ata_lpm_policy_names[] = { - [ATA_LPM_UNKNOWN] = "max_performance", + [ATA_LPM_UNKNOWN] = "keep_firmware_settings", [ATA_LPM_MAX_POWER] = "max_performance", [ATA_LPM_MED_POWER] = "medium_power", [ATA_LPM_MED_POWER_WITH_DIPM] = "med_power_with_dipm", diff --git a/include/linux/libata.h b/include/linux/libata.h index 1dbb14daccfa..26d68115afb8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -471,7 +471,7 @@ enum ata_completion_errors { /* * Link power management policy: If you alter this, you also need to - * alter libata-scsi.c (for the ascii descriptions) + * alter libata-sata.c (for the ascii descriptions) */ enum ata_lpm_policy { ATA_LPM_UNKNOWN, From 018856c3f171517c66d5d7d3755ae0c517924fd7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Jan 2024 16:04:58 +0100 Subject: [PATCH 211/768] fbcon: Fix incorrect printed function name in fbcon_prepare_logo() If the boot logo does not fit, a message is printed, including a wrong function name prefix. Instead of correcting the function name (or using __func__), just use "fbcon", like is done in several other messages. While at it, modernize the call by switching to pr_info(). Signed-off-by: Geert Uytterhoeven Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fbcon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 63af6ab034b5..1183e7a871f8 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -631,8 +631,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, if (logo_lines > vc->vc_bottom) { logo_shown = FBCON_LOGO_CANSHOW; - printk(KERN_INFO - "fbcon_init: disable boot-logo (boot-logo bigger than screen).\n"); + pr_info("fbcon: disable boot-logo (boot-logo bigger than screen).\n"); } else { logo_shown = FBCON_LOGO_DRAW; vc->vc_top = logo_lines; From 202bc57b675601bc07b5942369ecc16af64d1b95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jan 2024 17:17:36 +0000 Subject: [PATCH 212/768] netfs: Don't use certain unnecessary folio_*() functions Filesystems should use folio->index and folio->mapping, instead of folio_index(folio), folio_mapping() and folio_file_mapping() since they know that it's in the pagecache. Change this automagically with: perl -p -i -e 's/folio_mapping[(]([^)]*)[)]/\1->mapping/g' fs/netfs/*.c perl -p -i -e 's/folio_file_mapping[(]([^)]*)[)]/\1->mapping/g' fs/netfs/*.c perl -p -i -e 's/folio_index[(]([^)]*)[)]/\1->index/g' fs/netfs/*.c Reported-by: Matthew Wilcox Signed-off-by: David Howells cc: Jeff Layton cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-cifs@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: linux-fsdevel@vger.kernel.org --- fs/netfs/buffered_read.c | 12 ++++++------ fs/netfs/buffered_write.c | 10 +++++----- fs/netfs/io.c | 2 +- fs/netfs/misc.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a59e7b2edaac..3298c29b5548 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -101,7 +101,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { - if (folio_index(folio) == rreq->no_unlock_folio && + if (folio->index == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) _debug("no unlock"); else @@ -246,13 +246,13 @@ EXPORT_SYMBOL(netfs_readahead); */ int netfs_read_folio(struct file *file, struct folio *folio) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct netfs_io_request *rreq; struct netfs_inode *ctx = netfs_inode(mapping->host); struct folio *sink = NULL; int ret; - _enter("%lx", folio_index(folio)); + _enter("%lx", folio->index); rreq = netfs_alloc_request(mapping, file, folio_file_pos(folio), folio_size(folio), @@ -460,7 +460,7 @@ retry: ret = PTR_ERR(rreq); goto error; } - rreq->no_unlock_folio = folio_index(folio); + rreq->no_unlock_folio = folio->index; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); @@ -518,7 +518,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len) { struct netfs_io_request *rreq; - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct netfs_inode *ctx = netfs_inode(mapping->host); unsigned long long start = folio_pos(folio); size_t flen = folio_size(folio); @@ -535,7 +535,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, goto error; } - rreq->no_unlock_folio = folio_index(folio); + rreq->no_unlock_folio = folio->index; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 93dc76f34e39..e7f9ba6fb16b 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -343,7 +343,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, break; default: WARN(true, "Unexpected modify type %u ix=%lx\n", - howto, folio_index(folio)); + howto, folio->index); ret = -EIO; goto error_folio_unlock; } @@ -648,7 +648,7 @@ static void netfs_pages_written_back(struct netfs_io_request *wreq) xas_for_each(&xas, folio, last) { WARN(!folio_test_writeback(folio), "bad %zx @%llx page %lx %lx\n", - wreq->len, wreq->start, folio_index(folio), last); + wreq->len, wreq->start, folio->index, last); if ((finfo = netfs_folio_info(folio))) { /* Streaming writes cannot be redirtied whilst under @@ -795,7 +795,7 @@ static void netfs_extend_writeback(struct address_space *mapping, continue; if (xa_is_value(folio)) break; - if (folio_index(folio) != index) { + if (folio->index != index) { xas_reset(xas); break; } @@ -901,7 +901,7 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping, long count = wbc->nr_to_write; int ret; - _enter(",%lx,%llx-%llx,%u", folio_index(folio), start, end, caching); + _enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching); wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio), NETFS_WRITEBACK); @@ -1047,7 +1047,7 @@ search_again: start = folio_pos(folio); /* May regress with THPs */ - _debug("wback %lx", folio_index(folio)); + _debug("wback %lx", folio->index); /* At this point we hold neither the i_pages lock nor the page lock: * the page may be truncated or invalidated (changing page->mapping to diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 4309edf33862..e8ff1e61ce79 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -124,7 +124,7 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, /* We might have multiple writes from the same huge * folio, but we mustn't unlock a folio more than once. */ - if (have_unlocked && folio_index(folio) <= unlocked) + if (have_unlocked && folio->index <= unlocked) continue; unlocked = folio_next_index(folio) - 1; trace_netfs_folio(folio, netfs_folio_trace_end_copy); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 0e3af37fc924..90051ced8e2a 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -180,7 +180,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) struct netfs_folio *finfo = NULL; size_t flen = folio_size(folio); - _enter("{%lx},%zx,%zx", folio_index(folio), offset, length); + _enter("{%lx},%zx,%zx", folio->index, offset, length); folio_wait_fscache(folio); From fa7d614da3c556c7ef71023cb8c410a3e8571a42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jan 2024 17:51:08 +0000 Subject: [PATCH 213/768] afs: Don't use certain unnecessary folio_*() functions Filesystems should use folio->index and folio->mapping, instead of folio_index(folio), folio_mapping() and folio_file_mapping() since they know that it's in the pagecache. Change this automagically with: perl -p -i -e 's/folio_mapping[(]([^)]*)[)]/\1->mapping/g' fs/afs/*.c perl -p -i -e 's/folio_file_mapping[(]([^)]*)[)]/\1->mapping/g' fs/afs/*.c perl -p -i -e 's/folio_index[(]([^)]*)[)]/\1->index/g' fs/afs/*.c Reported-by: Matthew Wilcox Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org --- fs/afs/dir.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index c14533ef108f..3f73d61f7c8a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -124,7 +124,7 @@ static void afs_dir_read_cleanup(struct afs_read *req) if (xas_retry(&xas, folio)) continue; BUG_ON(xa_is_value(folio)); - ASSERTCMP(folio_file_mapping(folio), ==, mapping); + ASSERTCMP(folio->mapping, ==, mapping); folio_put(folio); } @@ -202,12 +202,12 @@ static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req) if (xas_retry(&xas, folio)) continue; - BUG_ON(folio_file_mapping(folio) != mapping); + BUG_ON(folio->mapping != mapping); size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio)); for (offset = 0; offset < size; offset += sizeof(*block)) { block = kmap_local_folio(folio, offset); - pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block); + pr_warn("[%02lx] %32phN\n", folio->index + offset, block); kunmap_local(block); } } @@ -233,7 +233,7 @@ static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req) if (xas_retry(&xas, folio)) continue; - BUG_ON(folio_file_mapping(folio) != mapping); + BUG_ON(folio->mapping != mapping); if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) { afs_dir_dump(dvnode, req); @@ -2022,7 +2022,7 @@ static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags) { struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio)); - _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio)); + _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio->index); folio_detach_private(folio); From c40497d82387188f14d9adc4caa58ee1cb1999e1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jan 2024 17:54:35 +0000 Subject: [PATCH 214/768] cifs: Don't use certain unnecessary folio_*() functions Filesystems should use folio->index and folio->mapping, instead of folio_index(folio), folio_mapping() and folio_file_mapping() since they know that it's in the pagecache. Change this automagically with: perl -p -i -e 's/folio_mapping[(]([^)]*)[)]/\1->mapping/g' fs/smb/client/*.c perl -p -i -e 's/folio_file_mapping[(]([^)]*)[)]/\1->mapping/g' fs/smb/client/*.c perl -p -i -e 's/folio_index[(]([^)]*)[)]/\1->index/g' fs/smb/client/*.c Reported-by: Matthew Wilcox Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Paulo Alcantara cc: Ronnie Sahlberg cc: Shyam Prasad N cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org --- fs/smb/client/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 3a213432775b..90da81d0372a 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -87,7 +87,7 @@ void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len continue; if (!folio_test_writeback(folio)) { WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", - len, start, folio_index(folio), end); + len, start, folio->index, end); continue; } @@ -120,7 +120,7 @@ void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len continue; if (!folio_test_writeback(folio)) { WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", - len, start, folio_index(folio), end); + len, start, folio->index, end); continue; } @@ -151,7 +151,7 @@ void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int le xas_for_each(&xas, folio, end) { if (!folio_test_writeback(folio)) { WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", - len, start, folio_index(folio), end); + len, start, folio->index, end); continue; } @@ -2651,7 +2651,7 @@ static void cifs_extend_writeback(struct address_space *mapping, continue; if (xa_is_value(folio)) break; - if (folio_index(folio) != index) + if (folio->index != index) break; if (!folio_try_get_rcu(folio)) { xas_reset(&xas); @@ -2899,7 +2899,7 @@ redo_folio: goto skip_write; } - if (folio_mapping(folio) != mapping || + if (folio->mapping != mapping || !folio_test_dirty(folio)) { start += folio_size(folio); folio_unlock(folio); From 3be0b3ed1d76c6703b9ee482b55f7e01c369cc68 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 09:59:41 +0300 Subject: [PATCH 215/768] netfs, fscache: Prevent Oops in fscache_put_cache() This function dereferences "cache" and then checks if it's IS_ERR_OR_NULL(). Check first, then dereference. Fixes: 9549332df4ed ("fscache: Implement cache registration") Signed-off-by: Dan Carpenter Signed-off-by: David Howells Link: https://lore.kernel.org/r/e84bc740-3502-4f16-982a-a40d5676615c@moroto.mountain/ # v2 --- fs/netfs/fscache_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c index d645f8b302a2..9397ed39b0b4 100644 --- a/fs/netfs/fscache_cache.c +++ b/fs/netfs/fscache_cache.c @@ -179,13 +179,14 @@ EXPORT_SYMBOL(fscache_acquire_cache); void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where) { - unsigned int debug_id = cache->debug_id; + unsigned int debug_id; bool zero; int ref; if (IS_ERR_OR_NULL(cache)) return; + debug_id = cache->debug_id; zero = __refcount_dec_and_test(&cache->ref, &ref); trace_fscache_cache(debug_id, ref - 1, where); From 843609df0be792991b3c4a720d6be4828d48dec4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:54:42 +0300 Subject: [PATCH 216/768] netfs: Fix a NULL vs IS_ERR() check in netfs_perform_write() The netfs_grab_folio_for_write() function doesn't return NULL, it returns error pointers. Update the check accordingly. Fixes: c38f4e96e605 ("netfs: Provide func to copy data to pagecache for buffered write") Signed-off-by: Dan Carpenter Signed-off-by: David Howells Link: https://lore.kernel.org/r/29fb1310-8e2d-47ba-b68d-40354eb7b896@moroto.mountain/ --- fs/netfs/buffered_write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index e7f9ba6fb16b..a3059b3168fd 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -221,10 +221,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(fault_in_iov_iter_readable(iter, part) == part)) break; - ret = -ENOMEM; folio = netfs_grab_folio_for_write(mapping, pos, part); - if (!folio) + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); break; + } flen = folio_size(folio); offset = pos & (flen - 1); From 2b44760609e9eaafc9d234a6883d042fc21132a7 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 22 Jan 2024 16:09:28 +0100 Subject: [PATCH 217/768] tracing: Ensure visibility when inserting an element into tracing_map Running the following two commands in parallel on a multi-processor AArch64 machine can sporadically produce an unexpected warning about duplicate histogram entries: $ while true; do echo hist:key=id.syscall:val=hitcount > \ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist sleep 0.001 done $ stress-ng --sysbadaddr $(nproc) The warning looks as follows: [ 2911.172474] ------------[ cut here ]------------ [ 2911.173111] Duplicates detected: 1 [ 2911.173574] WARNING: CPU: 2 PID: 12247 at kernel/trace/tracing_map.c:983 tracing_map_sort_entries+0x3e0/0x408 [ 2911.174702] Modules linked in: iscsi_ibft(E) iscsi_boot_sysfs(E) rfkill(E) af_packet(E) nls_iso8859_1(E) nls_cp437(E) vfat(E) fat(E) ena(E) tiny_power_button(E) qemu_fw_cfg(E) button(E) fuse(E) efi_pstore(E) ip_tables(E) x_tables(E) xfs(E) libcrc32c(E) aes_ce_blk(E) aes_ce_cipher(E) crct10dif_ce(E) polyval_ce(E) polyval_generic(E) ghash_ce(E) gf128mul(E) sm4_ce_gcm(E) sm4_ce_ccm(E) sm4_ce(E) sm4_ce_cipher(E) sm4(E) sm3_ce(E) sm3(E) sha3_ce(E) sha512_ce(E) sha512_arm64(E) sha2_ce(E) sha256_arm64(E) nvme(E) sha1_ce(E) nvme_core(E) nvme_auth(E) t10_pi(E) sg(E) scsi_mod(E) scsi_common(E) efivarfs(E) [ 2911.174738] Unloaded tainted modules: cppc_cpufreq(E):1 [ 2911.180985] CPU: 2 PID: 12247 Comm: cat Kdump: loaded Tainted: G E 6.7.0-default #2 1b58bbb22c97e4399dc09f92d309344f69c44a01 [ 2911.182398] Hardware name: Amazon EC2 c7g.8xlarge/, BIOS 1.0 11/1/2018 [ 2911.183208] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 2911.184038] pc : tracing_map_sort_entries+0x3e0/0x408 [ 2911.184667] lr : tracing_map_sort_entries+0x3e0/0x408 [ 2911.185310] sp : ffff8000a1513900 [ 2911.185750] x29: ffff8000a1513900 x28: ffff0003f272fe80 x27: 0000000000000001 [ 2911.186600] x26: ffff0003f272fe80 x25: 0000000000000030 x24: 0000000000000008 [ 2911.187458] x23: ffff0003c5788000 x22: ffff0003c16710c8 x21: ffff80008017f180 [ 2911.188310] x20: ffff80008017f000 x19: ffff80008017f180 x18: ffffffffffffffff [ 2911.189160] x17: 0000000000000000 x16: 0000000000000000 x15: ffff8000a15134b8 [ 2911.190015] x14: 0000000000000000 x13: 205d373432323154 x12: 5b5d313131333731 [ 2911.190844] x11: 00000000fffeffff x10: 00000000fffeffff x9 : ffffd1b78274a13c [ 2911.191716] x8 : 000000000017ffe8 x7 : c0000000fffeffff x6 : 000000000057ffa8 [ 2911.192554] x5 : ffff0012f6c24ec0 x4 : 0000000000000000 x3 : ffff2e5b72b5d000 [ 2911.193404] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0003ff254480 [ 2911.194259] Call trace: [ 2911.194626] tracing_map_sort_entries+0x3e0/0x408 [ 2911.195220] hist_show+0x124/0x800 [ 2911.195692] seq_read_iter+0x1d4/0x4e8 [ 2911.196193] seq_read+0xe8/0x138 [ 2911.196638] vfs_read+0xc8/0x300 [ 2911.197078] ksys_read+0x70/0x108 [ 2911.197534] __arm64_sys_read+0x24/0x38 [ 2911.198046] invoke_syscall+0x78/0x108 [ 2911.198553] el0_svc_common.constprop.0+0xd0/0xf8 [ 2911.199157] do_el0_svc+0x28/0x40 [ 2911.199613] el0_svc+0x40/0x178 [ 2911.200048] el0t_64_sync_handler+0x13c/0x158 [ 2911.200621] el0t_64_sync+0x1a8/0x1b0 [ 2911.201115] ---[ end trace 0000000000000000 ]--- The problem appears to be caused by CPU reordering of writes issued from __tracing_map_insert(). The check for the presence of an element with a given key in this function is: val = READ_ONCE(entry->val); if (val && keys_match(key, val->key, map->key_size)) ... The write of a new entry is: elt = get_free_elt(map); memcpy(elt->key, key, map->key_size); entry->val = elt; The "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;" stores may become visible in the reversed order on another CPU. This second CPU might then incorrectly determine that a new key doesn't match an already present val->key and subsequently insert a new element, resulting in a duplicate. Fix the problem by adding a write barrier between "memcpy(elt->key, key, map->key_size);" and "entry->val = elt;", and for good measure, also use WRITE_ONCE(entry->val, elt) for publishing the element. The sequence pairs with the mentioned "READ_ONCE(entry->val);" and the "val->key" check which has an address dependency. The barrier is placed on a path executed when adding an element for a new key. Subsequent updates targeting the same key remain unaffected. From the user's perspective, the issue was introduced by commit c193707dde77 ("tracing: Remove code which merges duplicates"), which followed commit cbf4100efb8f ("tracing: Add support to detect and avoid duplicates"). The previous code operated differently; it inherently expected potential races which result in duplicates but merged them later when they occurred. Link: https://lore.kernel.org/linux-trace-kernel/20240122150928.27725-1-petr.pavlu@suse.com Fixes: c193707dde77 ("tracing: Remove code which merges duplicates") Signed-off-by: Petr Pavlu Acked-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/tracing_map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index c774e560f2f9..a4dcf0f24352 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) } memcpy(elt->key, key, map->key_size); - entry->val = elt; + /* + * Ensure the initialization is visible and + * publish the elt. + */ + smp_wmb(); + WRITE_ONCE(entry->val, elt); atomic64_inc(&map->hits); return entry->val; From c3d6569a43322f371e7ba0ad386112723757ac8f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 Jan 2024 20:49:34 +0000 Subject: [PATCH 218/768] cachefiles, erofs: Fix NULL deref in when cachefiles is not doing ondemand-mode cachefiles_ondemand_init_object() as called from cachefiles_open_file() and cachefiles_create_tmpfile() does not check if object->ondemand is set before dereferencing it, leading to an oops something like: RIP: 0010:cachefiles_ondemand_init_object+0x9/0x41 ... Call Trace: cachefiles_open_file+0xc9/0x187 cachefiles_lookup_cookie+0x122/0x2be fscache_cookie_state_machine+0xbe/0x32b fscache_cookie_worker+0x1f/0x2d process_one_work+0x136/0x208 process_scheduled_works+0x3a/0x41 worker_thread+0x1a2/0x1f6 kthread+0xca/0xd2 ret_from_fork+0x21/0x33 Fix this by making cachefiles_ondemand_init_object() return immediately if cachefiles->ondemand is NULL. Fixes: 3c5ecfe16e76 ("cachefiles: extract ondemand info field from cachefiles_object") Reported-by: Marc Dionne Signed-off-by: David Howells cc: Gao Xiang cc: Chao Yu cc: Yue Hu cc: Jeffle Xu cc: linux-erofs@lists.ozlabs.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org --- fs/cachefiles/ondemand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 5fd74ec60bef..4ba42f1fa3b4 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -539,6 +539,9 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) struct fscache_volume *volume = object->volume->vcookie; size_t volume_key_size, cookie_key_size, data_len; + if (!object->ondemand) + return 0; + /* * CacheFiles will firstly check the cache file under the root cache * directory. If the coherency check failed, it will fallback to From 57e9d49c54528c49b8bffe6d99d782ea051ea534 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Jan 2024 17:22:36 +0000 Subject: [PATCH 219/768] afs: Hide silly-rename files from userspace There appears to be a race between silly-rename files being created/removed and various userspace tools iterating over the contents of a directory, leading to such errors as: find: './kernel/.tmp_cpio_dir/include/dt-bindings/reset/.__afs2080': No such file or directory tar: ./include/linux/greybus/.__afs3C95: File removed before we read it when building a kernel. Fix afs_readdir() so that it doesn't return .__afsXXXX silly-rename files to userspace. This doesn't stop them being looked up directly by name as we need to be able to look them up from within the kernel as part of the silly-rename algorithm. Fixes: 79ddbfa500b3 ("afs: Implement sillyrename for unlink and rename") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 3f73d61f7c8a..eface67ccc06 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -474,6 +474,14 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, continue; } + /* Don't expose silly rename entries to userspace. */ + if (nlen > 6 && + dire->u.name[0] == '.' && + ctx->actor != afs_lookup_filldir && + ctx->actor != afs_lookup_one_filldir && + memcmp(dire->u.name, ".__afs", 6) == 0) + continue; + /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, ntohl(dire->u.vnode), From 17ba6f0bd14fe3ac606aac6bebe5e69bdaad8ba1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2024 14:02:37 +0000 Subject: [PATCH 220/768] afs: Fix error handling with lookup via FS.InlineBulkStatus When afs does a lookup, it tries to use FS.InlineBulkStatus to preemptively look up a bunch of files in the parent directory and cache this locally, on the basis that we might want to look at them too (for example if someone does an ls on a directory, they may want want to then stat every file listed). FS.InlineBulkStatus can be considered a compound op with the normal abort code applying to the compound as a whole. Each status fetch within the compound is then given its own individual abort code - but assuming no error that prevents the bulk fetch from returning the compound result will be 0, even if all the constituent status fetches failed. At the conclusion of afs_do_lookup(), we should use the abort code from the appropriate status to determine the error to return, if any - but instead it is assumed that we were successful if the op as a whole succeeded and we return an incompletely initialised inode, resulting in ENOENT, no matter the actual reason. In the particular instance reported, a vnode with no permission granted to be accessed is being given a UAEACCES abort code which should be reported as EACCES, but is instead being reported as ENOENT. Fix this by abandoning the inode (which will be cleaned up with the op) if file[1] has an abort code indicated and turn that abort code into an error instead. Whilst we're at it, add a tracepoint so that the abort codes of the individual subrequests of FS.InlineBulkStatus can be logged. At the moment only the container abort code can be 0. Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Reported-by: Jeffrey Altman Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/dir.c | 12 +++++++++--- include/trace/events/afs.h | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index eface67ccc06..b5b8de521f99 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -716,6 +716,8 @@ static void afs_do_lookup_success(struct afs_operation *op) break; } + if (vp->scb.status.abort_code) + trace_afs_bulkstat_error(op, &vp->fid, i, vp->scb.status.abort_code); if (!vp->scb.have_status && !vp->scb.have_error) continue; @@ -905,12 +907,16 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, afs_begin_vnode_operation(op); afs_wait_for_operation(op); } - inode = ERR_PTR(afs_op_error(op)); out_op: if (!afs_op_error(op)) { - inode = &op->file[1].vnode->netfs.inode; - op->file[1].vnode = NULL; + if (op->file[1].scb.status.abort_code) { + afs_op_accumulate_error(op, -ECONNABORTED, + op->file[1].scb.status.abort_code); + } else { + inode = &op->file[1].vnode->netfs.inode; + op->file[1].vnode = NULL; + } } if (op->file[0].scb.have_status) diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8d73171cb9f0..08f2c93d6b16 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -1071,6 +1071,31 @@ TRACE_EVENT(afs_file_error, __print_symbolic(__entry->where, afs_file_errors)) ); +TRACE_EVENT(afs_bulkstat_error, + TP_PROTO(struct afs_operation *op, struct afs_fid *fid, unsigned int index, s32 abort), + + TP_ARGS(op, fid, index, abort), + + TP_STRUCT__entry( + __field_struct(struct afs_fid, fid) + __field(unsigned int, op) + __field(unsigned int, index) + __field(s32, abort) + ), + + TP_fast_assign( + __entry->op = op->debug_id; + __entry->fid = *fid; + __entry->index = index; + __entry->abort = abort; + ), + + TP_printk("OP=%08x[%02x] %llx:%llx:%x a=%d", + __entry->op, __entry->index, + __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique, + __entry->abort) + ); + TRACE_EVENT(afs_cm_no_server, TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx), From cfcc005dbcc79f1e6bddc6fd4b3e8a1163a6d181 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 12 Jan 2024 21:59:44 +0000 Subject: [PATCH 221/768] afs: Remove afs_dynroot_d_revalidate() as it is redundant Remove afs_dynroot_d_revalidate() as it is redundant as all it does is return 1 and the caller assumes that if the op is not given. Suggested-by: Alexander Viro Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org --- fs/afs/dynroot.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index d3bc4a2d7085..c4d2711e20ad 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -258,16 +258,7 @@ const struct inode_operations afs_dynroot_inode_operations = { .lookup = afs_dynroot_lookup, }; -/* - * Dirs in the dynamic root don't need revalidation. - */ -static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags) -{ - return 1; -} - const struct dentry_operations afs_dynroot_dentry_operations = { - .d_revalidate = afs_dynroot_d_revalidate, .d_delete = always_delete_dentry, .d_release = afs_d_release, .d_automount = afs_d_automount, From b90493505347a4ca4d900f317e2b330e0e43ae2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Jan 2024 15:49:26 +0000 Subject: [PATCH 222/768] afs: Fix missing/incorrect unlocking of RCU read lock In afs_proc_addr_prefs_show(), we need to unlock the RCU read lock in both places before returning (and not lock it again). Fixes: f94f70d39cc2 ("afs: Provide a way to configure address priorities") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401172243.cd53d5f6-oliver.sang@intel.com Signed-off-by: David Howells cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org --- fs/afs/proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 3bd02571f30d..15eab053af6d 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -166,7 +166,7 @@ static int afs_proc_addr_prefs_show(struct seq_file *m, void *v) if (!preflist) { seq_puts(m, "NO PREFS\n"); - return 0; + goto out; } seq_printf(m, "PROT SUBNET PRIOR (v=%u n=%u/%u/%u)\n", @@ -191,7 +191,8 @@ static int afs_proc_addr_prefs_show(struct seq_file *m, void *v) } } - rcu_read_lock(); +out: + rcu_read_unlock(); return 0; } From e01a83e12604aa2f8d4ab359ec44e341a2248b4a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Jan 2024 15:39:01 -0800 Subject: [PATCH 223/768] Revert "btrfs: zstd: fix and simplify the inline extent decompression" This reverts commit 1e7f6def8b2370ecefb54b3c8f390ff894b0c51b. It causes my machine to not even boot, and Klara Modin reports that the cause is that small zstd-compressed files return garbage when read. Reported-by: Klara Modin Link: https://lore.kernel.org/linux-btrfs/CABq1_vj4GpUeZpVG49OHCo-3sdbe2-2ROcu_xDvUG-6-5zPRXg@mail.gmail.com/ Reported-and-bisected-by: Linus Torvalds Acked-by: David Sterba Cc: Qu Wenruo Signed-off-by: Linus Torvalds --- fs/btrfs/compression.h | 2 +- fs/btrfs/zstd.c | 73 ++++++++++++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 97fe3ebf11a2..afd7e50d073d 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -169,7 +169,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out); int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb); int zstd_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long dest_pgoff, size_t srclen, + struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); void zstd_init_workspace_manager(void); void zstd_cleanup_workspace_manager(void); diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 346c46d88d07..0d66db8bc1d4 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -20,7 +20,6 @@ #include "misc.h" #include "compression.h" #include "ctree.h" -#include "super.h" #define ZSTD_BTRFS_MAX_WINDOWLOG 17 #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) @@ -619,48 +618,80 @@ done: } int zstd_decompress(struct list_head *ws, const u8 *data_in, - struct page *dest_page, unsigned long dest_pgoff, size_t srclen, + struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); - struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb); - const u32 sectorsize = fs_info->sectorsize; zstd_dstream *stream; int ret = 0; - unsigned long to_copy = 0; + size_t ret2; + unsigned long total_out = 0; + unsigned long pg_offset = 0; stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (!stream) { pr_warn("BTRFS: zstd_init_dstream failed\n"); + ret = -EIO; goto finish; } + destlen = min_t(size_t, destlen, PAGE_SIZE); + workspace->in_buf.src = data_in; workspace->in_buf.pos = 0; workspace->in_buf.size = srclen; workspace->out_buf.dst = workspace->buf; workspace->out_buf.pos = 0; - workspace->out_buf.size = sectorsize; + workspace->out_buf.size = PAGE_SIZE; - /* - * Since both input and output buffers should not exceed one sector, - * one call should end the decompression. - */ - ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); - if (zstd_is_error(ret)) { - pr_warn_ratelimited("BTRFS: zstd_decompress_stream return %d\n", - zstd_get_error_code(ret)); - goto finish; + ret2 = 1; + while (pg_offset < destlen + && workspace->in_buf.pos < workspace->in_buf.size) { + unsigned long buf_start; + unsigned long buf_offset; + unsigned long bytes; + + /* Check if the frame is over and we still need more input */ + if (ret2 == 0) { + pr_debug("BTRFS: zstd_decompress_stream ended early\n"); + ret = -EIO; + goto finish; + } + ret2 = zstd_decompress_stream(stream, &workspace->out_buf, + &workspace->in_buf); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_decompress_stream returned %d\n", + zstd_get_error_code(ret2)); + ret = -EIO; + goto finish; + } + + buf_start = total_out; + total_out += workspace->out_buf.pos; + workspace->out_buf.pos = 0; + + if (total_out <= start_byte) + continue; + + if (total_out > start_byte && buf_start < start_byte) + buf_offset = start_byte - buf_start; + else + buf_offset = 0; + + bytes = min_t(unsigned long, destlen - pg_offset, + workspace->out_buf.size - buf_offset); + + memcpy_to_page(dest_page, pg_offset, + workspace->out_buf.dst + buf_offset, bytes); + + pg_offset += bytes; } - to_copy = workspace->out_buf.pos; - memcpy_to_page(dest_page, dest_pgoff + to_copy, workspace->out_buf.dst, to_copy); + ret = 0; finish: - /* Error or early end. */ - if (unlikely(to_copy < destlen)) { - ret = -EIO; - memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy); + if (pg_offset < destlen) { + memzero_page(dest_page, pg_offset, destlen - pg_offset); } return ret; } From 7ed2632ec7d72e926b9e8bcc9ad1bb0cd37274bf Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 14 Jan 2024 00:33:45 +0300 Subject: [PATCH 224/768] drm/ttm: fix ttm pool initialization for no-dma-device drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The QXL driver doesn't use any device for DMA mappings or allocations so dev_to_node() will panic inside ttm_device_init() on NUMA systems: general protection fault, probably for non-canonical address 0xdffffc000000007a: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x00000000000003d0-0x00000000000003d7] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.7.0+ #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:ttm_device_init+0x10e/0x340 Call Trace: qxl_ttm_init+0xaa/0x310 qxl_device_init+0x1071/0x2000 qxl_pci_probe+0x167/0x3f0 local_pci_probe+0xe1/0x1b0 pci_device_probe+0x29d/0x790 really_probe+0x251/0x910 __driver_probe_device+0x1ea/0x390 driver_probe_device+0x4e/0x2e0 __driver_attach+0x1e3/0x600 bus_for_each_dev+0x12d/0x1c0 bus_add_driver+0x25a/0x590 driver_register+0x15c/0x4b0 qxl_pci_driver_init+0x67/0x80 do_one_initcall+0xf5/0x5d0 kernel_init_freeable+0x637/0xb10 kernel_init+0x1c/0x2e0 ret_from_fork+0x48/0x80 ret_from_fork_asm+0x1b/0x30 RIP: 0010:ttm_device_init+0x10e/0x340 Fall back to NUMA_NO_NODE if there is no device for DMA. Found by Linux Verification Center (linuxtesting.org). Fixes: b0a7ce53d494 ("drm/ttm: Schedule delayed_delete worker closer") Signed-off-by: Fedor Pchelkin Reviewed-by: Christian König Reported-by: Steven Rostedt Cc: Rajneesh Bhardwaj Cc: Felix Kuehling Signed-off-by: Linus Torvalds --- drivers/gpu/drm/ttm/ttm_device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index f5187b384ae9..4130945052ed 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -195,7 +195,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func bool use_dma_alloc, bool use_dma32) { struct ttm_global *glob = &ttm_glob; - int ret; + int ret, nid; if (WARN_ON(vma_manager == NULL)) return -EINVAL; @@ -215,7 +215,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func ttm_sys_man_init(bdev); - ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32); + if (dev) + nid = dev_to_node(dev); + else + nid = NUMA_NO_NODE; + + ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32); bdev->vma_manager = vma_manager; spin_lock_init(&bdev->lru_lock); From 3526860f26febbe46960f9b37f5dbd5ccc109ea8 Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Mon, 22 Jan 2024 11:45:12 +0000 Subject: [PATCH 225/768] ALSA: hda: Replace numeric device IDs with constant values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have self-explanatory constants for Intel HDA devices, let's use them instead of magic numbers and code comments. Signed-off-by: Rui Salvaterra Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240122114512.55808-2-rsalvaterra@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2276adc84478..66f013ee160d 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1729,8 +1729,8 @@ static int default_bdl_pos_adj(struct azx *chip) /* some exceptions: Atoms seem problematic with value 1 */ if (chip->pci->vendor == PCI_VENDOR_ID_INTEL) { switch (chip->pci->device) { - case 0x0f04: /* Baytrail */ - case 0x2284: /* Braswell */ + case PCI_DEVICE_ID_INTEL_HDA_BYT: + case PCI_DEVICE_ID_INTEL_HDA_BSW: return 32; } } From 56beedc88405fd8022edfd1c2e63d1bc6c95efcb Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Mon, 22 Jan 2024 11:45:13 +0000 Subject: [PATCH 226/768] ALSA: hda: Increase default bdl_pos_adj for Apollo Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apollo Lake seems to also suffer from IRQ timing issues. After being up for ~4 minutes, a Pentium N4200 system ends up falling back to workqueue-based IRQ handling: [ 208.019906] snd_hda_intel 0000:00:0e.0: IRQ timing workaround is activated for card #0. Suggest a bigger bdl_pos_adj. Unfortunately, the Baytrail and Braswell workaround value of 32 samples isn't enough to fix the issue here. Default to 64 samples. Signed-off-by: Rui Salvaterra Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240122114512.55808-3-rsalvaterra@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 66f013ee160d..1b550c42db09 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1732,6 +1732,8 @@ static int default_bdl_pos_adj(struct azx *chip) case PCI_DEVICE_ID_INTEL_HDA_BYT: case PCI_DEVICE_ID_INTEL_HDA_BSW: return 32; + case PCI_DEVICE_ID_INTEL_HDA_APL: + return 64; } } From a2ed0a44d637ef9deca595054c206da7d6cbdcbc Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Mon, 22 Jan 2024 18:47:10 +0000 Subject: [PATCH 227/768] ALSA: hda/cs8409: Suppress vmaster control for Dolphin models Customer has reported an issue with specific desktop platform where two CS42L42 codecs are connected to CS8409 HDA bridge. If "Master Volume Control" is created then on Ubuntu OS UCM left/right balance slider in UI audio settings has no effect. This patch will fix this issue for a target paltform. Fixes: 20e507724113 ("ALSA: hda/cs8409: Add support for dolphin") Signed-off-by: Vitaly Rodionov Cc: Link: https://lore.kernel.org/r/20240122184710.5802-1-vitalyr@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cs8409.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 627899959ffe..e41316e2e983 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1; spec->scodecs[CS8409_CODEC1]->codec = codec; spec->num_scodecs = 2; + spec->gen.suppress_vmaster = 1; codec->patch_ops = cs8409_dolphin_patch_ops; From 4b088005c897a62fe98f70ab69687706cb2fad3b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 22 Jan 2024 21:26:33 +0100 Subject: [PATCH 228/768] fbdev: stifb: Fix crash in stifb_blank() Avoid a kernel crash in stifb by providing the correct pointer to the fb_info struct. Prior to commit e2e0b838a184 ("video/sticore: Remove info field from STI struct") the fb_info struct was at the beginning of the fb struct. Fixes: e2e0b838a184 ("video/sticore: Remove info field from STI struct") Signed-off-by: Helge Deller Cc: Thomas Zimmermann --- drivers/video/fbdev/stifb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 2de0e675fd15..8e5bac27542d 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1158,7 +1158,7 @@ stifb_init_display(struct stifb_info *fb) } break; } - stifb_blank(0, (struct fb_info *)fb); /* 0=enable screen */ + stifb_blank(0, fb->info); /* 0=enable screen */ SETUP_FB(fb); } From a969210066054ea109d8b7aff29a9b1c98776841 Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Tue, 23 Jan 2024 09:49:35 +0100 Subject: [PATCH 229/768] ALSA: usb-audio: Add a quirk for Yamaha YIT-W12TX transmitter The device fails to initialize otherwise, giving the following error: [ 3676.671641] usb 2-1.1: 1:1: cannot get freq at ep 0x1 Signed-off-by: Julian Sikorski Cc: Link: https://lore.kernel.org/r/20240123084935.2745-1-belegdol+github@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 07cc6a201579..43c14a81a1e2 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2031,6 +2031,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ From 7267e8dcad6b2f9fce05a6a06335d7040acbc2b6 Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Fri, 19 Jan 2024 11:01:33 -0800 Subject: [PATCH 230/768] tcp: Add memory barrier to tcp_push() On CPUs with weak memory models, reads and updates performed by tcp_push to the sk variables can get reordered leaving the socket throttled when it should not. The tasklet running tcp_wfree() may also not observe the memory updates in time and will skip flushing any packets throttled by tcp_push(), delaying the sending. This can pathologically cause 40ms extra latency due to bad interactions with delayed acks. Adding a memory barrier in tcp_push removes the bug, similarly to the previous commit bf06200e732d ("tcp: tsq: fix nonagle handling"). smp_mb__after_atomic() is used to not incur in unnecessary overhead on x86 since not affected. Patch has been tested using an AWS c7g.2xlarge instance with Ubuntu 22.04 and Apache Tomcat 9.0.83 running the basic servlet below: import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; public class HelloWorldServlet extends HttpServlet { @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=utf-8"); OutputStreamWriter osw = new OutputStreamWriter(response.getOutputStream(),"UTF-8"); String s = "a".repeat(3096); osw.write(s,0,s.length()); osw.flush(); } } Load was applied using wrk2 (https://github.com/kinvolk/wrk2) from an AWS c6i.8xlarge instance. Before the patch an additional 40ms latency from P99.99+ values is observed while, with the patch, the extra latency disappears. No patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.91ms 75.000% 1.13ms 90.000% 1.46ms 99.000% 1.74ms 99.900% 1.89ms 99.990% 41.95ms <<< 40+ ms extra latency 99.999% 48.32ms 100.000% 48.96ms With patch and tcp_autocorking=1 ./wrk -t32 -c128 -d40s --latency -R10000 http://172.31.60.173:8080/hello/hello ... 50.000% 0.90ms 75.000% 1.13ms 90.000% 1.45ms 99.000% 1.72ms 99.900% 1.83ms 99.990% 2.11ms <<< no 40+ ms extra latency 99.999% 2.53ms 100.000% 2.62ms Patch has been also tested on x86 (m7i.2xlarge instance) which it is not affected by this issue and the patch doesn't introduce any additional delay. Fixes: 7aa5470c2c09 ("tcp: tsq: move tsq_flags close to sk_wmem_alloc") Signed-off-by: Salvatore Dipietro Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240119190133.43698-1-dipiets@amazon.com Signed-off-by: Paolo Abeni --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1baa484d2190..a1c6de385cce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -722,6 +722,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now, if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING); set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); + smp_mb__after_atomic(); } /* It is possible TX completion already happened * before we set TSQ_THROTTLED. From 97de5a15edf2d22184f5ff588656030bbb7fa358 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 19 Jan 2024 19:16:42 -0800 Subject: [PATCH 231/768] selftest: Don't reuse port for SO_INCOMING_CPU test. Jakub reported that ASSERT_EQ(cpu, i) in so_incoming_cpu.c seems to fire somewhat randomly. # # RUN so_incoming_cpu.before_reuseport.test3 ... # # so_incoming_cpu.c:191:test3:Expected cpu (32) == i (0) # # test3: Test terminated by assertion # # FAIL so_incoming_cpu.before_reuseport.test3 # not ok 3 so_incoming_cpu.before_reuseport.test3 When the test failed, not-yet-accepted CLOSE_WAIT sockets received SYN with a "challenging" SEQ number, which was sent from an unexpected CPU that did not create the receiver. The test basically does: 1. for each cpu: 1-1. create a server 1-2. set SO_INCOMING_CPU 2. for each cpu: 2-1. set cpu affinity 2-2. create some clients 2-3. let clients connect() to the server on the same cpu 2-4. close() clients 3. for each server: 3-1. accept() all child sockets 3-2. check if all children have the same SO_INCOMING_CPU with the server The root cause was the close() in 2-4. and net.ipv4.tcp_tw_reuse. In a loop of 2., close() changed the client state to FIN_WAIT_2, and the peer transitioned to CLOSE_WAIT. In another loop of 2., connect() happened to select the same port of the FIN_WAIT_2 socket, and it was reused as the default value of net.ipv4.tcp_tw_reuse is 2. As a result, the new client sent SYN to the CLOSE_WAIT socket from a different CPU, and the receiver's sk_incoming_cpu was overwritten with unexpected CPU ID. Also, the SYN had a different SEQ number, so the CLOSE_WAIT socket responded with Challenge ACK. The new client properly returned RST and effectively killed the CLOSE_WAIT socket. This way, all clients were created successfully, but the error was detected later by 3-2., ASSERT_EQ(cpu, i). To avoid the failure, let's make sure that (i) the number of clients is less than the number of available ports and (ii) such reuse never happens. Fixes: 6df96146b202 ("selftest: Add test for SO_INCOMING_CPU.") Reported-by: Jakub Kicinski Signed-off-by: Kuniyuki Iwashima Tested-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240120031642.67014-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/so_incoming_cpu.c | 68 ++++++++++++++----- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c index a14818164102..e9fa14e10732 100644 --- a/tools/testing/selftests/net/so_incoming_cpu.c +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -3,19 +3,16 @@ #define _GNU_SOURCE #include +#include + #include #include #include #include "../kselftest_harness.h" -#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */ -#define NR_SERVER self->nproc -#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER) - FIXTURE(so_incoming_cpu) { - int nproc; int *servers; union { struct sockaddr addr; @@ -56,12 +53,47 @@ FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen) .when_to_set = AFTER_ALL_LISTEN, }; +static void write_sysctl(struct __test_metadata *_metadata, + char *filename, char *string) +{ + int fd, len, ret; + + fd = open(filename, O_WRONLY); + ASSERT_NE(fd, -1); + + len = strlen(string); + ret = write(fd, string, len); + ASSERT_EQ(ret, len); +} + +static void setup_netns(struct __test_metadata *_metadata) +{ + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set lo up"), 0); + + write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001"); + write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0"); +} + +#define NR_PORT (60001 - 10000 - 1) +#define NR_CLIENT_PER_SERVER_DEFAULT 32 +static int nr_client_per_server, nr_server, nr_client; + FIXTURE_SETUP(so_incoming_cpu) { - self->nproc = get_nprocs(); - ASSERT_LE(2, self->nproc); + setup_netns(_metadata); - self->servers = malloc(sizeof(int) * NR_SERVER); + nr_server = get_nprocs(); + ASSERT_LE(2, nr_server); + + if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT) + nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT; + else + nr_client_per_server = NR_PORT / nr_server; + + nr_client = nr_client_per_server * nr_server; + + self->servers = malloc(sizeof(int) * nr_server); ASSERT_NE(self->servers, NULL); self->in_addr.sin_family = AF_INET; @@ -74,7 +106,7 @@ FIXTURE_TEARDOWN(so_incoming_cpu) { int i; - for (i = 0; i < NR_SERVER; i++) + for (i = 0; i < nr_server; i++) close(self->servers[i]); free(self->servers); @@ -110,10 +142,10 @@ int create_server(struct __test_metadata *_metadata, if (variant->when_to_set == BEFORE_LISTEN) set_so_incoming_cpu(_metadata, fd, cpu); - /* We don't use CLIENT_PER_SERVER here not to block + /* We don't use nr_client_per_server here not to block * this test at connect() if SO_INCOMING_CPU is broken. */ - ret = listen(fd, NR_CLIENT); + ret = listen(fd, nr_client); ASSERT_EQ(ret, 0); if (variant->when_to_set == AFTER_LISTEN) @@ -128,7 +160,7 @@ void create_servers(struct __test_metadata *_metadata, { int i, ret; - for (i = 0; i < NR_SERVER; i++) { + for (i = 0; i < nr_server; i++) { self->servers[i] = create_server(_metadata, self, variant, i); if (i == 0) { @@ -138,7 +170,7 @@ void create_servers(struct __test_metadata *_metadata, } if (variant->when_to_set == AFTER_ALL_LISTEN) { - for (i = 0; i < NR_SERVER; i++) + for (i = 0; i < nr_server; i++) set_so_incoming_cpu(_metadata, self->servers[i], i); } } @@ -149,7 +181,7 @@ void create_clients(struct __test_metadata *_metadata, cpu_set_t cpu_set; int i, j, fd, ret; - for (i = 0; i < NR_SERVER; i++) { + for (i = 0; i < nr_server; i++) { CPU_ZERO(&cpu_set); CPU_SET(i, &cpu_set); @@ -162,7 +194,7 @@ void create_clients(struct __test_metadata *_metadata, ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set); ASSERT_EQ(ret, 0); - for (j = 0; j < CLIENT_PER_SERVER; j++) { + for (j = 0; j < nr_client_per_server; j++) { fd = socket(AF_INET, SOCK_STREAM, 0); ASSERT_NE(fd, -1); @@ -180,8 +212,8 @@ void verify_incoming_cpu(struct __test_metadata *_metadata, int i, j, fd, cpu, ret, total = 0; socklen_t len = sizeof(int); - for (i = 0; i < NR_SERVER; i++) { - for (j = 0; j < CLIENT_PER_SERVER; j++) { + for (i = 0; i < nr_server; i++) { + for (j = 0; j < nr_client_per_server; j++) { /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */ fd = accept(self->servers[i], &self->addr, &self->addrlen); ASSERT_NE(fd, -1); @@ -195,7 +227,7 @@ void verify_incoming_cpu(struct __test_metadata *_metadata, } } - ASSERT_EQ(total, NR_CLIENT); + ASSERT_EQ(total, nr_client); TH_LOG("SO_INCOMING_CPU is very likely to be " "working correctly with %d sockets.", total); } From 3e4147f33f8b647775357bae0248b9a2aeebfcd2 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 4 Jan 2024 21:11:37 +0100 Subject: [PATCH 232/768] x86/CPU/AMD: Add X86_FEATURE_ZEN5 Add a synthetic feature flag for Zen5. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240104201138.5072-1-bp@alien8.de --- arch/x86/include/asm/cpufeatures.h | 4 +--- arch/x86/kernel/cpu/amd.c | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 29cb275a219d..fdf723b6f6d0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -81,10 +81,8 @@ #define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ #define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ - -/* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9f42d1c59e09..bc49e3b3aae0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -538,7 +538,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) /* Figure out Zen generations: */ switch (c->x86) { - case 0x17: { + case 0x17: switch (c->x86_model) { case 0x00 ... 0x2f: case 0x50 ... 0x5f: @@ -554,8 +554,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) goto warn; } break; - } - case 0x19: { + + case 0x19: switch (c->x86_model) { case 0x00 ... 0x0f: case 0x20 ... 0x5f: @@ -569,7 +569,17 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) goto warn; } break; - } + + case 0x1a: + switch (c->x86_model) { + case 0x00 ... 0x0f: + setup_force_cpu_cap(X86_FEATURE_ZEN5); + break; + default: + goto warn; + } + break; + default: break; } @@ -1039,6 +1049,11 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT); } +static void init_amd_zen5(struct cpuinfo_x86 *c) +{ + init_amd_zen_common(); +} + static void init_amd(struct cpuinfo_x86 *c) { u64 vm_cr; @@ -1084,6 +1099,8 @@ static void init_amd(struct cpuinfo_x86 *c) init_amd_zen3(c); else if (boot_cpu_has(X86_FEATURE_ZEN4)) init_amd_zen4(c); + else if (boot_cpu_has(X86_FEATURE_ZEN5)) + init_amd_zen5(c); /* * Enable workaround for FXSAVE leak on CPUs From 090e3bec01763e415bccae445f5bfe3d0c61b629 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Jan 2024 11:18:44 -0800 Subject: [PATCH 233/768] x86/cpu: Add model number for Intel Clearwater Forest processor Server product based on the Atom Darkmont core. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240117191844.56180-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 197316121f04..b65e9c46b922 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -162,6 +162,8 @@ #define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ #define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ +#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ + /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ From 234ec0b6034b16869d45128b8cd2dc6ffe596f04 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 22 Jan 2024 09:18:07 +0800 Subject: [PATCH 234/768] netlink: fix potential sleeping issue in mqueue_flush_file I analyze the potential sleeping issue of the following processes: Thread A Thread B ... netlink_create //ref = 1 do_mq_notify ... sock = netlink_getsockbyfilp ... //ref = 2 info->notify_sock = sock; ... ... netlink_sendmsg ... skb = netlink_alloc_large_skb //skb->head is vmalloced ... netlink_unicast ... sk = netlink_getsockbyportid //ref = 3 ... netlink_sendskb ... __netlink_sendskb ... skb_queue_tail //put skb to sk_receive_queue ... sock_put //ref = 2 ... ... ... netlink_release ... deferred_put_nlk_sk //ref = 1 mqueue_flush_file spin_lock remove_notification netlink_sendskb sock_put //ref = 0 sk_free ... __sk_destruct netlink_sock_destruct skb_queue_purge //get skb from sk_receive_queue ... __skb_queue_purge_reason kfree_skb_reason __kfree_skb ... skb_release_all skb_release_head_state netlink_skb_destructor vfree(skb->head) //sleeping while holding spinlock In netlink_sendmsg, if the memory pointed to by skb->head is allocated by vmalloc, and is put to sk_receive_queue queue, also the skb is not freed. When the mqueue executes flush, the sleeping bug will occur. Use vfree_atomic instead of vfree in netlink_skb_destructor to solve the issue. Fixes: c05cdb1b864f ("netlink: allow large data transfers from user-space") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20240122011807.2110357-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4ed8ffd58ff3..9c962347cf85 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb) if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) - vfree(skb->head); + vfree_atomic(skb->head); skb->head = NULL; } From b6eda11c44dc89a681e1c105f0f4660e69b1e183 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 19 Jan 2024 14:07:14 +0800 Subject: [PATCH 235/768] HID: nvidia-shield: Add missing null pointer checks to LED initialization devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. [jkosina@suse.com: tweak changelog a bit] Signed-off-by: Kunwu Chan Reviewed-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/hid-nvidia-shield.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index 82d0a77359c4..58b15750dbb0 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -800,6 +800,8 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts) led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); + if (!led->name) + return -ENOMEM; led->max_brightness = 1; led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; @@ -831,6 +833,8 @@ static inline int thunderstrike_psy_create(struct shield_device *shield_dev) shield_dev->battery_dev.desc.name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike_%d", ts->id); + if (!shield_dev->battery_dev.desc.name) + return -ENOMEM; shield_dev->battery_dev.psy = power_supply_register( &hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg); From 26dd6a5667f500c5d991f90a9ac5998a71afaf5c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 15 Jan 2024 12:50:51 +0800 Subject: [PATCH 236/768] HID: i2c-hid: Skip SET_POWER SLEEP for Cirque touchpad on system suspend There's a Cirque touchpad that wakes system up without anything touched the touchpad. The input report is empty when this happens. The reason is stated in HID over I2C spec, 7.2.8.2: "If the DEVICE wishes to wake the HOST from its low power state, it can issue a wake by asserting the interrupt." This is fine if OS can put system back to suspend by identifying input wakeup count stays the same on resume, like Chrome OS Dark Resume [0]. But for regular distro such policy is lacking. Though the change doesn't bring any impact on power consumption for touchpad is minimal, other i2c-hid device may depends on SLEEP control power. So use a quirk to limit the change scope. [0] https://chromium.googlesource.com/chromiumos/platform2/+/HEAD/power_manager/docs/dark_resume.md Signed-off-by: Kai-Heng Feng Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/i2c-hid/i2c-hid-core.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index fb30e228d35f..828a5c022c64 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -298,6 +298,9 @@ #define USB_VENDOR_ID_CIDC 0x1677 +#define I2C_VENDOR_ID_CIRQUE 0x0488 +#define I2C_PRODUCT_ID_CIRQUE_1063 0x1063 + #define USB_VENDOR_ID_CJTOUCH 0x24b8 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040 diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 90f316ae9819..2df1ab3c31cc 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -49,6 +49,7 @@ #define I2C_HID_QUIRK_RESET_ON_RESUME BIT(2) #define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(3) #define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET BIT(4) +#define I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND BIT(5) /* Command opcodes */ #define I2C_HID_OPCODE_RESET 0x01 @@ -131,6 +132,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_RESET_ON_RESUME }, { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720, I2C_HID_QUIRK_BAD_INPUT_SIZE }, + { I2C_VENDOR_ID_CIRQUE, I2C_PRODUCT_ID_CIRQUE_1063, + I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND }, /* * Sending the wakeup after reset actually break ELAN touchscreen controller */ @@ -956,7 +959,8 @@ static int i2c_hid_core_suspend(struct i2c_hid *ihid, bool force_poweroff) return ret; /* Save some power */ - i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND)) + i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); disable_irq(client->irq); From 420332b94119cdc7db4477cc88484691cb92ae71 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 20 Jan 2024 12:18:39 +0200 Subject: [PATCH 237/768] ovl: mark xwhiteouts directory with overlay.opaque='x' An opaque directory cannot have xwhiteouts, so instead of marking an xwhiteouts directory with a new xattr, overload overlay.opaque xattr for marking both opaque dir ('y') and xwhiteouts dir ('x'). This is more efficient as the overlay.opaque xattr is checked during lookup of directory anyway. This also prevents unnecessary checking the xattr when reading a directory without xwhiteouts, i.e. most of the time. Note that the xwhiteouts marker is not checked on the upper layer and on the last layer in lowerstack, where xwhiteouts are not expected. Fixes: bc8df7a3dc03 ("ovl: Add an alternative type of whiteout") Cc: # v6.7 Reviewed-by: Alexander Larsson Tested-by: Alexander Larsson Signed-off-by: Amir Goldstein --- Documentation/filesystems/overlayfs.rst | 16 ++++++-- fs/overlayfs/namei.c | 43 ++++++++++++-------- fs/overlayfs/overlayfs.h | 23 ++++++++--- fs/overlayfs/ovl_entry.h | 4 +- fs/overlayfs/readdir.c | 7 ++-- fs/overlayfs/super.c | 15 +++++++ fs/overlayfs/util.c | 53 +++++++++++++++---------- 7 files changed, 110 insertions(+), 51 deletions(-) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 1c244866041a..165514401441 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -145,7 +145,9 @@ filesystem, an overlay filesystem needs to record in the upper filesystem that files have been removed. This is done using whiteouts and opaque directories (non-directories are always opaque). -A whiteout is created as a character device with 0/0 device number. +A whiteout is created as a character device with 0/0 device number or +as a zero-size regular file with the xattr "trusted.overlay.whiteout". + When a whiteout is found in the upper level of a merged directory, any matching name in the lower level is ignored, and the whiteout itself is also hidden. @@ -154,6 +156,13 @@ A directory is made opaque by setting the xattr "trusted.overlay.opaque" to "y". Where the upper filesystem contains an opaque directory, any directory in the lower filesystem with the same name is ignored. +An opaque directory should not conntain any whiteouts, because they do not +serve any purpose. A merge directory containing regular files with the xattr +"trusted.overlay.whiteout", should be additionally marked by setting the xattr +"trusted.overlay.opaque" to "x" on the merge directory itself. +This is needed to avoid the overhead of checking the "trusted.overlay.whiteout" +on all entries during readdir in the common case. + readdir ------- @@ -534,8 +543,9 @@ A lower dir with a regular whiteout will always be handled by the overlayfs mount, so to support storing an effective whiteout file in an overlayfs mount an alternative form of whiteout is supported. This form is a regular, zero-size file with the "overlay.whiteout" xattr set, inside a directory with the -"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs, -but can be used by userspace tools (like containers) that generate lower layers. +"overlay.opaque" xattr set to "x" (see `whiteouts and opaque directories`_). +These alternative whiteouts are never created by overlayfs, but can be used by +userspace tools (like containers) that generate lower layers. These alternative whiteouts can be escaped using the standard xattr escape mechanism in order to properly nest to any depth. diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 984ffdaeed6c..5764f91d283e 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -18,10 +18,11 @@ struct ovl_lookup_data { struct super_block *sb; - struct vfsmount *mnt; + const struct ovl_layer *layer; struct qstr name; bool is_dir; bool opaque; + bool xwhiteouts; bool stop; bool last; char *redirect; @@ -201,17 +202,13 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, return real; } -static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path) -{ - return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE); -} - static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d, const char *name, struct dentry *base, int len, bool drop_negative) { - struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len); + struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name, + base, len); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { if (drop_negative && ret->d_lockref.count == 1) { @@ -232,10 +229,13 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, size_t prelen, const char *post, struct dentry **ret, bool drop_negative) { + struct ovl_fs *ofs = OVL_FS(d->sb); struct dentry *this; struct path path; int err; bool last_element = !post[0]; + bool is_upper = d->layer->idx == 0; + char val; this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative); if (IS_ERR(this)) { @@ -253,8 +253,8 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, } path.dentry = this; - path.mnt = d->mnt; - if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) { + path.mnt = d->layer->mnt; + if (ovl_path_is_whiteout(ofs, &path)) { d->stop = d->opaque = true; goto put_and_out; } @@ -272,7 +272,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, d->stop = true; goto put_and_out; } - err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL); + err = ovl_check_metacopy_xattr(ofs, &path, NULL); if (err < 0) goto out_err; @@ -292,7 +292,12 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, if (d->last) goto out; - if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) { + /* overlay.opaque=x means xwhiteouts directory */ + val = ovl_get_opaquedir_val(ofs, &path); + if (last_element && !is_upper && val == 'x') { + d->xwhiteouts = true; + ovl_layer_set_xwhiteouts(ofs, d->layer); + } else if (val == 'y') { d->stop = true; if (last_element) d->opaque = true; @@ -863,7 +868,8 @@ fail: * Returns next layer in stack starting from top. * Returns -1 if this is the last layer. */ -int ovl_path_next(int idx, struct dentry *dentry, struct path *path) +int ovl_path_next(int idx, struct dentry *dentry, struct path *path, + const struct ovl_layer **layer) { struct ovl_entry *oe = OVL_E(dentry); struct ovl_path *lowerstack = ovl_lowerstack(oe); @@ -871,13 +877,16 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) BUG_ON(idx < 0); if (idx == 0) { ovl_path_upper(dentry, path); - if (path->dentry) + if (path->dentry) { + *layer = &OVL_FS(dentry->d_sb)->layers[0]; return ovl_numlower(oe) ? 1 : -1; + } idx++; } BUG_ON(idx > ovl_numlower(oe)); path->dentry = lowerstack[idx - 1].dentry; - path->mnt = lowerstack[idx - 1].layer->mnt; + *layer = lowerstack[idx - 1].layer; + path->mnt = (*layer)->mnt; return (idx < ovl_numlower(oe)) ? idx + 1 : -1; } @@ -1055,7 +1064,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, old_cred = ovl_override_creds(dentry->d_sb); upperdir = ovl_dentry_upper(dentry->d_parent); if (upperdir) { - d.mnt = ovl_upper_mnt(ofs); + d.layer = &ofs->layers[0]; err = ovl_lookup_layer(upperdir, &d, &upperdentry, true); if (err) goto out; @@ -1111,7 +1120,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, else if (d.is_dir || !ofs->numdatalayer) d.last = lower.layer->idx == ovl_numlower(roe); - d.mnt = lower.layer->mnt; + d.layer = lower.layer; err = ovl_lookup_layer(lower.dentry, &d, &this, false); if (err) goto out_put; @@ -1278,6 +1287,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (upperopaque) ovl_dentry_set_opaque(dentry); + if (d.xwhiteouts) + ovl_dentry_set_xwhiteouts(dentry); if (upperdentry) ovl_dentry_set_upper_alias(dentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 5ba11eb43767..ee949f3e7c77 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -50,7 +50,6 @@ enum ovl_xattr { OVL_XATTR_METACOPY, OVL_XATTR_PROTATTR, OVL_XATTR_XWHITEOUT, - OVL_XATTR_XWHITEOUTS, }; enum ovl_inode_flag { @@ -70,6 +69,8 @@ enum ovl_entry_flag { OVL_E_UPPER_ALIAS, OVL_E_OPAQUE, OVL_E_CONNECTED, + /* Lower stack may contain xwhiteout entries */ + OVL_E_XWHITEOUTS, }; enum { @@ -477,6 +478,10 @@ bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); +bool ovl_dentry_has_xwhiteouts(struct dentry *dentry); +void ovl_dentry_set_xwhiteouts(struct dentry *dentry); +void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, + const struct ovl_layer *layer); bool ovl_dentry_has_upper_alias(struct dentry *dentry); void ovl_dentry_set_upper_alias(struct dentry *dentry); bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags); @@ -494,11 +499,10 @@ struct file *ovl_path_open(const struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry, int flags); void ovl_copy_up_end(struct dentry *dentry); bool ovl_already_copied_up(struct dentry *dentry, int flags); -bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, - enum ovl_xattr ox); +char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, + enum ovl_xattr ox); bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath); @@ -573,7 +577,13 @@ static inline bool ovl_is_impuredir(struct super_block *sb, .mnt = ovl_upper_mnt(ofs), }; - return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE); + return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y'; +} + +static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs, + const struct path *path) +{ + return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE); } static inline bool ovl_redirect_follow(struct ovl_fs *ofs) @@ -680,7 +690,8 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool verify); -int ovl_path_next(int idx, struct dentry *dentry, struct path *path); +int ovl_path_next(int idx, struct dentry *dentry, struct path *path, + const struct ovl_layer **layer); int ovl_verify_lowerdata(struct dentry *dentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 5fa9c58af65f..cb449ab310a7 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -40,6 +40,8 @@ struct ovl_layer { int idx; /* One fsid per unique underlying sb (upper fsid == 0) */ int fsid; + /* xwhiteouts were found on this layer */ + bool has_xwhiteouts; }; struct ovl_path { @@ -59,7 +61,7 @@ struct ovl_fs { unsigned int numfs; /* Number of data-only lower layers */ unsigned int numdatalayer; - const struct ovl_layer *layers; + struct ovl_layer *layers; struct ovl_sb *fs; /* workbasedir is the path at workdir= mount option */ struct dentry *workbasedir; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index e71156baa7bc..0ca8af060b0c 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -305,8 +305,6 @@ static inline int ovl_dir_read(const struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); - rdd->in_xwhiteouts_dir = rdd->dentry && - ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath); rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { @@ -359,10 +357,13 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list, .is_lowest = false, }; int idx, next; + const struct ovl_layer *layer; for (idx = 0; idx != -1; idx = next) { - next = ovl_path_next(idx, dentry, &realpath); + next = ovl_path_next(idx, dentry, &realpath, &layer); rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry; + rdd.in_xwhiteouts_dir = layer->has_xwhiteouts && + ovl_dentry_has_xwhiteouts(dentry); if (next != -1) { err = ovl_dir_read(&realpath, &rdd); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 4ab66e3d4cff..2eef6c70b2ae 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1249,6 +1249,7 @@ static struct dentry *ovl_get_root(struct super_block *sb, struct ovl_entry *oe) { struct dentry *root; + struct ovl_fs *ofs = OVL_FS(sb); struct ovl_path *lowerpath = ovl_lowerstack(oe); unsigned long ino = d_inode(lowerpath->dentry)->i_ino; int fsid = lowerpath->layer->fsid; @@ -1270,6 +1271,20 @@ static struct dentry *ovl_get_root(struct super_block *sb, ovl_set_flag(OVL_IMPURE, d_inode(root)); } + /* Look for xwhiteouts marker except in the lowermost layer */ + for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) { + struct path path = { + .mnt = lowerpath->layer->mnt, + .dentry = lowerpath->dentry, + }; + + /* overlay.opaque=x means xwhiteouts directory */ + if (ovl_get_opaquedir_val(ofs, &path) == 'x') { + ovl_layer_set_xwhiteouts(ofs, lowerpath->layer); + ovl_dentry_set_xwhiteouts(root); + } + } + /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); ovl_dentry_set_flag(OVL_E_CONNECTED, root); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 0217094c23ea..a8e17f14d7a2 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -461,6 +461,33 @@ void ovl_dentry_set_opaque(struct dentry *dentry) ovl_dentry_set_flag(OVL_E_OPAQUE, dentry); } +bool ovl_dentry_has_xwhiteouts(struct dentry *dentry) +{ + return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry); +} + +void ovl_dentry_set_xwhiteouts(struct dentry *dentry) +{ + ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry); +} + +/* + * ovl_layer_set_xwhiteouts() is called before adding the overlay dir + * dentry to dcache, while readdir of that same directory happens after + * the overlay dir dentry is in dcache, so if some cpu observes that + * ovl_dentry_is_xwhiteouts(), it will also observe layer->has_xwhiteouts + * for the layers where xwhiteouts marker was found in that merge dir. + */ +void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, + const struct ovl_layer *layer) +{ + if (layer->has_xwhiteouts) + return; + + /* Write once to read-mostly layer properties */ + ofs->layers[layer->idx].has_xwhiteouts = true; +} + /* * For hard links and decoded file handles, it's possible for ovl_dentry_upper() * to return positive, while there's no actual upper alias for the inode. @@ -739,19 +766,6 @@ bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path) return res >= 0; } -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path) -{ - struct dentry *dentry = path->dentry; - int res; - - /* xattr.whiteouts must be a directory */ - if (!d_is_dir(dentry)) - return false; - - res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0); - return res >= 0; -} - /* * Load persistent uuid from xattr into s_uuid if found, or store a new * random generated value in s_uuid and in xattr. @@ -811,20 +825,17 @@ fail: return false; } -bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, - enum ovl_xattr ox) +char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, + enum ovl_xattr ox) { int res; char val; if (!d_is_dir(path->dentry)) - return false; + return 0; res = ovl_path_getxattr(ofs, path, ox, &val, 1); - if (res == 1 && val == 'y') - return true; - - return false; + return res == 1 ? val : 0; } #define OVL_XATTR_OPAQUE_POSTFIX "opaque" @@ -837,7 +848,6 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path, #define OVL_XATTR_METACOPY_POSTFIX "metacopy" #define OVL_XATTR_PROTATTR_POSTFIX "protattr" #define OVL_XATTR_XWHITEOUT_POSTFIX "whiteout" -#define OVL_XATTR_XWHITEOUTS_POSTFIX "whiteouts" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -854,7 +864,6 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT), - OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, From 435e202d645c197dcfd39d7372eb2a56529b6640 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 22 Jan 2024 18:20:01 +0800 Subject: [PATCH 238/768] ipv6: init the accept_queue's spinlocks in inet6_create In commit 198bc90e0e73("tcp: make sure init the accept_queue's spinlocks once"), the spinlocks of accept_queue are initialized only when socket is created in the inet4 scenario. The locks are not initialized when socket is created in the inet6 scenario. The kernel reports the following error: INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Call Trace: dump_stack_lvl (lib/dump_stack.c:107) register_lock_class (kernel/locking/lockdep.c:1289) __lock_acquire (kernel/locking/lockdep.c:5015) lock_acquire.part.0 (kernel/locking/lockdep.c:5756) _raw_spin_lock_bh (kernel/locking/spinlock.c:178) inet_csk_listen_stop (net/ipv4/inet_connection_sock.c:1386) tcp_disconnect (net/ipv4/tcp.c:2981) inet_shutdown (net/ipv4/af_inet.c:935) __sys_shutdown (./include/linux/file.h:32 net/socket.c:2438) __x64_sys_shutdown (net/socket.c:2445) do_syscall_64 (arch/x86/entry/common.c:52) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f52ecd05a3d Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ab a3 0e 00 f7 d8 64 89 01 48 RSP: 002b:00007f52ecf5dde8 EFLAGS: 00000293 ORIG_RAX: 0000000000000030 RAX: ffffffffffffffda RBX: 00007f52ecf5e640 RCX: 00007f52ecd05a3d RDX: 00007f52ecc8b188 RSI: 0000000000000000 RDI: 0000000000000004 RBP: 00007f52ecf5de20 R08: 00007ffdae45c69f R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f52ecf5e640 R13: 0000000000000000 R14: 00007f52ecc8b060 R15: 00007ffdae45c6e0 Fixes: 198bc90e0e73 ("tcp: make sure init the accept_queue's spinlocks once") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240122102001.2851701-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- net/ipv6/af_inet6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 13a1833a4df5..959bfd9f6344 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -199,6 +199,9 @@ lookup_protocol: if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; + if (INET_PROTOSW_ICSK & answer_flags) + inet_init_csk_locks(sk); + inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); From 574bf7bbe83794a902679846770f75a9b7f28176 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Tue, 9 Jan 2024 16:00:32 -0500 Subject: [PATCH 239/768] spi: bcm-qspi: fix SFDP BFPT read by usig mspi read SFDP read shall use the mspi reads when using the bcm_qspi_exec_mem_op() call. This fixes SFDP parameter page read failures seen with parts that now use SFDP protocol to read the basic flash parameter table. Fixes: 5f195ee7d830 ("spi: bcm-qspi: Implement the spi_mem interface") Signed-off-by: Kamal Dasu Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://msgid.link/r/20240109210033.43249-1-kamal.dasu@broadcom.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index d96222e6d7d2..cfdaa5eaec76 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include "spi-bcm-qspi.h" @@ -1221,7 +1221,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem, /* non-aligned and very short transfers are handled by MSPI */ if (!IS_ALIGNED((uintptr_t)addr, 4) || !IS_ALIGNED((uintptr_t)buf, 4) || - len < 4) + len < 4 || op->cmd.opcode == SPINOR_OP_RDSFDP) mspi_read = true; if (!has_bspi(qspi) || mspi_read) From e267a5b3ec59ce88d6be21078e2deb807ca3b436 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 09:54:03 +0100 Subject: [PATCH 240/768] spi: spi-imx: Use dev_err_probe for failed DMA channel requests If dma_request_chan() fails, no error is shown nor any information is shown in /sys/kernel/debug/devices_deferred if -EPROBE_DEFER is returned. Use dev_err_probe to fix both problems. Signed-off-by: Alexander Stein Reviewed-by: Francesco Dolcini Link: https://msgid.link/r/20240110085403.457089-1-alexander.stein@ew.tq-group.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 272bc871a848..546cdce525fc 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1344,7 +1344,7 @@ static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx, controller->dma_tx = dma_request_chan(dev, "tx"); if (IS_ERR(controller->dma_tx)) { ret = PTR_ERR(controller->dma_tx); - dev_dbg(dev, "can't get the TX DMA channel, error %d!\n", ret); + dev_err_probe(dev, ret, "can't get the TX DMA channel!\n"); controller->dma_tx = NULL; goto err; } @@ -1353,7 +1353,7 @@ static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx, controller->dma_rx = dma_request_chan(dev, "rx"); if (IS_ERR(controller->dma_rx)) { ret = PTR_ERR(controller->dma_rx); - dev_dbg(dev, "can't get the RX DMA channel, error %d\n", ret); + dev_err_probe(dev, ret, "can't get the RX DMA channel!\n"); controller->dma_rx = NULL; goto err; } From 633cd6fe6e1993ba80e0954c2db127a0b1a3e66f Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Mon, 18 Dec 2023 14:36:52 +0530 Subject: [PATCH 241/768] spi: spi-cadence: Reverse the order of interleaved write and read operations In the existing implementation, when executing interleaved write and read operations in the ISR for a transfer length greater than the FIFO size, the TXFIFO write precedes the RXFIFO read. Consequently, the initially received data in the RXFIFO is pushed out and lost, leading to a failure in data integrity. To address this issue, reverse the order of interleaved operations and conduct the RXFIFO read followed by the TXFIFO write. Fixes: 6afe2ae8dc48 ("spi: spi-cadence: Interleave write of TX and read of RX FIFO") Signed-off-by: Amit Kumar Mahapatra Link: https://msgid.link/r/20231218090652.18403-1-amit.kumar-mahapatra@amd.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index a50eb4db79de..e5140532071d 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -317,6 +317,15 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx) xspi->rx_bytes -= nrx; while (ntx || nrx) { + if (nrx) { + u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD); + + if (xspi->rxbuf) + *xspi->rxbuf++ = data; + + nrx--; + } + if (ntx) { if (xspi->txbuf) cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++); @@ -326,14 +335,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx) ntx--; } - if (nrx) { - u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD); - - if (xspi->rxbuf) - *xspi->rxbuf++ = data; - - nrx--; - } } } From 67794f882adca00d043899ac248bc002751da9f6 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Tue, 23 Jan 2024 16:46:35 +0300 Subject: [PATCH 242/768] ALSA: usb-audio: Skip setting clock selector for single connections Since commit 086b957cc17f5 ("ALSA: usb-audio: Skip the clock selector inquiry for single connections") we are already skipping clock selector inquiry if only one clock source is connected, but we are still sending a set request. Lets skip that too. This should fix errors when setting a sample rate on devices that don't have any controls present within the clock selector. An example of such device is the new revision of MOTU M Series (07fd:000b): AudioControl Interface Descriptor: bLength 8 bDescriptorType 36 bDescriptorSubtype 11 (CLOCK_SELECTOR) bClockID 1 bNrInPins 1 baCSourceID(0) 2 bmControls 0x00 iClockSelector 0 Perhaps we also should check if clock selectors are readable and writeable like we already do for clock sources, but this is out of scope of this patch. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217601 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240123134635.54026-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 33db334e6556..94e4aaeafe58 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -325,7 +325,8 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, visited, validate); if (ret > 0) { /* Skip setting clock selector again for some devices */ - if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR) + if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR || + pins == 1) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) From 834bf76add3e6168038150f162cbccf1fd492a67 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 22 Jan 2024 15:27:48 -0500 Subject: [PATCH 243/768] eventfs: Save directory inodes in the eventfs_inode structure The eventfs inodes and directories are allocated when referenced. But this leaves the issue of keeping consistent inode numbers and the number is only saved in the inode structure itself. When the inode is no longer referenced, it can be freed. When the file that the inode was representing is referenced again, the inode is once again created, but the inode number needs to be the same as it was before. Just making the inode numbers the same for all files is fine, but that does not work with directories. The find command will check for loops via the inode number and having the same inode number for directories triggers: # find /sys/kernel/tracing find: File system loop detected; '/sys/kernel/debug/tracing/events/initcall/initcall_finish' is part of the same file system loop as '/sys/kernel/debug/tracing/events/initcall'. [..] Linus pointed out that the eventfs_inode structure ends with a single 32bit int, and on 64 bit machines, there's likely a 4 byte hole due to alignment. We can use this hole to store the inode number for the eventfs_inode. All directories in eventfs are represented by an eventfs_inode and that data structure can hold its inode number. That last int was also purposely placed at the end of the structure to prevent holes from within. Now that there's a 4 byte number to hold the inode, both the inode number and the last integer can be moved up in the structure for better cache locality, where the llist and rcu fields can be moved to the end as they are only used when the eventfs_inode is being deleted. Link: https://lore.kernel.org/all/CAMuHMdXKiorg-jiuKoZpfZyDJ3Ynrfb8=X+c7x0Eewxn-YRdCA@mail.gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240122152748.46897388@gandalf.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Linus Torvalds Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Fixes: 53c41052ba31 ("eventfs: Have the inodes all for files and directories all be the same") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Kees Cook --- fs/tracefs/event_inode.c | 14 +++++++++++--- fs/tracefs/internal.h | 7 ++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 6795fda2af19..6b211522a13e 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -34,7 +34,15 @@ static DEFINE_MUTEX(eventfs_mutex); /* Choose something "unique" ;-) */ #define EVENTFS_FILE_INODE_INO 0x12c4e37 -#define EVENTFS_DIR_INODE_INO 0x134b2f5 + +/* Just try to make something consistent and unique */ +static int eventfs_dir_ino(struct eventfs_inode *ei) +{ + if (!ei->ino) + ei->ino = get_next_ino(); + + return ei->ino; +} /* * The eventfs_inode (ei) itself is protected by SRCU. It is released from @@ -396,7 +404,7 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent inode->i_fop = &eventfs_file_operations; /* All directories will have the same inode number */ - inode->i_ino = EVENTFS_DIR_INODE_INO; + inode->i_ino = eventfs_dir_ino(ei); ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; @@ -802,7 +810,7 @@ static int eventfs_iterate(struct file *file, struct dir_context *ctx) name = ei_child->name; - ino = EVENTFS_DIR_INODE_INO; + ino = eventfs_dir_ino(ei_child); if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) goto out_dec; diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 12b7d0150ae9..45397df9bb65 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -55,6 +55,10 @@ struct eventfs_inode { struct eventfs_attr *entry_attrs; struct eventfs_attr attr; void *data; + unsigned int is_freed:1; + unsigned int is_events:1; + unsigned int nr_entries:30; + unsigned int ino; /* * Union - used for deletion * @llist: for calling dput() if needed after RCU @@ -64,9 +68,6 @@ struct eventfs_inode { struct llist_node llist; struct rcu_head rcu; }; - unsigned int is_freed:1; - unsigned int is_events:1; - unsigned int nr_entries:30; }; static inline struct tracefs_inode *get_tracefs(const struct inode *inode) From a67e1f0bd4564b485e0f0c3ed7f6bf17688be268 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Tue, 23 Jan 2024 12:14:56 +0100 Subject: [PATCH 244/768] regulator: ti-abb: don't use devm_platform_ioremap_resource_byname for shared interrupt register We can't use devm_platform_ioremap_resource_byname() to remap the interrupt register that can be shared between regulator-abb-{ivahd,dspeve,gpu} drivers instances. The combined helper introduce a call to devm_request_mem_region() that creates a new busy resource region on PRM_IRQSTATUS_MPU register (0x4ae06010). The first devm_request_mem_region() call succeeds for regulator-abb-ivahd but fails for the two other regulator-abb-dspeve and regulator-abb-gpu. # cat /proc/iomem | grep -i 4ae06 4ae06010-4ae06013 : 4ae07e34.regulator-abb-ivahd int-address 4ae06014-4ae06017 : 4ae07ddc.regulator-abb-mpu int-address regulator-abb-dspeve and regulator-abb-gpu are missing due to devm_request_mem_region() failure (EBUSY): [ 1.326660] ti_abb 4ae07e30.regulator-abb-dspeve: can't request region for resource [mem 0x4ae06010-0x4ae06013] [ 1.326660] ti_abb: probe of 4ae07e30.regulator-abb-dspeve failed with error -16 [ 1.327239] ti_abb 4ae07de4.regulator-abb-gpu: can't request region for resource [mem 0x4ae06010-0x4ae06013] [ 1.327270] ti_abb: probe of 4ae07de4.regulator-abb-gpu failed with error -16 >From arm/boot/dts/dra7.dtsi: The abb_mpu is the only instance using its own interrupt register: (0x4ae06014) PRM_IRQSTATUS_MPU_2, ABB_MPU_DONE_ST (bit 7) The other tree instances (abb_ivahd, abb_dspeve, abb_gpu) share PRM_IRQSTATUS_MPU register (0x4ae06010) but use different bits ABB_IVA_DONE_ST (bit 30), ABB_DSPEVE_DONE_ST( bit 29) and ABB_GPU_DONE_ST (but 28). The commit b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related") overlooked the following comment implicitly explaining why devm_ioremap() is used in this case: /* * We may have shared interrupt register offsets which are * write-1-to-clear between domains ensuring exclusivity. */ Fixes and partially reverts commit b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related"). Improve the existing comment to avoid further conversion to devm_platform_ioremap_resource_byname(). Fixes: b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related") Signed-off-by: Romain Naour Reviewed-by: Yoann Congal Link: https://msgid.link/r/20240123111456.739381-1-romain.naour@smile.fr Signed-off-by: Mark Brown --- drivers/regulator/ti-abb-regulator.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index f48214e2c3b4..04133510e5af 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -726,9 +726,25 @@ static int ti_abb_probe(struct platform_device *pdev) return PTR_ERR(abb->setup_reg); } - abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address"); - if (IS_ERR(abb->int_base)) - return PTR_ERR(abb->int_base); + pname = "int-address"; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname); + if (!res) { + dev_err(dev, "Missing '%s' IO resource\n", pname); + return -ENODEV; + } + /* + * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is + * shared between regulator-abb-{ivahd,dspeve,gpu} driver + * instances. Therefore use devm_ioremap() rather than + * devm_platform_ioremap_resource_byname() to avoid busy + * resource region conflicts. + */ + abb->int_base = devm_ioremap(dev, res->start, + resource_size(res)); + if (!abb->int_base) { + dev_err(dev, "Unable to map '%s'\n", pname); + return -ENOMEM; + } /* Map Optional resources */ pname = "efuse-address"; From de8b6e1c231a95abf95ad097b993d34b31458ec9 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 23 Jan 2024 15:11:49 +0800 Subject: [PATCH 245/768] spi: hisi-sfc-v3xx: Return IRQ_NONE if no interrupts were detected Return IRQ_NONE from the interrupt handler when no interrupt was detected. Because an empty interrupt will cause a null pointer error: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: complete+0x54/0x100 hisi_sfc_v3xx_isr+0x2c/0x40 [spi_hisi_sfc_v3xx] __handle_irq_event_percpu+0x64/0x1e0 handle_irq_event+0x7c/0x1cc Signed-off-by: Devyn Liu Link: https://msgid.link/r/20240123071149.917678-1-liudingyuan@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-hisi-sfc-v3xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c index 9d22018f7985..1301d14483d4 100644 --- a/drivers/spi/spi-hisi-sfc-v3xx.c +++ b/drivers/spi/spi-hisi-sfc-v3xx.c @@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = { static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data) { struct hisi_sfc_v3xx_host *host = data; + u32 reg; + + reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT); + if (!reg) + return IRQ_NONE; hisi_sfc_v3xx_disable_int(host); From 13f3956eb5681a4045a8dfdef48df5dc4d9f58a6 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:26:34 +0100 Subject: [PATCH 246/768] block: Fix WARNING in _copy_from_iter Syzkaller reports a warning in _copy_from_iter because an iov_iter is supposedly used in the wrong direction. The reason is that syzcaller managed to generate a request with a transfer direction of SG_DXFER_TO_FROM_DEV. This instructs the kernel to copy user buffers into the kernel, read into the copied buffers and then copy the data back to user space. Thus the iovec is used in both directions. Detect this situation in the block layer and construct a new iterator with the correct direction for the copy-in. Reported-by: syzbot+a532b03fdfee2c137666@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000009b92c10604d7a5e9@google.com/t/ Reported-by: syzbot+63dec323ac56c28e644f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000003faaa105f6e7c658@google.com/T/ Signed-off-by: Christian A. Ehrhardt Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240121202634.275068-1-lk@c--e.de Signed-off-by: Jens Axboe --- block/blk-map.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 8584babf3ea0..71210cdb3442 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -205,12 +205,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, /* * success */ - if ((iov_iter_rw(iter) == WRITE && - (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { + if (iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else if (map_data && map_data->from_user) { + struct iov_iter iter2 = *iter; + + /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */ + iter2.data_source = ITER_SOURCE; + ret = bio_copy_from_iter(bio, &iter2); + if (ret) + goto cleanup; } else { if (bmd->is_our_pages) zero_fill_bio(bio); From 41353fbad4f551e82c2792f7e82ac225c79cc710 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Thu, 18 Jan 2024 20:51:45 +0800 Subject: [PATCH 247/768] nvmet: unify aer type enum The host and target use two definition of aer type, unify them into a single one. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 4 ++-- drivers/nvme/target/discovery.c | 2 +- include/linux/nvme.h | 6 ------ 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index d26aa30f8702..fa35e65915f0 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -248,7 +248,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) continue; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_NS_CHANGED, NVME_LOG_CHANGED_NS); } @@ -265,7 +265,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys, continue; if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) continue; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_ANA, NVME_LOG_ANA); } mutex_unlock(&subsys->lock); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 668d257fa986..68e82ccc0e4e 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -21,7 +21,7 @@ static void __nvmet_disc_changed(struct nvmet_port *port, if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE)) return; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 462c21e0e417..68eff8c86ce3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -816,12 +816,6 @@ struct nvme_reservation_status_ext { struct nvme_registered_ctrl_ext regctl_eds[]; }; -enum nvme_async_event_type { - NVME_AER_TYPE_ERROR = 0, - NVME_AER_TYPE_SMART = 1, - NVME_AER_TYPE_NOTICE = 2, -}; - /* I/O commands */ enum nvme_opcode { From 5d390df3bdd13d178eb2e02e60e9a480f7103f7b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 23 Jan 2024 13:40:00 +0300 Subject: [PATCH 248/768] smb: client: delete "true", "false" defines Kernel has its own official true/false definitions. The defines aren't even used in this file. Signed-off-by: Alexey Dobriyan Signed-off-by: Steve French --- fs/smb/client/smbencrypt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/smb/client/smbencrypt.c b/fs/smb/client/smbencrypt.c index f0ce26414f17..1d1ee9f18f37 100644 --- a/fs/smb/client/smbencrypt.c +++ b/fs/smb/client/smbencrypt.c @@ -26,13 +26,6 @@ #include "cifsproto.h" #include "../common/md4.h" -#ifndef false -#define false 0 -#endif -#ifndef true -#define true 1 -#endif - /* following came from the other byteorder.h to avoid include conflicts */ #define CVAL(buf,pos) (((unsigned char *)(buf))[pos]) #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) From d2d0223441d3caad65f6978c07869321bce968e0 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Tue, 23 Jan 2024 13:21:58 -0300 Subject: [PATCH 249/768] docs/sphinx: Fix TOC scroll hack for the home page When on the documentation home page, there won't be any ".current" element since no entry from the TOC was selected yet. That results in a javascript error. Fix that by only trying to set the scrollTop if we have matches for current entries. Signed-off-by: Gustavo Sousa Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240123162157.61819-2-gustavo.sousa@intel.com --- Documentation/sphinx/templates/kernel-toc.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/sphinx/templates/kernel-toc.html b/Documentation/sphinx/templates/kernel-toc.html index b58efa99df52..41f1efbe64bb 100644 --- a/Documentation/sphinx/templates/kernel-toc.html +++ b/Documentation/sphinx/templates/kernel-toc.html @@ -12,5 +12,7 @@ From 3f0e4df37a1b505307f61fbfa7b1f9a2fa2c40bc Mon Sep 17 00:00:00 2001 From: Hu Haowen <2023002089@link.tyut.edu.cn> Date: Thu, 18 Jan 2024 17:01:40 +0800 Subject: [PATCH 250/768] docs/accel: correct links to mailing list archives Since the mailing archive list lkml.org is obsolete, change the links into lore.kernel.org's ones. Signed-off-by: Hu Haowen <2023002089@link.tyut.edu.cn> Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240118090140.4868-1-2023002089@link.tyut.edu.cn --- Documentation/accel/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/accel/introduction.rst b/Documentation/accel/introduction.rst index 89984dfececf..ae3030136637 100644 --- a/Documentation/accel/introduction.rst +++ b/Documentation/accel/introduction.rst @@ -101,8 +101,8 @@ External References email threads ------------- -* `Initial discussion on the New subsystem for acceleration devices `_ - Oded Gabbay (2022) -* `patch-set to add the new subsystem `_ - Oded Gabbay (2022) +* `Initial discussion on the New subsystem for acceleration devices `_ - Oded Gabbay (2022) +* `patch-set to add the new subsystem `_ - Oded Gabbay (2022) Conference talks ---------------- From ea7dcd8a48ea3b440a7070b1a0f70f757f8ed9a8 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 11 Jan 2024 09:52:20 +0100 Subject: [PATCH 251/768] doc: admin-guide/kernel-parameters: remove useless comment This comment about DRM drivers has been there since the first git commit. It simply doesn't belong in kernel-parameters; remove it. Signed-off-by: Vegard Nossum Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240111085220.3693059-1-vegard.nossum@oracle.com --- Documentation/admin-guide/kernel-parameters.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 102937bc8443..4410384596a9 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -218,8 +218,3 @@ bytes respectively. Such letter suffixes can also be entirely omitted: .. include:: kernel-parameters.txt :literal: - -Todo ----- - - Add more DRM drivers. From d546978e0c07b6333fdbcbd81b2f2e058d4560b5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 30 Nov 2023 10:55:15 +0100 Subject: [PATCH 252/768] docs: admin-guide: remove obsolete advice related to SLAB allocator Commit 1db9d06aaa55 ("mm/slab: remove CONFIG_SLAB from all Kconfig and Makefile") removes the config SLAB and makes the SLUB allocator the only default allocator in the kernel. Hence, the advice on reducing OS jitter due to kworker kernel threads to build with CONFIG_SLUB instead of CONFIG_SLAB is obsolete. Remove the obsolete advice to build with SLUB instead of SLAB. Signed-off-by: Lukas Bulwahn Acked-by: Vlastimil Babka Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20231130095515.21586-1-lukas.bulwahn@gmail.com --- .../admin-guide/kernel-per-CPU-kthreads.rst | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst index 993c2a05f5ee..b6aeae3327ce 100644 --- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst +++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst @@ -243,13 +243,9 @@ To reduce its OS jitter, do any of the following: 3. Do any of the following needed to avoid jitter that your application cannot tolerate: - a. Build your kernel with CONFIG_SLUB=y rather than - CONFIG_SLAB=y, thus avoiding the slab allocator's periodic - use of each CPU's workqueues to run its cache_reap() - function. - b. Avoid using oprofile, thus avoiding OS jitter from + a. Avoid using oprofile, thus avoiding OS jitter from wq_sync_buffer(). - c. Limit your CPU frequency so that a CPU-frequency + b. Limit your CPU frequency so that a CPU-frequency governor is not required, possibly enlisting the aid of special heatsinks or other cooling technologies. If done correctly, and if you CPU architecture permits, you should @@ -259,7 +255,7 @@ To reduce its OS jitter, do any of the following: WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - d. As of v3.18, Christoph Lameter's on-demand vmstat workers + c. As of v3.18, Christoph Lameter's on-demand vmstat workers commit prevents OS jitter due to vmstat_update() on CONFIG_SMP=y systems. Before v3.18, is not possible to entirely get rid of the OS jitter, but you can @@ -274,7 +270,7 @@ To reduce its OS jitter, do any of the following: (based on an earlier one from Gilad Ben-Yossef) that reduces or even eliminates vmstat overhead for some workloads at https://lore.kernel.org/r/00000140e9dfd6bd-40db3d4f-c1be-434f-8132-7820f81bb586-000000@email.amazonses.com. - e. If running on high-end powerpc servers, build with + d. If running on high-end powerpc servers, build with CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS daemon from running on each CPU every second or so. (This will require editing Kconfig files and will defeat @@ -282,12 +278,12 @@ To reduce its OS jitter, do any of the following: due to the rtas_event_scan() function. WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - f. If running on Cell Processor, build your kernel with + e. If running on Cell Processor, build your kernel with CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from spu_gov_work(). WARNING: Please check your CPU specifications to make sure that this is safe on your particular system. - g. If running on PowerMAC, build your kernel with + f. If running on PowerMAC, build your kernel with CONFIG_PMAC_RACKMETER=n to disable the CPU-meter, avoiding OS jitter from rackmeter_do_timer(). From 1732ebc4a26181c8f116c7639db99754b313edc8 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 23 Jan 2024 02:32:07 +0000 Subject: [PATCH 253/768] riscv, bpf: Fix unpredictable kernel crash about RV64 struct_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We encountered a kernel crash triggered by the bpf_tcp_ca testcase as show below: Unable to handle kernel paging request at virtual address ff60000088554500 Oops [#1] ... CPU: 3 PID: 458 Comm: test_progs Tainted: G OE 6.8.0-rc1-kselftest_plain #1 Hardware name: riscv-virtio,qemu (DT) epc : 0xff60000088554500 ra : tcp_ack+0x288/0x1232 epc : ff60000088554500 ra : ffffffff80cc7166 sp : ff2000000117ba50 gp : ffffffff82587b60 tp : ff60000087be0040 t0 : ff60000088554500 t1 : ffffffff801ed24e t2 : 0000000000000000 s0 : ff2000000117bbc0 s1 : 0000000000000500 a0 : ff20000000691000 a1 : 0000000000000018 a2 : 0000000000000001 a3 : ff60000087be03a0 a4 : 0000000000000000 a5 : 0000000000000000 a6 : 0000000000000021 a7 : ffffffff8263f880 s2 : 000000004ac3c13b s3 : 000000004ac3c13a s4 : 0000000000008200 s5 : 0000000000000001 s6 : 0000000000000104 s7 : ff2000000117bb00 s8 : ff600000885544c0 s9 : 0000000000000000 s10: ff60000086ff0b80 s11: 000055557983a9c0 t3 : 0000000000000000 t4 : 000000000000ffc4 t5 : ffffffff8154f170 t6 : 0000000000000030 status: 0000000200000120 badaddr: ff60000088554500 cause: 000000000000000c Code: c796 67d7 0000 0000 0052 0002 c13b 4ac3 0000 0000 (0001) 0000 ---[ end trace 0000000000000000 ]--- The reason is that commit 2cd3e3772e41 ("x86/cfi,bpf: Fix bpf_struct_ops CFI") changes the func_addr of arch_prepare_bpf_trampoline in struct_ops from NULL to non-NULL, while we use func_addr on RV64 to differentiate between struct_ops and regular trampoline. When the struct_ops testcase is triggered, it emits wrong prologue and epilogue, and lead to unpredictable issues. After commit 2cd3e3772e41, we can use BPF_TRAMP_F_INDIRECT to distinguish them as it always be set in struct_ops. Fixes: 2cd3e3772e41 ("x86/cfi,bpf: Fix bpf_struct_ops CFI") Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20240123023207.1917284-1-pulehui@huaweicloud.com --- arch/riscv/net/bpf_jit_comp64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 58dc64dd94a8..719a97e7edb2 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -795,6 +795,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT; void *orig_call = func_addr; bool save_ret; u32 insn; @@ -878,7 +879,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, stack_size = round_up(stack_size, 16); - if (func_addr) { + if (!is_struct_ops) { /* For the trampoline called from function entry, * the frame of traced function and the frame of * trampoline need to be considered. @@ -998,7 +999,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); - if (func_addr) { + if (!is_struct_ops) { /* trampoline called from function entry */ emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx); emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx); From 16bae3e1377846734ec6b87eee459c0f3551692c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 23 Jan 2024 16:55:02 -0500 Subject: [PATCH 254/768] io_uring: enable audit and restrict cred override for IORING_OP_FIXED_FD_INSTALL We need to correct some aspects of the IORING_OP_FIXED_FD_INSTALL command to take into account the security implications of making an io_uring-private file descriptor generally accessible to a userspace task. The first change in this patch is to enable auditing of the FD_INSTALL operation as installing a file descriptor into a task's file descriptor table is a security relevant operation and something that admins/users may want to audit. The second change is to disable the io_uring credential override functionality, also known as io_uring "personalities", in the FD_INSTALL command. The credential override in FD_INSTALL is particularly problematic as it affects the credentials used in the security_file_receive() LSM hook. If a task were to request a credential override via REQ_F_CREDS on a FD_INSTALL operation, the LSM would incorrectly check to see if the overridden credentials of the io_uring were able to "receive" the file as opposed to the task's credentials. After discussions upstream, it's difficult to imagine a use case where we would want to allow a credential override on a FD_INSTALL operation so we are simply going to block REQ_F_CREDS on IORING_OP_FIXED_FD_INSTALL operations. Fixes: dc18b89ab113 ("io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL") Signed-off-by: Paul Moore Link: https://lore.kernel.org/r/20240123215501.289566-2-paul@paul-moore.com Signed-off-by: Jens Axboe --- io_uring/opdef.c | 1 - io_uring/openclose.c | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 6705634e5f52..b1ee3a9c3807 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -471,7 +471,6 @@ const struct io_issue_def io_issue_defs[] = { }, [IORING_OP_FIXED_FD_INSTALL] = { .needs_file = 1, - .audit_skip = 1, .prep = io_install_fixed_fd_prep, .issue = io_install_fixed_fd, }, diff --git a/io_uring/openclose.c b/io_uring/openclose.c index 0fe0dd305546..e3357dfa14ca 100644 --- a/io_uring/openclose.c +++ b/io_uring/openclose.c @@ -277,6 +277,10 @@ int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sq if (flags & ~IORING_FIXED_FD_NO_CLOEXEC) return -EINVAL; + /* ensure the task's creds are used when installing/receiving fds */ + if (req->flags & REQ_F_CREDS) + return -EPERM; + /* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */ ifi = io_kiocb_to_cmd(req, struct io_fixed_install); ifi->o_flags = O_CLOEXEC; From 8deb05c84b63b4fdb8549e08942867a68924a5b8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Jan 2024 15:47:34 -0800 Subject: [PATCH 255/768] smb: Work around Clang __bdos() type confusion Recent versions of Clang gets confused about the possible size of the "user" allocation, and CONFIG_FORTIFY_SOURCE ends up emitting a warning[1]: repro.c:126:4: warning: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 126 | __write_overflow_field(p_size_field, size); | ^ for this memset(): int len; __le16 *user; ... len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); ... if (len) { ... } else { memset(user, '\0', 2); } While Clang works on this bug[2], switch to using a direct assignment, which avoids memset() entirely which both simplifies the code and silences the false positive warning. (Making "len" size_t also silences the warning, but the direct assignment seems better.) Reported-by: Nathan Chancellor Closes: https://github.com/ClangBuiltLinux/linux/issues/1966 [1] Link: https://github.com/llvm/llvm-project/issues/77813 [2] Cc: Steve French Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Kees Cook Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index ef4c2e3c9fa6..6322f0f68a17 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -572,7 +572,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { - memset(user, '\0', 2); + *(u16 *)user = 0; } rc = crypto_shash_update(ses->server->secmech.hmacmd5, From 966cc171c8be4fbeae1bf166d264e0bfb09e141c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 15 Feb 2022 19:22:18 +0000 Subject: [PATCH 256/768] cifs: Share server EOF pos with netfslib Use cifsi->netfs_ctx.remote_i_size instead of cifsi->server_eof so that netfslib can refer to it to. Signed-off-by: David Howells cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 17 ++++++++++++++--- fs/smb/client/cifsglob.h | 1 - fs/smb/client/file.c | 8 ++++---- fs/smb/client/inode.c | 8 +++++--- fs/smb/client/readdir.c | 2 +- fs/smb/client/smb2ops.c | 18 +++++++++++++----- 6 files changed, 37 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index e902de4e475a..2a4a4e3a8751 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -396,7 +396,7 @@ cifs_alloc_inode(struct super_block *sb) spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ - cifs_inode->server_eof = 0; + cifs_inode->netfs.remote_i_size = 0; cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; cifs_inode->epoch = 0; @@ -1380,6 +1380,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode); + struct cifsInodeInfo *target_cifsi = CIFS_I(target_inode); struct cifsFileInfo *smb_file_src; struct cifsFileInfo *smb_file_target; struct cifs_tcon *src_tcon; @@ -1428,7 +1429,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * Advance the EOF marker after the flush above to the end of the range * if it's short of that. */ - if (src_cifsi->server_eof < off + len) { + if (src_cifsi->netfs.remote_i_size < off + len) { rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); if (rc < 0) goto unlock; @@ -1452,12 +1453,22 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, /* Discard all the folios that overlap the destination region. */ truncate_inode_pages_range(&target_inode->i_data, fstart, fend); + fscache_invalidate(cifs_inode_cookie(target_inode), NULL, + i_size_read(target_inode), 0); + rc = file_modified(dst_file); if (!rc) { rc = target_tcon->ses->server->ops->copychunk_range(xid, smb_file_src, smb_file_target, off, len, destoff); - if (rc > 0 && destoff + rc > i_size_read(target_inode)) + if (rc > 0 && destoff + rc > i_size_read(target_inode)) { truncate_setsize(target_inode, destoff + rc); + netfs_resize_file(&target_cifsi->netfs, + i_size_read(target_inode), true); + fscache_resize_cookie(cifs_inode_cookie(target_inode), + i_size_read(target_inode)); + } + if (rc > 0 && destoff + rc > target_cifsi->netfs.zero_point) + target_cifsi->netfs.zero_point = destoff + rc; } file_accessed(src_file); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 20036fb16cec..dd12364ef372 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1561,7 +1561,6 @@ struct cifsInodeInfo { spinlock_t writers_lock; unsigned int writers; /* Number of writers on this inode */ unsigned long time; /* jiffies of last update of inode */ - u64 server_eof; /* current file size on server -- protected by i_lock */ u64 uniqueid; /* server inode number */ u64 createtime; /* creation time on server */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */ diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 3a213432775b..70f9e3f12a5c 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2120,8 +2120,8 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, { loff_t end_of_write = offset + bytes_written; - if (end_of_write > cifsi->server_eof) - cifsi->server_eof = end_of_write; + if (end_of_write > cifsi->netfs.remote_i_size) + netfs_resize_file(&cifsi->netfs, end_of_write, true); } static ssize_t @@ -3247,8 +3247,8 @@ cifs_uncached_writev_complete(struct work_struct *work) spin_lock(&inode->i_lock); cifs_update_eof(cifsi, wdata->offset, wdata->bytes); - if (cifsi->server_eof > inode->i_size) - i_size_write(inode, cifsi->server_eof); + if (cifsi->netfs.remote_i_size > inode->i_size) + i_size_write(inode, cifsi->netfs.remote_i_size); spin_unlock(&inode->i_lock); complete(&wdata->done); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index f0989484f2c6..d02f8ba29cb5 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -104,7 +104,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode); mtime = inode_get_mtime(inode); if (timespec64_equal(&mtime, &fattr->cf_mtime) && - cifs_i->server_eof == fattr->cf_eof) { + cifs_i->netfs.remote_i_size == fattr->cf_eof) { cifs_dbg(FYI, "%s: inode %llu is unchanged\n", __func__, cifs_i->uniqueid); return; @@ -194,7 +194,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) else clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags); - cifs_i->server_eof = fattr->cf_eof; + cifs_i->netfs.remote_i_size = fattr->cf_eof; /* * Can't safely change the file size here if the client is writing to * it due to potential races. @@ -2858,7 +2858,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, set_size_out: if (rc == 0) { - cifsInode->server_eof = attrs->ia_size; + netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true); cifs_setsize(inode, attrs->ia_size); /* * i_blocks is not related to (i_size / i_blksize), but instead @@ -3011,6 +3011,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) if ((attrs->ia_valid & ATTR_SIZE) && attrs->ia_size != i_size_read(inode)) { truncate_setsize(inode, attrs->ia_size); + netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true); fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size); } @@ -3210,6 +3211,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if ((attrs->ia_valid & ATTR_SIZE) && attrs->ia_size != i_size_read(inode)) { truncate_setsize(inode, attrs->ia_size); + netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true); fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size); } diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 94255401b38d..3b1b01d10f7d 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -141,7 +141,7 @@ retry: if (likely(reparse_inode_match(inode, fattr))) { fattr->cf_mode = inode->i_mode; fattr->cf_rdev = inode->i_rdev; - fattr->cf_eof = CIFS_I(inode)->server_eof; + fattr->cf_eof = CIFS_I(inode)->netfs.remote_i_size; fattr->cf_symlink_target = NULL; } else { CIFS_I(inode)->time = 0; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index d9553c2556a2..27f8caccff7f 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3213,6 +3213,9 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, new_size); if (rc >= 0) { truncate_setsize(inode, new_size); + netfs_resize_file(&cifsi->netfs, new_size, true); + if (offset < cifsi->netfs.zero_point) + cifsi->netfs.zero_point = offset; fscache_resize_cookie(cifs_inode_cookie(inode), new_size); } } @@ -3436,7 +3439,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, new_eof); if (rc == 0) { - cifsi->server_eof = new_eof; + netfs_resize_file(&cifsi->netfs, new_eof, true); cifs_setsize(inode, new_eof); cifs_truncate_page(inode->i_mapping, inode->i_size); truncate_setsize(inode, new_eof); @@ -3528,8 +3531,9 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, int rc; unsigned int xid; struct inode *inode = file_inode(file); - struct cifsFileInfo *cfile = file->private_data; struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifsFileInfo *cfile = file->private_data; + struct netfs_inode *ictx = &cifsi->netfs; loff_t old_eof, new_eof; xid = get_xid(); @@ -3549,6 +3553,7 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, goto out_2; truncate_pagecache_range(inode, off, old_eof); + ictx->zero_point = old_eof; rc = smb2_copychunk_range(xid, cfile, cfile, off + len, old_eof - off - len, off); @@ -3563,9 +3568,10 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, rc = 0; - cifsi->server_eof = i_size_read(inode) - len; - truncate_setsize(inode, cifsi->server_eof); - fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof); + truncate_setsize(inode, new_eof); + netfs_resize_file(&cifsi->netfs, new_eof, true); + ictx->zero_point = new_eof; + fscache_resize_cookie(cifs_inode_cookie(inode), new_eof); out_2: filemap_invalidate_unlock(inode->i_mapping); out: @@ -3581,6 +3587,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, unsigned int xid; struct cifsFileInfo *cfile = file->private_data; struct inode *inode = file_inode(file); + struct cifsInodeInfo *cifsi = CIFS_I(inode); __u64 count, old_eof, new_eof; xid = get_xid(); @@ -3608,6 +3615,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, goto out_2; truncate_setsize(inode, new_eof); + netfs_resize_file(&cifsi->netfs, i_size_read(inode), true); fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode)); rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); From fc43a8ac396d302ced1e991e4913827cf72c8eb9 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:43 +0000 Subject: [PATCH 257/768] cifs: cifs_pick_channel should try selecting active channels cifs_pick_channel today just selects a channel based on the policy of least loaded channel. However, it does not take into account if the channel needs reconnect. As a result, we can have failures in send that can be completely avoided. This change doesn't make a channel a candidate for this selection if it needs reconnect. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 4f717ad7c21b..8695c9961f5a 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1026,6 +1026,9 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) if (!server || server->terminate) continue; + if (CIFS_CHAN_NEEDS_RECONNECT(ses, i)) + continue; + /* * strictly speaking, we should pick up req_lock to read * server->in_flight. But it shouldn't matter much here if we From 3222bc997a24821ea4f96d1a9108dafeadc00cfb Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Thu, 18 Jan 2024 11:18:06 -0800 Subject: [PATCH 258/768] Revert "net: macsec: use skb_ensure_writable_head_tail to expand the skb" This reverts commit b34ab3527b9622ca4910df24ff5beed5aa66c6b5. Using skb_ensure_writable_head_tail without a call to skb_unshare causes the MACsec stack to operate on the original skb rather than a copy in the macsec_encrypt path. This causes the buffer to be exceeded in space, and leads to warnings generated by skb_put operations. Opting to revert this change since skb_copy_expand is more efficient than skb_ensure_writable_head_tail followed by a call to skb_unshare. Log: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:2464! invalid opcode: 0000 [#1] SMP KASAN CPU: 21 PID: 61997 Comm: iperf3 Not tainted 6.7.0-rc8_for_upstream_debug_2024_01_07_17_05 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:skb_put+0x113/0x190 Code: 03 0f b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 04 84 d2 75 70 3b 9d bc 00 00 00 77 0e 48 83 c4 08 4c 89 e8 5b 5d 41 5d c3 <0f> 0b 4c 8b 6c 24 20 89 74 24 04 e8 6d b7 f0 fe 8b 74 24 04 48 c7 RSP: 0018:ffff8882694e7278 EFLAGS: 00010202 RAX: 0000000000000025 RBX: 0000000000000100 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000010 RDI: ffff88816ae0bad4 RBP: ffff88816ae0ba60 R08: 0000000000000004 R09: 0000000000000004 R10: 0000000000000001 R11: 0000000000000001 R12: ffff88811ba5abfa R13: ffff8882bdecc100 R14: ffff88816ae0ba60 R15: ffff8882bdecc0ae FS: 00007fe54df02740(0000) GS:ffff88881f080000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe54d92e320 CR3: 000000010a345003 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? die+0x33/0x90 ? skb_put+0x113/0x190 ? do_trap+0x1b4/0x3b0 ? skb_put+0x113/0x190 ? do_error_trap+0xb6/0x180 ? skb_put+0x113/0x190 ? handle_invalid_op+0x2c/0x30 ? skb_put+0x113/0x190 ? exc_invalid_op+0x2b/0x40 ? asm_exc_invalid_op+0x16/0x20 ? skb_put+0x113/0x190 ? macsec_start_xmit+0x4e9/0x21d0 macsec_start_xmit+0x830/0x21d0 ? get_txsa_from_nl+0x400/0x400 ? lock_downgrade+0x690/0x690 ? dev_queue_xmit_nit+0x78b/0xae0 dev_hard_start_xmit+0x151/0x560 __dev_queue_xmit+0x1580/0x28f0 ? check_chain_key+0x1c5/0x490 ? netdev_core_pick_tx+0x2d0/0x2d0 ? __ip_queue_xmit+0x798/0x1e00 ? lock_downgrade+0x690/0x690 ? mark_held_locks+0x9f/0xe0 ip_finish_output2+0x11e4/0x2050 ? ip_mc_finish_output+0x520/0x520 ? ip_fragment.constprop.0+0x230/0x230 ? __ip_queue_xmit+0x798/0x1e00 __ip_queue_xmit+0x798/0x1e00 ? __skb_clone+0x57a/0x760 __tcp_transmit_skb+0x169d/0x3490 ? lock_downgrade+0x690/0x690 ? __tcp_select_window+0x1320/0x1320 ? mark_held_locks+0x9f/0xe0 ? lockdep_hardirqs_on_prepare+0x286/0x400 ? tcp_small_queue_check.isra.0+0x120/0x3d0 tcp_write_xmit+0x12b6/0x7100 ? skb_page_frag_refill+0x1e8/0x460 __tcp_push_pending_frames+0x92/0x320 tcp_sendmsg_locked+0x1ed4/0x3190 ? tcp_sendmsg_fastopen+0x650/0x650 ? tcp_sendmsg+0x1a/0x40 ? mark_held_locks+0x9f/0xe0 ? lockdep_hardirqs_on_prepare+0x286/0x400 tcp_sendmsg+0x28/0x40 ? inet_send_prepare+0x1b0/0x1b0 __sock_sendmsg+0xc5/0x190 sock_write_iter+0x222/0x380 ? __sock_sendmsg+0x190/0x190 ? kfree+0x96/0x130 vfs_write+0x842/0xbd0 ? kernel_write+0x530/0x530 ? __fget_light+0x51/0x220 ? __fget_light+0x51/0x220 ksys_write+0x172/0x1d0 ? update_socket_protocol+0x10/0x10 ? __x64_sys_read+0xb0/0xb0 ? lockdep_hardirqs_on_prepare+0x286/0x400 do_syscall_64+0x40/0xe0 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0x7fe54d9018b7 Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffdbd4191d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000025 RCX: 00007fe54d9018b7 RDX: 0000000000000025 RSI: 0000000000d9859c RDI: 0000000000000004 RBP: 0000000000d9859c R08: 0000000000000004 R09: 0000000000000000 R10: 00007fe54d80afe0 R11: 0000000000000246 R12: 0000000000000004 R13: 0000000000000025 R14: 00007fe54e00ec00 R15: 0000000000d982a0 Modules linked in: 8021q garp mrp iptable_raw bonding vfio_pci rdma_ucm ib_umad mlx5_vfio_pci mlx5_ib vfio_pci_core vfio_iommu_type1 ib_uverbs vfio mlx5_core ip_gre nf_tables ipip tunnel4 ib_ipoib ip6_gre gre ip6_tunnel tunnel6 geneve openvswitch nsh xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm ib_core zram zsmalloc fuse [last unloaded: ib_uverbs] ---[ end trace 0000000000000000 ]--- Cc: Radu Pirea (NXP OSS) Cc: Sabrina Dubroca Signed-off-by: Rahul Rameshbabu Link: https://lore.kernel.org/r/20240118191811.50271-1-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e34816638569..7f5426285c61 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -607,11 +607,26 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, return ERR_PTR(-EINVAL); } - ret = skb_ensure_writable_head_tail(skb, dev); - if (unlikely(ret < 0)) { - macsec_txsa_put(tx_sa); - kfree_skb(skb); - return ERR_PTR(ret); + if (unlikely(skb_headroom(skb) < MACSEC_NEEDED_HEADROOM || + skb_tailroom(skb) < MACSEC_NEEDED_TAILROOM)) { + struct sk_buff *nskb = skb_copy_expand(skb, + MACSEC_NEEDED_HEADROOM, + MACSEC_NEEDED_TAILROOM, + GFP_ATOMIC); + if (likely(nskb)) { + consume_skb(skb); + skb = nskb; + } else { + macsec_txsa_put(tx_sa); + kfree_skb(skb); + return ERR_PTR(-ENOMEM); + } + } else { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + macsec_txsa_put(tx_sa); + return ERR_PTR(-ENOMEM); + } } unprotected_len = skb->len; From 6941f67ad37d5465b75b9ffc498fcf6897a3c00e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 22 Jan 2024 08:20:28 -0800 Subject: [PATCH 259/768] hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes Current code in netvsc_drv_init() incorrectly assumes that PAGE_SIZE is 4 Kbytes, which is wrong on ARM64 with 16K or 64K page size. As a result, the default VMBus ring buffer size on ARM64 with 64K page size is 8 Mbytes instead of the expected 512 Kbytes. While this doesn't break anything, a typical VM with 8 vCPUs and 8 netvsc channels wastes 120 Mbytes (8 channels * 2 ring buffers/channel * 7.5 Mbytes/ring buffer). Unfortunately, the module parameter specifying the ring buffer size is in units of 4 Kbyte pages. Ideally, it should be in units that are independent of PAGE_SIZE, but backwards compatibility prevents changing that now. Fix this by having netvsc_drv_init() hardcode 4096 instead of using PAGE_SIZE when calculating the ring buffer size in bytes. Also use the VMBUS_RING_SIZE macro to ensure proper alignment when running with page size larger than 4K. Cc: # 5.15.x Fixes: 7aff79e297ee ("Drivers: hv: Enable Hyper-V code to be built on ARM64") Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240122162028.348885-1-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 4406427d4617..273bd8a20122 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -44,7 +44,7 @@ static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); -MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); unsigned int netvsc_ring_bytes __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | @@ -2807,7 +2807,7 @@ static int __init netvsc_drv_init(void) pr_info("Increased ring_size to %u (min allowed)\n", ring_size); } - netvsc_ring_bytes = ring_size * PAGE_SIZE; + netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096); register_netdevice_notifier(&netvsc_netdev_notifier); From 04fe7c5029cbdbcdb28917f09a958d939a8f19f7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 22 Jan 2024 12:35:28 -0800 Subject: [PATCH 260/768] selftests: fill in some missing configs for net We are missing a lot of config options from net selftests, it seems: tun/tap: CONFIG_TUN, CONFIG_MACVLAN, CONFIG_MACVTAP fib_tests: CONFIG_NET_SCH_FQ_CODEL l2tp: CONFIG_L2TP, CONFIG_L2TP_V3, CONFIG_L2TP_IP, CONFIG_L2TP_ETH sctp-vrf: CONFIG_INET_DIAG txtimestamp: CONFIG_NET_CLS_U32 vxlan_mdb: CONFIG_BRIDGE_VLAN_FILTERING gre_gso: CONFIG_NET_IPGRE_DEMUX, CONFIG_IP_GRE, CONFIG_IPV6_GRE srv6_end_dt*_l3vpn: CONFIG_IPV6_SEG6_LWTUNNEL ip_local_port_range: CONFIG_MPTCP fib_test: CONFIG_NET_CLS_BASIC rtnetlink: CONFIG_MACSEC, CONFIG_NET_SCH_HTB, CONFIG_XFRM_INTERFACE CONFIG_NET_IPGRE, CONFIG_BONDING fib_nexthops: CONFIG_MPLS, CONFIG_MPLS_ROUTING vxlan_mdb: CONFIG_NET_ACT_GACT tls: CONFIG_TLS, CONFIG_CRYPTO_CHACHA20POLY1305 psample: CONFIG_PSAMPLE fcnal: CONFIG_TCP_MD5SIG Try to add them in a semi-alphabetical order. Fixes: 62199e3f1658 ("selftests: net: Add VXLAN MDB test") Fixes: c12e0d5f267d ("self-tests: introduce self-tests for RPS default mask") Fixes: 122db5e3634b ("selftests/net: add MPTCP coverage for IP_LOCAL_PORT_RANGE") Link: https://lore.kernel.org/r/20240122203528.672004-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 8da562a9ae87..19ff75051660 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,5 +1,6 @@ CONFIG_USER_NS=y CONFIG_NET_NS=y +CONFIG_BONDING=m CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m CONFIG_NUMA=y @@ -14,9 +15,13 @@ CONFIG_VETH=y CONFIG_NET_IPVTI=y CONFIG_IPV6_VTI=y CONFIG_DUMMY=y +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y +CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_VLAN_8021Q=y CONFIG_IFB=y +CONFIG_INET_DIAG=y +CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m @@ -25,15 +30,36 @@ CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m CONFIG_IP_NF_NAT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_L2TP_ETH=m +CONFIG_L2TP_IP=m +CONFIG_L2TP=m +CONFIG_L2TP_V3=y +CONFIG_MACSEC=m +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MPLS=y +CONFIG_MPTCP=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m +CONFIG_NET_ACT_GACT=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_PSAMPLE=m +CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y +CONFIG_TLS=m CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m @@ -48,7 +74,9 @@ CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m +CONFIG_TUN=y CONFIG_VXLAN=m CONFIG_IP_SCTP=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y +CONFIG_XFRM_INTERFACE=m From 32f2a0afa95fae0d1ceec2ff06e0e816939964b8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Jan 2024 15:28:43 +0200 Subject: [PATCH 261/768] net/sched: flower: Fix chain template offload When a qdisc is deleted from a net device the stack instructs the underlying driver to remove its flow offload callback from the associated filter block using the 'FLOW_BLOCK_UNBIND' command. The stack then continues to replay the removal of the filters in the block for this driver by iterating over the chains in the block and invoking the 'reoffload' operation of the classifier being used. In turn, the classifier in its 'reoffload' operation prepares and emits a 'FLOW_CLS_DESTROY' command for each filter. However, the stack does not do the same for chain templates and the underlying driver never receives a 'FLOW_CLS_TMPLT_DESTROY' command when a qdisc is deleted. This results in a memory leak [1] which can be reproduced using [2]. Fix by introducing a 'tmplt_reoffload' operation and have the stack invoke it with the appropriate arguments as part of the replay. Implement the operation in the sole classifier that supports chain templates (flower) by emitting the 'FLOW_CLS_TMPLT_{CREATE,DESTROY}' command based on whether a flow offload callback is being bound to a filter block or being unbound from one. As far as I can tell, the issue happens since cited commit which reordered tcf_block_offload_unbind() before tcf_block_flush_all_chains() in __tcf_block_put(). The order cannot be reversed as the filter block is expected to be freed after flushing all the chains. [1] unreferenced object 0xffff888107e28800 (size 2048): comm "tc", pid 1079, jiffies 4294958525 (age 3074.287s) hex dump (first 32 bytes): b1 a6 7c 11 81 88 ff ff e0 5b b3 10 81 88 ff ff ..|......[...... 01 00 00 00 00 00 00 00 e0 aa b0 84 ff ff ff ff ................ backtrace: [] __kmem_cache_alloc_node+0x1e8/0x320 [] __kmalloc+0x4e/0x90 [] mlxsw_sp_acl_ruleset_get+0x34d/0x7a0 [] mlxsw_sp_flower_tmplt_create+0x145/0x180 [] mlxsw_sp_flow_block_cb+0x1ea/0x280 [] tc_setup_cb_call+0x183/0x340 [] fl_tmplt_create+0x3da/0x4c0 [] tc_ctl_chain+0xa15/0x1170 [] rtnetlink_rcv_msg+0x3cc/0xed0 [] netlink_rcv_skb+0x170/0x440 [] netlink_unicast+0x540/0x820 [] netlink_sendmsg+0x8d8/0xda0 [] ____sys_sendmsg+0x30f/0xa80 [] ___sys_sendmsg+0x13a/0x1e0 [] __sys_sendmsg+0x11c/0x1f0 [] do_syscall_64+0x40/0xe0 unreferenced object 0xffff88816d2c0400 (size 1024): comm "tc", pid 1079, jiffies 4294958525 (age 3074.287s) hex dump (first 32 bytes): 40 00 00 00 00 00 00 00 57 f6 38 be 00 00 00 00 @.......W.8..... 10 04 2c 6d 81 88 ff ff 10 04 2c 6d 81 88 ff ff ..,m......,m.... backtrace: [] __kmem_cache_alloc_node+0x1e8/0x320 [] __kmalloc_node+0x51/0x90 [] kvmalloc_node+0xa6/0x1f0 [] bucket_table_alloc.isra.0+0x83/0x460 [] rhashtable_init+0x43b/0x7c0 [] mlxsw_sp_acl_ruleset_get+0x428/0x7a0 [] mlxsw_sp_flower_tmplt_create+0x145/0x180 [] mlxsw_sp_flow_block_cb+0x1ea/0x280 [] tc_setup_cb_call+0x183/0x340 [] fl_tmplt_create+0x3da/0x4c0 [] tc_ctl_chain+0xa15/0x1170 [] rtnetlink_rcv_msg+0x3cc/0xed0 [] netlink_rcv_skb+0x170/0x440 [] netlink_unicast+0x540/0x820 [] netlink_sendmsg+0x8d8/0xda0 [] ____sys_sendmsg+0x30f/0xa80 [2] # tc qdisc add dev swp1 clsact # tc chain add dev swp1 ingress proto ip chain 1 flower dst_ip 0.0.0.0/32 # tc qdisc del dev swp1 clsact # devlink dev reload pci/0000:06:00.0 Fixes: bbf73830cd48 ("net: sched: traverse chains in block with tcf_get_next_chain()") Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/sch_generic.h | 4 ++++ net/sched/cls_api.c | 9 ++++++++- net/sched/cls_flower.c | 23 +++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ba3e1b315de8..934fdb977551 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -375,6 +375,10 @@ struct tcf_proto_ops { struct nlattr **tca, struct netlink_ext_ack *extack); void (*tmplt_destroy)(void *tmplt_priv); + void (*tmplt_reoffload)(struct tcf_chain *chain, + bool add, + flow_setup_cb_t *cb, + void *cb_priv); struct tcf_exts * (*get_exts)(const struct tcf_proto *tp, u32 handle); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 92a12e3d0fe6..ff3d396a65aa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1560,6 +1560,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, chain_prev = chain, chain = __tcf_get_next_chain(block, chain), tcf_chain_put(chain_prev)) { + if (chain->tmplt_ops && add) + chain->tmplt_ops->tmplt_reoffload(chain, true, cb, + cb_priv); for (tp = __tcf_get_next_proto(chain, NULL); tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), @@ -1575,6 +1578,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, goto err_playback_remove; } } + if (chain->tmplt_ops && !add) + chain->tmplt_ops->tmplt_reoffload(chain, false, cb, + cb_priv); } return 0; @@ -3000,7 +3006,8 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, ops = tcf_proto_lookup_ops(name, true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); - if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { + if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump || + !ops->tmplt_reoffload) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); module_put(ops->owner); return -EOPNOTSUPP; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e5314a31f75a..efb9d2811b73 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2721,6 +2721,28 @@ static void fl_tmplt_destroy(void *tmplt_priv) kfree(tmplt); } +static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct fl_flow_tmplt *tmplt = chain->tmplt_priv; + struct flow_cls_offload cls_flower = {}; + + cls_flower.rule = flow_rule_alloc(0); + if (!cls_flower.rule) + return; + + cls_flower.common.chain_index = chain->index; + cls_flower.command = add ? FLOW_CLS_TMPLT_CREATE : + FLOW_CLS_TMPLT_DESTROY; + cls_flower.cookie = (unsigned long) tmplt; + cls_flower.rule->match.dissector = &tmplt->dissector; + cls_flower.rule->match.mask = &tmplt->mask; + cls_flower.rule->match.key = &tmplt->dummy_key; + + cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + kfree(cls_flower.rule); +} + static int fl_dump_key_val(struct sk_buff *skb, void *val, int val_type, void *mask, int mask_type, int len) @@ -3628,6 +3650,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .bind_class = fl_bind_class, .tmplt_create = fl_tmplt_create, .tmplt_destroy = fl_tmplt_destroy, + .tmplt_reoffload = fl_tmplt_reoffload, .tmplt_dump = fl_tmplt_dump, .get_exts = fl_get_exts, .owner = THIS_MODULE, From 4373534a9850627a2695317944898eb1283a2db0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Jan 2024 15:00:00 +0800 Subject: [PATCH 262/768] scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler Inside scsi_eh_wakeup(), scsi_host_busy() is called & checked with host lock every time for deciding if error handler kthread needs to be waken up. This can be too heavy in case of recovery, such as: - N hardware queues - queue depth is M for each hardware queue - each scsi_host_busy() iterates over (N * M) tag/requests If recovery is triggered in case that all requests are in-flight, each scsi_eh_wakeup() is strictly serialized, when scsi_eh_wakeup() is called for the last in-flight request, scsi_host_busy() has been run for (N * M - 1) times, and request has been iterated for (N*M - 1) * (N * M) times. If both N and M are big enough, hard lockup can be triggered on acquiring host lock, and it is observed on mpi3mr(128 hw queues, queue depth 8169). Fix the issue by calling scsi_host_busy() outside the host lock. We don't need the host lock for getting busy count because host the lock never covers that. [mkp: Drop unnecessary 'busy' variables pointed out by Bart] Cc: Ewan Milne Fixes: 6eb045e092ef ("scsi: core: avoid host-wide host_busy counter for scsi_mq") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240112070000.4161982-1-ming.lei@redhat.com Reviewed-by: Ewan D. Milne Reviewed-by: Sathya Prakash Veerichetty Tested-by: Sathya Prakash Veerichetty Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 8 ++++---- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_priv.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 79da4b1c1df0..4f455884fdc4 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd); static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *, struct scsi_cmnd *); -void scsi_eh_wakeup(struct Scsi_Host *shost) +void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { lockdep_assert_held(shost->host_lock); - if (scsi_host_busy(shost) == shost->host_failed) { + if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, @@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); @@ -286,7 +286,7 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cf3864f72093..1fb80eae9a63 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -280,7 +280,7 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) if (unlikely(scsi_host_in_recovery(shost))) { spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 3f0dfb97db6b..1fbfe1b52c9f 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -92,7 +92,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work); extern enum blk_eh_timer_return scsi_timeout(struct request *req); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); -extern void scsi_eh_wakeup(struct Scsi_Host *shost); +extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy); extern void scsi_eh_scmd_add(struct scsi_cmnd *); void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *work_q, From a68106a6928e0a6680f12bcc7338c0dddcfe4d11 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:45 +0000 Subject: [PATCH 263/768] cifs: translate network errors on send to -ECONNABORTED When the network stack returns various errors, we today bubble up the error to the user (in case of soft mounts). This change translates all network errors except -EINTR and -EAGAIN to -ECONNABORTED. A similar approach is taken when we receive network errors when reading from the socket. The change also forces the cifsd thread to reconnect during it's next activity. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8695c9961f5a..e00278fcfa4f 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -400,10 +400,17 @@ unmask: server->conn_id, server->hostname); } smbd_done: - if (rc < 0 && rc != -EINTR) + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. + */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else if (rc > 0) + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) rc = 0; out: cifs_in_send_dec(server); From 64cc377b7628b81ffdbdb1c6bacfba895dcac3f8 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:46 +0000 Subject: [PATCH 264/768] cifs: helper function to check replayable error codes The code to check for replay is not just -EAGAIN. In some cases, the send request or receive response may result in network errors, which we're now mapping to -ECONNABORTED. This change introduces a helper function which checks if the error returned in one of the above two errors. And all checks for replays will now use this helper. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 1 + fs/smb/client/cifsglob.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 971892620504..5730c65ffb40 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -367,6 +367,7 @@ out: atomic_inc(&tcon->num_remote_opens); } kfree(utf16_path); + return rc; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index dd12364ef372..4eb706e27c82 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1830,6 +1830,13 @@ static inline bool is_retryable_error(int error) return false; } +static inline bool is_replayable_error(int error) +{ + if (error == -EAGAIN || error == -ECONNABORTED) + return true; + return false; +} + /* cifs_get_writable_file() flags */ #define FIND_WR_ANY 0 From 4f1fffa2376922f3d1d506e49c0fd445b023a28e Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:47 +0000 Subject: [PATCH 265/768] cifs: commands that are retried should have replay flag set MS-SMB2 states that the header flag SMB2_FLAGS_REPLAY_OPERATION needs to be set when a command needs to be retried, so that the server is aware that this is a replay for an operation that appeared before. This can be very important, for example, for state changing operations and opens which get retried following a reconnect; since the client maybe unaware of the status of the previous open. This is particularly important for multichannel scenario, since disconnection of one connection does not mean that the session is lost. The requests can be replayed on another channel. This change also makes use of exponential back-off before replays and also limits the number of retries to "retrans" mount option value. Also, this change does not modify the read/write codepath. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 23 +++- fs/smb/client/cifsglob.h | 5 + fs/smb/client/smb2inode.c | 33 ++++- fs/smb/client/smb2ops.c | 123 +++++++++++++++-- fs/smb/client/smb2pdu.c | 264 +++++++++++++++++++++++++++++++++---- fs/smb/client/smb2proto.h | 5 + 6 files changed, 406 insertions(+), 47 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5730c65ffb40..1daeb5714faa 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -145,21 +145,27 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, struct cached_fid *cfid; struct cached_fids *cfids; const char *npath; + int retries = 0, cur_sleep = 1; if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache || is_smb1_server(tcon->ses->server) || (dir_cache_timeout == 0)) return -EOPNOTSUPP; ses = tcon->ses; - server = cifs_pick_channel(ses); cfids = tcon->cfids; - if (!server->ops->new_lease_key) - return -EIO; - if (cifs_sb->root == NULL) return -ENOENT; +replay_again: + /* reinitialize for possible replay */ + flags = 0; + oplock = SMB2_OPLOCK_LEVEL_II; + server = cifs_pick_channel(ses); + + if (!server->ops->new_lease_key) + return -EIO; + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) return -ENOMEM; @@ -268,6 +274,11 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, */ cfid->has_lease = true; + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + } + rc = compound_send_recv(xid, ses, server, flags, 2, rqst, resp_buftype, rsp_iov); @@ -368,6 +379,10 @@ out: } kfree(utf16_path); + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 4eb706e27c82..ceaf9857885d 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -49,6 +49,11 @@ */ #define CIFS_DEF_ACTIMEO (1 * HZ) +/* + * max sleep time before retry to server + */ +#define CIFS_MAX_SLEEP 2000 + /* * max attribute cache timeout (jiffies) - 2^30 */ diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index a652200540c8..05818cd6d932 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -120,6 +120,14 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, unsigned int size[2]; void *data[2]; int len; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + oplock = SMB2_OPLOCK_LEVEL_NONE; + num_rqst = 0; + server = cifs_pick_channel(ses); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); if (vars == NULL) @@ -127,8 +135,6 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, rqst = &vars->rqst[0]; rsp_iov = &vars->rsp_iov[0]; - server = cifs_pick_channel(ses); - if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -463,15 +469,24 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, num_rqst++; if (cfile) { + if (retries) + for (i = 1; i < num_rqst - 2; i++) + smb2_set_replay(server, &rqst[i]); + rc = compound_send_recv(xid, ses, server, flags, num_rqst - 2, &rqst[1], &resp_buftype[1], &rsp_iov[1]); - } else + } else { + if (retries) + for (i = 0; i < num_rqst; i++) + smb2_set_replay(server, &rqst[i]); + rc = compound_send_recv(xid, ses, server, flags, num_rqst, rqst, resp_buftype, rsp_iov); + } finished: num_rqst = 0; @@ -620,9 +635,6 @@ finished: } SMB2_close_free(&rqst[num_rqst]); - if (cfile) - cifsFileInfo_put(cfile); - num_cmds += 2; if (out_iov && out_buftype) { memcpy(out_iov, rsp_iov, num_cmds * sizeof(*out_iov)); @@ -632,7 +644,16 @@ finished: for (i = 0; i < num_cmds; i++) free_rsp_buf(resp_buftype[i], rsp_iov[i].iov_base); } + num_cmds -= 2; /* correct num_cmds as there could be a retry */ kfree(vars); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + + if (cfile) + cifsFileInfo_put(cfile); + return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 27f8caccff7f..83c898afc835 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1108,7 +1108,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, { struct smb2_compound_vars *vars; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct smb_rqst *rqst; struct kvec *rsp_iov; __le16 *utf16_path = NULL; @@ -1124,6 +1124,13 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_file_full_ea_info *ea = NULL; struct smb2_query_info_rsp *rsp; int rc, used_len = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_CP_CREATE_CLOSE_OP; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(ses); if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -1244,6 +1251,12 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, goto sea_exit; smb2_set_related(&rqst[2]); + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + smb2_set_replay(server, &rqst[2]); + } + rc = compound_send_recv(xid, ses, server, flags, 3, rqst, resp_buftype, rsp_iov); @@ -1260,6 +1273,11 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, kfree(vars); out_free_path: kfree(utf16_path); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } #endif @@ -1484,7 +1502,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct smb_rqst *rqst; struct kvec *rsp_iov; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; char __user *arg = (char __user *)p; struct smb_query_info qi; struct smb_query_info __user *pqi; @@ -1501,6 +1519,13 @@ smb2_ioctl_query_info(const unsigned int xid, void *data[2]; int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; void (*free_req1_func)(struct smb_rqst *r); + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_CP_CREATE_CLOSE_OP; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(ses); vars = kzalloc(sizeof(*vars), GFP_ATOMIC); if (vars == NULL) @@ -1641,6 +1666,12 @@ smb2_ioctl_query_info(const unsigned int xid, goto free_req_1; smb2_set_related(&rqst[2]); + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + smb2_set_replay(server, &rqst[2]); + } + rc = compound_send_recv(xid, ses, server, flags, 3, rqst, resp_buftype, rsp_iov); @@ -1701,6 +1732,11 @@ free_output_buffer: kfree(buffer); free_vars: kfree(vars); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -2227,8 +2263,14 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct smb2_query_directory_rsp *qd_rsp = NULL; struct smb2_create_rsp *op_rsp = NULL; - struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses); - int retry_count = 0; + struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(tcon->ses); utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) @@ -2278,14 +2320,15 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, smb2_set_related(&rqst[1]); -again: + if (retries) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[1]); + } + rc = compound_send_recv(xid, tcon->ses, server, flags, 2, rqst, resp_buftype, rsp_iov); - if (rc == -EAGAIN && retry_count++ < 10) - goto again; - /* If the open failed there is nothing to do */ op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; if (op_rsp == NULL || op_rsp->hdr.Status != STATUS_SUCCESS) { @@ -2333,6 +2376,11 @@ again: SMB2_query_directory_free(&rqst[1]); free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -2457,6 +2505,22 @@ smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid, CIFS_CACHE_READ(cinode) ? 1 : 0); } +void +smb2_set_replay(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + struct smb2_hdr *shdr; + + if (server->dialect < SMB30_PROT_ID) + return; + + shdr = (struct smb2_hdr *)(rqst->rq_iov[0].iov_base); + if (shdr == NULL) { + cifs_dbg(FYI, "shdr NULL in smb2_set_related\n"); + return; + } + shdr->Flags |= SMB2_FLAGS_REPLAY_OPERATION; +} + void smb2_set_related(struct smb_rqst *rqst) { @@ -2529,6 +2593,27 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst) shdr->NextCommand = cpu_to_le32(len); } +/* + * helper function for exponential backoff and check if replayable + */ +bool smb2_should_replay(struct cifs_tcon *tcon, + int *pretries, + int *pcur_sleep) +{ + if (!pretries || !pcur_sleep) + return false; + + if (tcon->retry || (*pretries)++ < tcon->ses->server->retrans) { + msleep(*pcur_sleep); + (*pcur_sleep) = ((*pcur_sleep) << 1); + if ((*pcur_sleep) > CIFS_MAX_SLEEP) + (*pcur_sleep) = CIFS_MAX_SLEEP; + return true; + } + + return false; +} + /* * Passes the query info response back to the caller on success. * Caller need to free this with free_rsp_buf(). @@ -2542,7 +2627,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, { struct smb2_compound_vars *vars; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst *rqst; int resp_buftype[3]; @@ -2553,6 +2638,13 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, int rc; __le16 *utf16_path; struct cached_fid *cfid = NULL; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_CP_CREATE_CLOSE_OP; + oplock = SMB2_OPLOCK_LEVEL_NONE; + server = cifs_pick_channel(ses); if (!path) path = ""; @@ -2633,6 +2725,14 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, goto qic_exit; smb2_set_related(&rqst[2]); + if (retries) { + if (!cfid) { + smb2_set_replay(server, &rqst[0]); + smb2_set_replay(server, &rqst[2]); + } + smb2_set_replay(server, &rqst[1]); + } + if (cfid) { rc = compound_send_recv(xid, ses, server, flags, 1, &rqst[1], @@ -2665,6 +2765,11 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, kfree(vars); out_free_path: kfree(utf16_path); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 288199f0b987..f92d77099ab4 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2765,7 +2765,14 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, int flags = 0; unsigned int total_len; __le16 *utf16_path = NULL; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + n_iov = 2; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "mkdir\n"); @@ -2869,6 +2876,10 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, /* no need to inc num_remote_opens because we close it just below */ trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, full_path, CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES); + + if (retries) + smb2_set_replay(server, &rqst); + /* resource #4: response buffer */ rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -2906,6 +2917,11 @@ err_free_req: cifs_small_buf_release(req); err_free_path: kfree(utf16_path); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3101,12 +3117,18 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, struct smb2_create_rsp *rsp = NULL; struct cifs_tcon *tcon = oparms->tcon; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct kvec iov[SMB2_CREATE_IOV_SIZE]; struct kvec rsp_iov = {NULL, 0}; int resp_buftype = CIFS_NO_BUFFER; int rc = 0; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "create/open\n"); if (!ses || !server) @@ -3128,6 +3150,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, trace_smb3_open_enter(xid, tcon->tid, tcon->ses->Suid, oparms->path, oparms->create_options, oparms->desired_access); + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -3181,6 +3206,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3305,6 +3335,22 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, int resp_buftype = CIFS_NO_BUFFER; int rc = 0; int flags = 0; + int retries = 0, cur_sleep = 1; + + if (!tcon) + return -EIO; + + ses = tcon->ses; + if (!ses) + return -EIO; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); + + if (!server) + return -EIO; cifs_dbg(FYI, "SMB2 IOCTL\n"); @@ -3315,17 +3361,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (plen) *plen = 0; - if (!tcon) - return -EIO; - - ses = tcon->ses; - if (!ses) - return -EIO; - - server = cifs_pick_channel(ses); - if (!server) - return -EIO; - if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -3340,6 +3375,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (rc) goto ioctl_exit; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -3409,6 +3447,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, ioctl_exit: SMB2_ioctl_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3480,13 +3523,20 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, struct smb_rqst rqst; struct smb2_close_rsp *rsp = NULL; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct kvec iov[1]; struct kvec rsp_iov; int resp_buftype = CIFS_NO_BUFFER; int rc = 0; int flags = 0; bool query_attrs = false; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + query_attrs = false; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "Close\n"); @@ -3512,6 +3562,9 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, if (rc) goto close_exit; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); rsp = (struct smb2_close_rsp *)rsp_iov.iov_base; @@ -3545,6 +3598,11 @@ close_exit: cifs_dbg(VFS, "handle cancelled close fid 0x%llx returned error %d\n", persistent_fid, tmp_rc); } + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3675,12 +3733,19 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, struct TCP_Server_Info *server; int flags = 0; bool allocated = false; + int retries = 0, cur_sleep = 1; cifs_dbg(FYI, "Query Info\n"); if (!ses) return -EIO; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + allocated = false; server = cifs_pick_channel(ses); + if (!server) return -EIO; @@ -3702,6 +3767,9 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, trace_smb3_query_info_enter(xid, persistent_fid, tcon->tid, ses->Suid, info_class, (__u32)info_type); + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; @@ -3744,6 +3812,11 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, qinf_exit: SMB2_query_info_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -3844,7 +3917,7 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, u32 *plen /* returned data len */) { struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct smb_rqst rqst; struct smb2_change_notify_rsp *smb_rsp; struct kvec iov[1]; @@ -3852,6 +3925,12 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, int resp_buftype = CIFS_NO_BUFFER; int flags = 0; int rc = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "change notify\n"); if (!ses || !server) @@ -3876,6 +3955,10 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, trace_smb3_notify_enter(xid, persistent_fid, tcon->tid, ses->Suid, (u8)watch_tree, completion_filter); + + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -3910,6 +3993,11 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, if (rqst.rq_iov) cifs_small_buf_release(rqst.rq_iov[0].iov_base); /* request */ free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -4152,10 +4240,16 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, struct smb_rqst rqst; struct kvec iov[1]; struct kvec rsp_iov = {NULL, 0}; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int resp_buftype = CIFS_NO_BUFFER; int flags = 0; int rc = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "flush\n"); if (!ses || !(ses->server)) @@ -4175,6 +4269,10 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, goto flush_exit; trace_smb3_flush_enter(xid, persistent_fid, tcon->tid, ses->Suid); + + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -4189,6 +4287,11 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, flush_exit: SMB2_flush_free(&rqst); free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -4826,18 +4929,21 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, int flags = 0; unsigned int total_len; struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; +replay_again: + /* reinitialize for possible replay */ + flags = 0; *nbytes = 0; - - if (n_vec < 1) - return rc; - if (!io_parms->server) io_parms->server = cifs_pick_channel(io_parms->tcon->ses); server = io_parms->server; if (server == NULL) return -ECONNABORTED; + if (n_vec < 1) + return rc; + rc = smb2_plain_req_init(SMB2_WRITE, io_parms->tcon, server, (void **) &req, &total_len); if (rc) @@ -4871,6 +4977,9 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, rqst.rq_iov = iov; rqst.rq_nvec = n_vec + 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, io_parms->tcon->ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -4895,6 +5004,11 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, cifs_small_buf_release(req); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(io_parms->tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5206,8 +5320,14 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, struct kvec rsp_iov; int rc = 0; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); if (!ses || !(ses->server)) return -EIO; @@ -5227,6 +5347,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, if (rc) goto qdir_exit; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base; @@ -5261,6 +5384,11 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, qdir_exit: SMB2_query_directory_free(&rqst); free_rsp_buf(resp_buftype, rsp); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5327,8 +5455,14 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); if (!ses || !server) return -EIO; @@ -5356,6 +5490,8 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, return rc; } + if (retries) + smb2_set_replay(server, &rqst); rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, @@ -5371,6 +5507,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, free_rsp_buf(resp_buftype, rsp); kfree(iov); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5423,12 +5564,18 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_oplock_break *req = NULL; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; int flags = CIFS_OBREAK_OP; unsigned int total_len; struct kvec iov[1]; struct kvec rsp_iov; int resp_buf_type; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_OBREAK_OP; + server = cifs_pick_channel(ses); cifs_dbg(FYI, "SMB2_oplock_break\n"); rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, server, @@ -5453,15 +5600,21 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); - if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); cifs_dbg(FYI, "Send error in Oplock Break = %d\n", rc); } + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5547,9 +5700,15 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; FILE_SYSTEM_POSIX_INFO *info = NULL; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); rc = build_qfs_info_req(&iov, tcon, server, FS_POSIX_INFORMATION, @@ -5565,6 +5724,9 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = &iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); free_qfs_info_req(&iov); @@ -5584,6 +5746,11 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, posix_qfsinf_exit: free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5598,9 +5765,15 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; struct smb2_fs_full_size_info *info = NULL; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); rc = build_qfs_info_req(&iov, tcon, server, FS_FULL_SIZE_INFORMATION, @@ -5616,6 +5789,9 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = &iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); free_qfs_info_req(&iov); @@ -5635,6 +5811,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, qfsinf_exit: free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5649,9 +5830,15 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; int resp_buftype, max_len, min_len; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = cifs_pick_channel(ses); + struct TCP_Server_Info *server; unsigned int rsp_len, offset; int flags = 0; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = 0; + server = cifs_pick_channel(ses); if (level == FS_DEVICE_INFORMATION) { max_len = sizeof(FILE_SYSTEM_DEVICE_INFO); @@ -5683,6 +5870,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = &iov; rqst.rq_nvec = 1; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); free_qfs_info_req(&iov); @@ -5720,6 +5910,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, qfsattr_exit: free_rsp_buf(resp_buftype, rsp_iov.iov_base); + + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } @@ -5737,7 +5932,13 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, unsigned int count; int flags = CIFS_NO_RSP_BUF; unsigned int total_len; - struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses); + struct TCP_Server_Info *server; + int retries = 0, cur_sleep = 1; + +replay_again: + /* reinitialize for possible replay */ + flags = CIFS_NO_RSP_BUF; + server = cifs_pick_channel(tcon->ses); cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock); @@ -5768,6 +5969,9 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, rqst.rq_iov = iov; rqst.rq_nvec = 2; + if (retries) + smb2_set_replay(server, &rqst); + rc = cifs_send_recv(xid, tcon->ses, server, &rqst, &resp_buf_type, flags, &rsp_iov); @@ -5779,6 +5983,10 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, tcon->ses->Suid, rc); } + if (is_replayable_error(rc) && + smb2_should_replay(tcon, &retries, &cur_sleep)) + goto replay_again; + return rc; } diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 0034b537b0b3..b3069911e9dd 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -122,6 +122,11 @@ extern unsigned long smb_rqst_len(struct TCP_Server_Info *server, extern void smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst); extern void smb2_set_related(struct smb_rqst *rqst); +extern void smb2_set_replay(struct TCP_Server_Info *server, + struct smb_rqst *rqst); +extern bool smb2_should_replay(struct cifs_tcon *tcon, + int *pretries, + int *pcur_sleep); /* * SMB2 Worker functions - most of protocol specific implementation details From 4cdad80261862c8cdcbb5fd232aa713d0bdefe24 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 18 Jan 2024 09:14:10 +0000 Subject: [PATCH 266/768] cifs: set replay flag for retries of write command Similar to the rest of the commands, this is a change to add replay flags on retry. This one does not add a back-off, considering that we may want to flush a write ASAP to the server. Considering that this will be a flush of cached pages, the retrans value is also not honoured. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/file.c | 1 + fs/smb/client/smb2pdu.c | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index ceaf9857885d..16befff4cbb4 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1506,6 +1506,7 @@ struct cifs_writedata { struct smbd_mr *mr; #endif struct cifs_credits credits; + bool replay; }; /* diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 70f9e3f12a5c..6f079976f9e7 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -3300,6 +3300,7 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, if (wdata->cfile->invalidHandle) rc = -EAGAIN; else { + wdata->replay = true; #ifdef CONFIG_CIFS_SMB_DIRECT if (wdata->mr) { wdata->mr->need_invalidate = true; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index f92d77099ab4..4f2cc8373b67 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4771,7 +4771,7 @@ smb2_async_writev(struct cifs_writedata *wdata, struct cifs_io_parms *io_parms = NULL; int credit_request; - if (!wdata->server) + if (!wdata->server || wdata->replay) server = wdata->server = cifs_pick_channel(tcon->ses); /* @@ -4856,6 +4856,8 @@ smb2_async_writev(struct cifs_writedata *wdata, rqst.rq_nvec = 1; rqst.rq_iter = wdata->iter; rqst.rq_iter_size = iov_iter_count(&rqst.rq_iter); + if (wdata->replay) + smb2_set_replay(server, &rqst); #ifdef CONFIG_CIFS_SMB_DIRECT if (wdata->mr) iov[0].iov_len += sizeof(struct smbd_buffer_descriptor_v1); From 993d1c346b1a51ac41b2193609a0d4e51e9748f4 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 23 Jan 2024 05:07:57 +0000 Subject: [PATCH 267/768] cifs: fix stray unlock in cifs_chan_skip_or_disable A recent change moved the code that decides to skip a channel or disable multichannel entirely, into a helper function. During this, a mutex_unlock of the session_mutex should have been removed. Doing that here. Fixes: f591062bdbf4 ("cifs: handle servers that still advertise multichannel after disabling") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4f2cc8373b67..86f6f35b7f32 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -195,7 +195,6 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, pserver = server->primary_server; cifs_signal_cifsd_for_reconnect(pserver, false); skip_terminate: - mutex_unlock(&ses->session_mutex); return -EHOSTDOWN; } From f4469f3858352ad1197434557150b1f7086762a0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 22 Jan 2024 09:09:56 -0800 Subject: [PATCH 268/768] scsi: storvsc: Fix ring buffer size calculation Current code uses the specified ring buffer size (either the default of 128 Kbytes or a module parameter specified value) to encompass the one page ring buffer header plus the actual ring itself. When the page size is 4K, carving off one page for the header isn't significant. But when the page size is 64K on ARM64, only half of the default 128 Kbytes is left for the actual ring. While this doesn't break anything, the smaller ring size could be a performance bottleneck. Fix this by applying the VMBUS_RING_SIZE macro to the specified ring buffer size. This macro adds a page for the header, and rounds up the size to a page boundary, using the page size for which the kernel is built. Use this new size for subsequent ring buffer calculations. For example, on ARM64 with 64K page size and the default ring size, this results in the actual ring being 128 Kbytes, which is intended. Cc: stable@vger.kernel.org # 5.15.x Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240122170956.496436-1-mhklinux@outlook.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index a95936b18f69..7ceb982040a5 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -330,6 +330,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -687,8 +688,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1973,7 +1974,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2164,7 +2165,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2198,8 +2199,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); From f74c35b630d40d414ca6b53f7b1b468dd4abf478 Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Tue, 23 Jan 2024 18:09:19 +0200 Subject: [PATCH 269/768] phy: qcom-qmp-usb: fix register offsets for ipq8074/ipq6018 Commit 2be22aae6b18 ("phy: qcom-qmp-usb: populate offsets configuration") introduced register offsets to the driver but for ipq8074/ipq6018 they do not match what was in the old style device tree. Example from old ipq6018.dtsi: <0x00078200 0x130>, /* Tx */ <0x00078400 0x200>, /* Rx */ <0x00078800 0x1f8>, /* PCS */ <0x00078600 0x044>; /* PCS misc */ which would translate to: {.., .pcs = 0x800, .pcs_misc = 0x600, .tx = 0x200, .rx = 0x400 } but was translated to: {.., .pcs = 0x600, .tx = 0x200, .rx = 0x400 } So split usb_offsets and fix USB initialization for IPQ8074 and IPQ6018. Tested only on IPQ6018 Fixes: 2be22aae6b18 ("phy: qcom-qmp-usb: populate offsets configuration") Signed-off-by: Mantas Pucka Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/1706026160-17520-2-git-send-email-mantas@8devices.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 243cc2b9a0fb..05b4c0e67896 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1556,6 +1556,14 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; +static const struct qmp_usb_offsets qmp_usb_offsets_ipq8074 = { + .serdes = 0, + .pcs = 0x800, + .pcs_misc = 0x600, + .tx = 0x200, + .rx = 0x400, +}; + static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = { .serdes = 0, .pcs = 0x800, @@ -1616,7 +1624,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_v3, + .offsets = &qmp_usb_offsets_ipq8074, .serdes_tbl = ipq8074_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq8074_usb3_serdes_tbl), From 62a5df451ab911421da96655fcc4d1e269ff6e2f Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Tue, 23 Jan 2024 18:09:20 +0200 Subject: [PATCH 270/768] phy: qcom-qmp-usb: fix serdes init sequence for IPQ6018 Commit 23fd679249df ("phy: qcom-qmp: add USB3 PHY support for IPQ6018") noted that IPQ6018 init is identical to IPQ8074. Yet downstream uses separate serdes init sequence for IPQ6018. Since already existing IPQ9574 serdes init sequence is identical, just reuse it and fix failing USB3 mode in IPQ6018. Fixes: 23fd679249df ("phy: qcom-qmp: add USB3 PHY support for IPQ6018") Signed-off-by: Mantas Pucka Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/1706026160-17520-3-git-send-email-mantas@8devices.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 05b4c0e67896..6621246e4ddf 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1621,6 +1621,24 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { .rx = 0x1000, }; +static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { + .lanes = 1, + + .offsets = &qmp_usb_offsets_ipq8074, + + .serdes_tbl = ipq9574_usb3_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(ipq9574_usb3_serdes_tbl), + .tx_tbl = msm8996_usb3_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(msm8996_usb3_tx_tbl), + .rx_tbl = ipq8074_usb3_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(ipq8074_usb3_rx_tbl), + .pcs_tbl = ipq8074_usb3_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(ipq8074_usb3_pcs_tbl), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, +}; + static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, @@ -2571,7 +2589,7 @@ err_node_put: static const struct of_device_id qmp_usb_of_match_table[] = { { .compatible = "qcom,ipq6018-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, + .data = &ipq6018_usb3phy_cfg, }, { .compatible = "qcom,ipq8074-qmp-usb3-phy", .data = &ipq8074_usb3phy_cfg, From 249abaf3bf0dd07f5ddebbb2fe2e8f4d675f074e Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 5 Jan 2024 18:37:03 +0900 Subject: [PATCH 271/768] phy: renesas: rcar-gen3-usb2: Fix returning wrong error code Even if device_create_file() returns error code, rcar_gen3_phy_usb2_probe() will return zero because the "ret" is variable shadowing. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202312161021.gOLDl48K-lkp@intel.com/ Fixes: 441a681b8843 ("phy: rcar-gen3-usb2: fix implementation for runtime PM") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240105093703.3359949-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index e53eace7c91e..6387c0d34c55 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -673,8 +673,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { - int ret; - channel->is_otg_channel = true; channel->uses_otg_pins = !of_property_read_bool(dev->of_node, "renesas,no-otg-pins"); @@ -738,8 +736,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) ret = PTR_ERR(provider); goto error; } else if (channel->is_otg_channel) { - int ret; - ret = device_create_file(dev, &dev_attr_role); if (ret < 0) goto error; From 4e4a1183f281d95fbb6caf28d670775d13264beb Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 8 Jan 2024 21:51:40 +0100 Subject: [PATCH 272/768] phy: lan966x: Add missing serdes mux entry According to the datasheet(Table 3-2: Port configuration) the serdes 2 (SD2) can be configured to run QSGMII or SGMII mode. Already the QSGMII mode is supported in the serdes_muxes list but was missing the SGMII mode. In this mode the serdes is connected to the port 4. Therefore add this entry in the list. Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20240108205140.1701770-1-horatiu.vultur@microchip.com Signed-off-by: Vinod Koul --- drivers/phy/microchip/lan966x_serdes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/microchip/lan966x_serdes.c b/drivers/phy/microchip/lan966x_serdes.c index c1a41b6cd29b..b5ac2b7995e7 100644 --- a/drivers/phy/microchip/lan966x_serdes.c +++ b/drivers/phy/microchip/lan966x_serdes.c @@ -96,6 +96,8 @@ static const struct serdes_mux lan966x_serdes_muxes[] = { SERDES_MUX_SGMII(SERDES6G(1), 3, HSIO_HW_CFG_SD6G_1_CFG, HSIO_HW_CFG_SD6G_1_CFG_SET(1)), + SERDES_MUX_SGMII(SERDES6G(2), 4, 0, 0), + SERDES_MUX_RGMII(RGMII(0), 2, HSIO_HW_CFG_RGMII_0_CFG | HSIO_HW_CFG_RGMII_ENA | HSIO_HW_CFG_GMII_ENA, From 0077a504e1a4468669fd2e011108db49133db56e Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Tue, 23 Jan 2024 19:30:02 +0100 Subject: [PATCH 273/768] ahci: asm1166: correct count of reported ports The ASM1166 SATA host controller always reports wrongly, that it has 32 ports. But in reality, it only has six ports. This seems to be a hardware issue, as all tested ASM1166 SATA host controllers reports such high count of ports. Example output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0xffffff3f impl SATA mode. By adjusting the port_map, the count is limited to six ports. New output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0x3f impl SATA mode. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=211873 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218346 Signed-off-by: Conrad Kostecki Reviewed-by: Hans de Goede Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 3a5f3255f51b..762c5d8b7c1a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -663,6 +663,11 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + } + if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; From 25461ce8b3d28528f2c55f5e737e99d2906eda83 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 15 Dec 2023 19:31:14 -0800 Subject: [PATCH 274/768] net/mlx5e: Use the correct lag ports number when creating TISes The cited commit moved the code of mlx5e_create_tises() and changed the loop to create TISes over MLX5_MAX_PORTS constant value, instead of getting the correct lag ports supported by the device, which can cause FW errors on devices with less than MLX5_MAX_PORTS ports. Change that back to mlx5e_get_num_lag_ports(mdev). Also IPoIB interfaces create there own TISes, they don't use the eth TISes, pass a flag to indicate that. This fixes the following errors that might appear in kernel log: mlx5_cmd_out_err:808:(pid 650): CREATE_TIS(0x912) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x595b5d), err(-22) mlx5e_create_mdev_resources:174:(pid 650): alloc tises failed, -22 Fixes: b25bd37c859f ("net/mlx5: Move TISes from priv to mdev HW resources") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../ethernet/mellanox/mlx5/core/en_common.c | 21 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- include/linux/mlx5/driver.h | 1 + 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0bfe1ca8a364..55c6ace0acd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1124,7 +1124,7 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) extern const struct ethtool_ops mlx5e_ethtool_ops; int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); -int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, bool enable_mc_lb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 67f546683e85..6ed3a32b7e22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -95,7 +95,7 @@ static void mlx5e_destroy_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PO { int tc, i; - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++) for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++) mlx5e_destroy_tis(mdev, tisn[i][tc]); } @@ -110,7 +110,7 @@ static int mlx5e_create_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PORT int tc, i; int err; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++) { for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; void *tisc; @@ -140,7 +140,7 @@ err_close_tises: return err; } -int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises) { struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; int err; @@ -169,11 +169,15 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) goto err_destroy_mkey; } - err = mlx5e_create_tises(mdev, res->tisn); - if (err) { - mlx5_core_err(mdev, "alloc tises failed, %d\n", err); - goto err_destroy_bfreg; + if (create_tises) { + err = mlx5e_create_tises(mdev, res->tisn); + if (err) { + mlx5_core_err(mdev, "alloc tises failed, %d\n", err); + goto err_destroy_bfreg; + } + res->tisn_valid = true; } + INIT_LIST_HEAD(&res->td.tirs_list); mutex_init(&res->td.list_lock); @@ -203,7 +207,8 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_crypto_dek_cleanup(mdev->mlx5e_res.dek_priv); mdev->mlx5e_res.dek_priv = NULL; - mlx5e_destroy_tises(mdev, res->tisn); + if (res->tisn_valid) + mlx5e_destroy_tises(mdev, res->tisn); mlx5_free_bfreg(mdev, &res->bfreg); mlx5_core_destroy_mkey(mdev, res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b5f1c4ca38ba..c8e8f512803e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5992,7 +5992,7 @@ static int mlx5e_resume(struct auxiliary_device *adev) if (netif_device_present(netdev)) return 0; - err = mlx5e_create_mdev_resources(mdev); + err = mlx5e_create_mdev_resources(mdev, true); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 58845121954c..d77be1b4dd9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -783,7 +783,7 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num, } /* This should only be called once per mdev */ - err = mlx5e_create_mdev_resources(mdev); + err = mlx5e_create_mdev_resources(mdev, false); if (err) goto destroy_ht; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8c55ff351e5f..41f03b352401 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -681,6 +681,7 @@ struct mlx5e_resources { struct mlx5_sq_bfreg bfreg; #define MLX5_MAX_NUM_TC 8 u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; + bool tisn_valid; } hw_objs; struct net_device *uplink_netdev; struct mutex uplink_netdev_lock; From cfbc3608a8c69b48bf238bd68f768192f0238e0d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 19 Dec 2023 14:46:20 +0200 Subject: [PATCH 275/768] net/mlx5: Fix query of sd_group field The sd_group field moved in the HW spec from the MPIR register to the vport context. Align the query accordingly. Fixes: f5e956329960 ("net/mlx5: Expose Management PCIe Index Register (MPIR)") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/vport.c | 21 +++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 10 ++++++--- include/linux/mlx5/vport.h | 1 + 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 21753f327868..1005bb6935b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -440,6 +440,27 @@ out: } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); +int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; + + *sd_group = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.sd_group); +out: + kvfree(out); + return err; +} + int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) { u32 *out; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bf5320b28b8b..37230253f9f1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4036,8 +4036,13 @@ struct mlx5_ifc_nic_vport_context_bits { u8 affiliation_criteria[0x4]; u8 affiliated_vhca_id[0x10]; - u8 reserved_at_60[0xd0]; + u8 reserved_at_60[0xa0]; + u8 reserved_at_100[0x1]; + u8 sd_group[0x3]; + u8 reserved_at_104[0x1c]; + + u8 reserved_at_120[0x10]; u8 mtu[0x10]; u8 system_image_guid[0x40]; @@ -10122,8 +10127,7 @@ struct mlx5_ifc_mpir_reg_bits { u8 reserved_at_20[0x20]; u8 local_port[0x8]; - u8 reserved_at_28[0x15]; - u8 sd_group[0x3]; + u8 reserved_at_28[0x18]; u8 reserved_at_60[0x20]; }; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index fbb9bf447889..c36cc6d82926 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); +int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, u16 vport, u64 node_guid); From 3876638b2c7ebb2c9d181de1191db0de8cac143a Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Wed, 22 Nov 2023 18:32:11 -0800 Subject: [PATCH 276/768] net/mlx5e: Fix operation precedence bug in port timestamping napi_poll context Indirection (*) is of lower precedence than postfix increment (++). Logic in napi_poll context would cause an out-of-bound read by first increment the pointer address by byte address space and then dereference the value. Rather, the intended logic was to dereference first and then increment the underlying value. Fixes: 92214be5979c ("net/mlx5e: Update doorbell for port timestamping CQ before the software counter") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index c206cc0a8483..078f56a3cbb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -213,7 +213,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp); out: napi_consume_skb(skb, budget); - md_buff[*md_buff_sz++] = metadata_id; + md_buff[(*md_buff_sz)++] = metadata_id; if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) && !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work); From c20767fd45e82d64352db82d4fc8d281a43e4783 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 5 Nov 2023 17:09:46 +0200 Subject: [PATCH 277/768] net/mlx5e: Fix inconsistent hairpin RQT sizes The processing of traffic in hairpin queues occurs in HW/FW and does not involve the cpus, hence the upper bound on max num channels does not apply to them. Using this bound for the hairpin RQT max_table_size is wrong. It could be too small, and cause the error below [1]. As the RQT size provided on init does not get modified later, use the same value for both actual and max table sizes. [1] mlx5_core 0000:08:00.1: mlx5_cmd_out_err:805:(pid 1200): CREATE_RQT(0x916) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x538faf), err(-22) Fixes: 74a8dadac17e ("net/mlx5e: Preparations for supporting larger number of channels") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 30932c9c9a8f..047b465fc6a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -761,7 +761,7 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) err = mlx5e_rss_params_indir_init(&indir, mdev, mlx5e_rqt_size(mdev, hp->num_channels), - mlx5e_rqt_size(mdev, priv->max_nch)); + mlx5e_rqt_size(mdev, hp->num_channels)); if (err) return err; From d76fdd31f953ac5046555171620f2562715e9b71 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 10 Nov 2023 11:10:22 +0100 Subject: [PATCH 278/768] net/mlx5e: Fix peer flow lists handling The cited change refactored mlx5e_tc_del_fdb_peer_flow() to only clear DUP flag when list of peer flows has become empty. However, if any concurrent user holds a reference to a peer flow (for example, the neighbor update workqueue task is updating peer flow's parent encap entry concurrently), then the flow will not be removed from the peer list and, consecutively, DUP flag will remain set. Since mlx5e_tc_del_fdb_peers_flow() calls mlx5e_tc_del_fdb_peer_flow() for every possible peer index the algorithm will try to remove the flow from eswitch instances that it has never peered with causing either NULL pointer dereference when trying to remove the flow peer list head of peer_index that was never initialized or a warning if the list debug config is enabled[0]. Fix the issue by always removing the peer flow from the list even when not releasing the last reference to it. [0]: [ 3102.985806] ------------[ cut here ]------------ [ 3102.986223] list_del corruption, ffff888139110698->next is NULL [ 3102.986757] WARNING: CPU: 2 PID: 22109 at lib/list_debug.c:53 __list_del_entry_valid_or_report+0x4f/0xc0 [ 3102.987561] Modules linked in: act_ct nf_flow_table bonding act_tunnel_key act_mirred act_skbedit vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa openvswitch nsh xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcg ss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core [last unloaded: bonding] [ 3102.991113] CPU: 2 PID: 22109 Comm: revalidator28 Not tainted 6.6.0-rc6+ #3 [ 3102.991695] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 3102.992605] RIP: 0010:__list_del_entry_valid_or_report+0x4f/0xc0 [ 3102.993122] Code: 39 c2 74 56 48 8b 32 48 39 fe 75 62 48 8b 51 08 48 39 f2 75 73 b8 01 00 00 00 c3 48 89 fe 48 c7 c7 48 fd 0a 82 e8 41 0b ad ff <0f> 0b 31 c0 c3 48 89 fe 48 c7 c7 70 fd 0a 82 e8 2d 0b ad ff 0f 0b [ 3102.994615] RSP: 0018:ffff8881383e7710 EFLAGS: 00010286 [ 3102.995078] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000000 [ 3102.995670] RDX: 0000000000000001 RSI: ffff88885f89b640 RDI: ffff88885f89b640 [ 3102.997188] DEL flow 00000000be367878 on port 0 [ 3102.998594] RBP: dead000000000122 R08: 0000000000000000 R09: c0000000ffffdfff [ 3102.999604] R10: 0000000000000008 R11: ffff8881383e7598 R12: dead000000000100 [ 3103.000198] R13: 0000000000000002 R14: ffff888139110000 R15: ffff888101901240 [ 3103.000790] FS: 00007f424cde4700(0000) GS:ffff88885f880000(0000) knlGS:0000000000000000 [ 3103.001486] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3103.001986] CR2: 00007fd42e8dcb70 CR3: 000000011e68a003 CR4: 0000000000370ea0 [ 3103.002596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 3103.003190] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 3103.003787] Call Trace: [ 3103.004055] [ 3103.004297] ? __warn+0x7d/0x130 [ 3103.004623] ? __list_del_entry_valid_or_report+0x4f/0xc0 [ 3103.005094] ? report_bug+0xf1/0x1c0 [ 3103.005439] ? console_unlock+0x4a/0xd0 [ 3103.005806] ? handle_bug+0x3f/0x70 [ 3103.006149] ? exc_invalid_op+0x13/0x60 [ 3103.006531] ? asm_exc_invalid_op+0x16/0x20 [ 3103.007430] ? __list_del_entry_valid_or_report+0x4f/0xc0 [ 3103.007910] mlx5e_tc_del_fdb_peers_flow+0xcf/0x240 [mlx5_core] [ 3103.008463] mlx5e_tc_del_flow+0x46/0x270 [mlx5_core] [ 3103.008944] mlx5e_flow_put+0x26/0x50 [mlx5_core] [ 3103.009401] mlx5e_delete_flower+0x25f/0x380 [mlx5_core] [ 3103.009901] tc_setup_cb_destroy+0xab/0x180 [ 3103.010292] fl_hw_destroy_filter+0x99/0xc0 [cls_flower] [ 3103.010779] __fl_delete+0x2d4/0x2f0 [cls_flower] [ 3103.011207] fl_delete+0x36/0x80 [cls_flower] [ 3103.011614] tc_del_tfilter+0x56f/0x750 [ 3103.011982] rtnetlink_rcv_msg+0xff/0x3a0 [ 3103.012362] ? netlink_ack+0x1c7/0x4e0 [ 3103.012719] ? rtnl_calcit.isra.44+0x130/0x130 [ 3103.013134] netlink_rcv_skb+0x54/0x100 [ 3103.013533] netlink_unicast+0x1ca/0x2b0 [ 3103.013902] netlink_sendmsg+0x361/0x4d0 [ 3103.014269] __sock_sendmsg+0x38/0x60 [ 3103.014643] ____sys_sendmsg+0x1f2/0x200 [ 3103.015018] ? copy_msghdr_from_user+0x72/0xa0 [ 3103.015265] ___sys_sendmsg+0x87/0xd0 [ 3103.016608] ? copy_msghdr_from_user+0x72/0xa0 [ 3103.017014] ? ___sys_recvmsg+0x9b/0xd0 [ 3103.017381] ? ttwu_do_activate.isra.137+0x58/0x180 [ 3103.017821] ? wake_up_q+0x49/0x90 [ 3103.018157] ? futex_wake+0x137/0x160 [ 3103.018521] ? __sys_sendmsg+0x51/0x90 [ 3103.018882] __sys_sendmsg+0x51/0x90 [ 3103.019230] ? exit_to_user_mode_prepare+0x56/0x130 [ 3103.019670] do_syscall_64+0x3c/0x80 [ 3103.020017] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 3103.020469] RIP: 0033:0x7f4254811ef4 [ 3103.020816] Code: 89 f3 48 83 ec 10 48 89 7c 24 08 48 89 14 24 e8 42 eb ff ff 48 8b 14 24 41 89 c0 48 89 de 48 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 30 44 89 c7 48 89 04 24 e8 78 eb ff ff 48 8b [ 3103.022290] RSP: 002b:00007f424cdd9480 EFLAGS: 00000293 ORIG_RAX: 000000000000002e [ 3103.022970] RAX: ffffffffffffffda RBX: 00007f424cdd9510 RCX: 00007f4254811ef4 [ 3103.023564] RDX: 0000000000000000 RSI: 00007f424cdd9510 RDI: 0000000000000012 [ 3103.024158] RBP: 00007f424cdda238 R08: 0000000000000000 R09: 00007f41d801a4b0 [ 3103.024748] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000001 [ 3103.025341] R13: 00007f424cdd9510 R14: 00007f424cdda240 R15: 00007f424cdd99a0 [ 3103.025931] [ 3103.026182] ---[ end trace 0000000000000000 ]--- [ 3103.027033] ------------[ cut here ]------------ Fixes: 9be6c21fdcf8 ("net/mlx5e: Handle offloads flows per peer") Signed-off-by: Vlad Buslov Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 047b465fc6a5..9fb2c057bd78 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2014,9 +2014,10 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) continue; + + list_del(&peer_flow->peer_flows); if (refcount_dec_and_test(&peer_flow->refcnt)) { mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow); - list_del(&peer_flow->peer_flows); kfree(peer_flow); } } From cc8091587779cfaddb6b29c9e9edb9079a282cad Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 31 Dec 2023 15:19:50 +0200 Subject: [PATCH 279/768] net/mlx5: Fix a WARN upon a callback command failure The below WARN [1] is reported once a callback command failed. As a callback runs under an interrupt context, needs to use the IRQ save/restore variant. [1] DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context()) WARNING: CPU: 15 PID: 0 at kernel/locking/lockdep.c:4353 lockdep_hardirqs_on_prepare+0x11b/0x180 Modules linked in: vhost_net vhost tap mlx5_vfio_pci vfio_pci vfio_pci_core vfio_iommu_type1 vfio mlx5_vdpa vringh vhost_iotlb vdpa nfnetlink_cttimeout openvswitch nsh ip6table_mangle ip6table_nat ip6table_filter ip6_tables iptable_mangle xt_conntrackxt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_umad ib_ipoib ib_cm mlx5_ib ib_uverbs ib_core fuse mlx5_core CPU: 15 PID: 0 Comm: swapper/15 Tainted: G W 6.7.0-rc4+ #1587 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:lockdep_hardirqs_on_prepare+0x11b/0x180 Code: 00 5b c3 c3 e8 e6 0d 58 00 85 c0 74 d6 8b 15 f0 c3 76 01 85 d2 75 cc 48 c7 c6 04 a5 3b 82 48 c7 c7 f1 e9 39 82 e8 95 12 f9 ff <0f> 0b 5b c3 e8 bc 0d 58 00 85 c0 74 ac 8b 3d c6 c3 76 01 85 ff 75 RSP: 0018:ffffc900003ecd18 EFLAGS: 00010086 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000027 RDX: 0000000000000000 RSI: ffff88885fbdb880 RDI: ffff88885fbdb888 RBP: 00000000ffffff87 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 284e4f5f4e524157 R12: 00000000002c9aa1 R13: ffff88810aace980 R14: ffff88810aace9b8 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff88885fbc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f731436f4c8 CR3: 000000010aae6001 CR4: 0000000000372eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x81/0x170 ? lockdep_hardirqs_on_prepare+0x11b/0x180 ? report_bug+0xf8/0x1c0 ? handle_bug+0x3f/0x70 ? exc_invalid_op+0x13/0x60 ? asm_exc_invalid_op+0x16/0x20 ? lockdep_hardirqs_on_prepare+0x11b/0x180 ? lockdep_hardirqs_on_prepare+0x11b/0x180 trace_hardirqs_on+0x4a/0xa0 raw_spin_unlock_irq+0x24/0x30 cmd_status_err+0xc0/0x1a0 [mlx5_core] cmd_status_err+0x1a0/0x1a0 [mlx5_core] mlx5_cmd_exec_cb_handler+0x24/0x40 [mlx5_core] mlx5_cmd_comp_handler+0x129/0x4b0 [mlx5_core] cmd_comp_notifier+0x1a/0x20 [mlx5_core] notifier_call_chain+0x3e/0xe0 atomic_notifier_call_chain+0x5f/0x130 mlx5_eq_async_int+0xe7/0x200 [mlx5_core] notifier_call_chain+0x3e/0xe0 atomic_notifier_call_chain+0x5f/0x130 irq_int_handler+0x11/0x20 [mlx5_core] __handle_irq_event_percpu+0x99/0x220 ? tick_irq_enter+0x5d/0x80 handle_irq_event_percpu+0xf/0x40 handle_irq_event+0x3a/0x60 handle_edge_irq+0xa2/0x1c0 __common_interrupt+0x55/0x140 common_interrupt+0x7d/0xa0 asm_common_interrupt+0x22/0x40 RIP: 0010:default_idle+0x13/0x20 Code: c0 08 00 00 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 72 ff ff ff cc cc cc cc 8b 05 ea 08 25 01 85 c0 7e 07 0f 00 2d 7f b0 26 00 fb f4 c3 90 66 2e 0f 1f 84 00 00 00 00 00 65 48 8b 04 25 80 d0 02 00 RSP: 0018:ffffc9000010fec8 EFLAGS: 00000242 RAX: 0000000000000001 RBX: 000000000000000f RCX: 4000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff811c410c RBP: ffffffff829478c0 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? do_idle+0x1ec/0x210 default_idle_call+0x6c/0x90 do_idle+0x1ec/0x210 cpu_startup_entry+0x26/0x30 start_secondary+0x11b/0x150 secondary_startup_64_no_verify+0x165/0x16b irq event stamp: 833284 hardirqs last enabled at (833283): [] do_idle+0x1ec/0x210 hardirqs last disabled at (833284): [] common_interrupt+0xf/0xa0 softirqs last enabled at (833224): [] __do_softirq+0x2bf/0x40e softirqs last disabled at (833177): [] irq_exit_rcu+0x7f/0xa0 Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs") Signed-off-by: Yishai Hadas Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a7b1f9686c09..4957412ff1f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1923,6 +1923,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, { const char *namep = mlx5_command_str(opcode); struct mlx5_cmd_stats *stats; + unsigned long flags; if (!err || !(strcmp(namep, "unknown command opcode"))) return; @@ -1930,7 +1931,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, stats = xa_load(&dev->cmd.stats, opcode); if (!stats) return; - spin_lock_irq(&stats->lock); + spin_lock_irqsave(&stats->lock, flags); stats->failed++; if (err < 0) stats->last_failed_errno = -err; @@ -1939,7 +1940,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, stats->last_failed_mbox_status = status; stats->last_failed_syndrome = syndrome; } - spin_unlock_irq(&stats->lock); + spin_unlock_irqrestore(&stats->lock, flags); } /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ From ec7cc38ef9f83553102e84c82536971a81630739 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sat, 30 Dec 2023 22:40:37 +0200 Subject: [PATCH 280/768] net/mlx5: Bridge, fix multicast packets sent to uplink To enable multicast packets which are offloaded in bridge multicast offload mode to be sent also to uplink, FTE bit uplink_hairpin_en should be set. Add this bit to FTE for the bridge multicast offload rules. Fixes: 18c2916cee12 ("net/mlx5: Bridge, snoop igmp/mld packets") Signed-off-by: Moshe Shemesh Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 2 ++ include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 2 +- 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c index a7ed87e9d842..22dd30cf8033 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -83,6 +83,7 @@ mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_md i++; } + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16); ether_addr_copy(dmac_v, entry->key.addr); @@ -587,6 +588,7 @@ mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_po if (!rule_spec) return ERR_PTR(-ENOMEM); + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; @@ -662,6 +664,7 @@ mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port) dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; dest.vport.vhca_id = port->esw_owner_vhca_id; } + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1); kvfree(rule_spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 1616a6144f7b..9b8599c200e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -566,6 +566,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, fte->flow_context.flow_tag); MLX5_SET(flow_context, in_flow_context, flow_source, fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, uplink_hairpin_en, + !!(fte->flow_context.flags & FLOW_CONTEXT_UPLINK_HAIRPIN_EN)); MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 6f7725238abc..3fb428ce7d1c 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -132,6 +132,7 @@ struct mlx5_flow_handle; enum { FLOW_CONTEXT_HAS_TAG = BIT(0), + FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1), }; struct mlx5_flow_context { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 37230253f9f1..c726f90ab752 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3576,7 +3576,7 @@ struct mlx5_ifc_flow_context_bits { u8 action[0x10]; u8 extended_destination[0x1]; - u8 reserved_at_81[0x1]; + u8 uplink_hairpin_en[0x1]; u8 flow_source[0x2]; u8 encrypt_decrypt_type[0x4]; u8 destination_list_size[0x18]; From 5665954293f13642f9c052ead83c1e9d8cff186f Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Dec 2023 11:24:08 +0200 Subject: [PATCH 281/768] net/mlx5: DR, Use the right GVMI number for drop action When FW provides ICM addresses for drop RX/TX, the provided capability is 64 bits that contain its GVMI as well as the ICM address itself. In case of TX DROP this GVMI is different from the GVMI that the domain is operating on. This patch fixes the action to use these GVMI IDs, as provided by FW. Fixes: 9db810ed2d37 ("net/mlx5: DR, Expose steering action functionality") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 6f9790e97fed..95517c4aca0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -788,6 +788,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, switch (action_type) { case DR_ACTION_TYP_DROP: attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; break; case DR_ACTION_TYP_FT: dest_action = action; From 5b2a2523eeea5f03d39a9d1ff1bad2e9f8eb98d2 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 17 Dec 2023 13:20:36 +0200 Subject: [PATCH 282/768] net/mlx5: DR, Can't go to uplink vport on RX rule Go-To-Vport action on RX is not allowed when the vport is uplink. In such case, the packet should be dropped. Fixes: 9db810ed2d37 ("net/mlx5: DR, Expose steering action functionality") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_action.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 95517c4aca0f..2ebb61ef3ea9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -874,11 +874,17 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action->sampler->tx_icm_addr; break; case DR_ACTION_TYP_VPORT: - attr.hit_gvmi = action->vport->caps->vhca_gvmi; - dest_action = action; - attr.final_icm_addr = rx_rule ? - action->vport->caps->icm_address_rx : - action->vport->caps->icm_address_tx; + if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) { + /* can't go to uplink on RX rule - dropping instead */ + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + } else { + attr.hit_gvmi = action->vport->caps->vhca_gvmi; + dest_action = action; + attr.final_icm_addr = rx_rule ? + action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; + } break; case DR_ACTION_TYP_POP_VLAN: if (!rx_rule && !(dmn->ste_ctx->actions_caps & From 20cbf8cbb827094197f3b17db60d71449415db1e Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 28 Nov 2023 14:01:54 -0800 Subject: [PATCH 283/768] net/mlx5: Use mlx5 device constant for selecting CQ period mode for ASO mlx5 devices have specific constants for choosing the CQ period mode. These constants do not have to match the constants used by the kernel software API for DIM period mode selection. Fixes: cdd04f4d4d71 ("net/mlx5: Add support to create SQ and CQ for ASO") Signed-off-by: Rahul Rameshbabu Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index 40c7be124041..58bd749b5e4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -98,7 +98,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); - MLX5_SET(cqc, cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - From 20f5468a7988dedd94a57ba8acd65ebda6a59723 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Dec 2023 13:52:55 +0200 Subject: [PATCH 284/768] net/mlx5e: Allow software parsing when IPsec crypto is enabled All ConnectX devices have software parsing capability enabled, but it is more correct to set allow_swp only if capability exists, which for IPsec means that crypto offload is supported. Fixes: 2451da081a34 ("net/mlx5: Unify device IPsec capabilities check") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 284253b79266..5d213a9886f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1064,8 +1064,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); bool allow_swp; - allow_swp = - mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev); + allow_swp = mlx5_geneve_tx_allowed(mdev) || + (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); From 315a597f9bcfe7fe9980985031413457bee95510 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 26 Nov 2023 11:08:10 +0200 Subject: [PATCH 285/768] net/mlx5e: Ignore IPsec replay window values on sender side XFRM stack doesn't prevent from users to configure replay window in TX side and strongswan sets replay_window to be 1. It causes to failures in validation logic when trying to offload the SA. Replay window is not relevant in TX side and should be ignored. Fixes: cded6d80129b ("net/mlx5e: Store replay window in XFRM attributes") Signed-off-by: Aya Levin Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 161c5190c236..05612d9c6080 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -336,12 +336,17 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, /* iv len */ aes_gcm->icv_len = x->aead->alg_icv_len; + attrs->dir = x->xso.dir; + /* esn */ if (x->props.flags & XFRM_STATE_ESN) { attrs->replay_esn.trigger = true; attrs->replay_esn.esn = sa_entry->esn_state.esn; attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; attrs->replay_esn.overlap = sa_entry->esn_state.overlap; + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) + goto skip_replay_window; + switch (x->replay_esn->replay_window) { case 32: attrs->replay_esn.replay_window = @@ -365,7 +370,7 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, } } - attrs->dir = x->xso.dir; +skip_replay_window: /* spi */ attrs->spi = be32_to_cpu(x->id.spi); @@ -501,7 +506,8 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return -EINVAL; } - if (x->replay_esn && x->replay_esn->replay_window != 32 && + if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN && + x->replay_esn->replay_window != 32 && x->replay_esn->replay_window != 64 && x->replay_esn->replay_window != 128 && x->replay_esn->replay_window != 256) { From 3c6d5189246f590e4e1f167991558bdb72a4738b Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 17 Jan 2024 15:17:36 +0800 Subject: [PATCH 286/768] net/mlx5e: fix a double-free in arfs_create_groups When `in` allocated by kvzalloc fails, arfs_create_groups will free ft->g and return an error. However, arfs_create_table, the only caller of arfs_create_groups, will hold this error and call to mlx5e_destroy_flow_table, in which the ft->g will be freed again. Fixes: 1cabe6b0965e ("net/mlx5e: Create aRFS flow tables") Signed-off-by: Zhipeng Lu Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index bb7f86c993e5..e66f486faafe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -254,11 +254,13 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in || !ft->g) { - kfree(ft->g); - kvfree(in); + if (!ft->g) return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free_g; } mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); @@ -278,7 +280,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } switch (type) { @@ -300,7 +302,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, break; default: err = -EINVAL; - goto out; + goto err_free_in; } MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -309,7 +311,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; memset(in, 0, inlen); @@ -318,18 +320,20 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft, MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err; + goto err_clean_group; ft->num_groups++; kvfree(in); return 0; -err: +err_clean_group: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; -out: +err_free_in: kvfree(in); - +err_free_g: + kfree(ft->g); + ft->g = NULL; return err; } From aef855df7e1bbd5aa4484851561211500b22707e Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 28 Nov 2023 17:29:01 +0800 Subject: [PATCH 287/768] net/mlx5e: fix a potential double-free in fs_any_create_groups When kcalloc() for ft->g succeeds but kvzalloc() for in fails, fs_any_create_groups() will free ft->g. However, its caller fs_any_create_table() will free ft->g again through calling mlx5e_destroy_flow_table(), which will lead to a double-free. Fix this by setting ft->g to NULL in fs_any_create_groups(). Fixes: 0f575c20bf06 ("net/mlx5e: Introduce Flow Steering ANY API") Signed-off-by: Dinghao Liu Reviewed-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index e1283531e0b8..671adbad0a40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -436,6 +436,7 @@ static int fs_any_create_groups(struct mlx5e_flow_table *ft) in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } From 3213b8070ac69b32f05fa2328cbebe0eca75c1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:44 +0100 Subject: [PATCH 288/768] drm/xe/dmabuf: Make xe_dmabuf_ops static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not referenced outside of the xe_dma_buf.c source file. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-2-thomas.hellstrom@linux.intel.com (cherry picked from commit e2dc52f849f8694bdabb75127164c9df622af459) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_dma_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 64ed303728fd..da2627ed6ae7 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -175,7 +175,7 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return 0; } -const struct dma_buf_ops xe_dmabuf_ops = { +static const struct dma_buf_ops xe_dmabuf_ops = { .attach = xe_dma_buf_attach, .detach = xe_dma_buf_detach, .pin = xe_dma_buf_pin, From 03b72dbbd4e96d0197aa8cf894a24a4db8623031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:48 +0100 Subject: [PATCH 289/768] drm/xe: Use a NULL pointer instead of 0. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last argument of xe_pcode_read() is a pointer. Use NULL instead of 0. Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Cc: Rodrigo Vivi Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-6-thomas.hellstrom@linux.intel.com (cherry picked from commit 79f8eacbdf9dad7ead39b3319e31e12d4dc6529e) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 6ef2aa1eae8b..174ed2185481 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -419,7 +419,7 @@ static int xe_hwmon_pcode_read_i1(struct xe_gt *gt, u32 *uval) return xe_pcode_read(gt, PCODE_MBOX(PCODE_POWER_SETUP, POWER_SETUP_SUBCOMMAND_READ_I1, 0), - uval, 0); + uval, NULL); } static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) From 32f6c3325703c98edee8f1005ad47b4d8431b758 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Jan 2024 16:16:08 -0800 Subject: [PATCH 290/768] drm/xe: Use _ULL for u64 division MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use DIV_ROUND_UP_ULL() so it also works on 32bit build. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240119001612.2991381-2-lucas.demarchi@intel.com (cherry picked from commit 7b5bdb447b14930b9ef3e39bd301937889c60c96) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b8d8da546670..1f0b4b9ce84f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -613,7 +613,7 @@ void xe_device_wmb(struct xe_device *xe) u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? - DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; + DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; } bool xe_device_mem_access_ongoing(struct xe_device *xe) From 52e8948c6b6a41603371996b9bc0e43e17d690b4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Jan 2024 16:16:09 -0800 Subject: [PATCH 291/768] drm/xe/mmio: Cast to u64 when printing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resource_size_t uses %pa format in printk since the size varies depending on build options. However to keep the io_size/physical_size addition in the same call we can't pass the address without adding yet another variable in these function. Simply cast it to u64 and keep using %llx. Fixes: 286089ce6929 ("drm/xe: Improve vram info debug printing") Cc: Oak Zeng Cc: Michael J. Ruhl Cc: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240119001612.2991381-3-lucas.demarchi@intel.com (cherry picked from commit 6d8d038364d8ec573e9dc0872e17bee1e5f12490) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_mmio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index c8c5d74b6e90..5f6b53ea5528 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -272,8 +272,8 @@ int xe_mmio_probe_vram(struct xe_device *xe) drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size); + &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, + &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); /* calculate total size using tile size to get the correct HW sizing */ total_size += tile_size; From 981460d8ee6042b14149fd8931ae27b91f2146b1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Jan 2024 16:16:10 -0800 Subject: [PATCH 292/768] drm/xe/display: Avoid calling readq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit readq() is not available in 32bits and i915_gem_object_read_from_page() is supposed to allow reading arbitrary sizes determined by the `size` argument. Currently the only caller only passes a size == 8 so the second problem is not that big. Migrate to calling memcpy()/memcpy_fromio() to allow possible changes in the display side and to fix the build on 32b architectures. v2: Use memcpy/memcpy_fromio directly rather than using iosys-map with the same size == 8 bytes restriction (Matt Roper) Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240119001612.2991381-4-lucas.demarchi@intel.com (cherry picked from commit 406663f777bee53e9ad93dc080c333d4655ab7de) Signed-off-by: Thomas Hellström --- .../drm/xe/compat-i915-headers/gem/i915_gem_object.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 5f19550cc845..68d9f6116bdf 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -35,12 +35,10 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, u32 ofs, u64 *ptr, u32 size) { struct ttm_bo_kmap_obj map; - void *virtual; + void *src; bool is_iomem; int ret; - XE_WARN_ON(size != 8); - ret = xe_bo_lock(bo, true); if (ret) return ret; @@ -50,11 +48,12 @@ static inline int i915_gem_object_read_from_page(struct xe_bo *bo, goto out_unlock; ofs &= ~PAGE_MASK; - virtual = ttm_kmap_obj_virtual(&map, &is_iomem); + src = ttm_kmap_obj_virtual(&map, &is_iomem); + src += ofs; if (is_iomem) - *ptr = readq((void __iomem *)(virtual + ofs)); + memcpy_fromio(ptr, (void __iomem *)src, size); else - *ptr = *(u64 *)(virtual + ofs); + memcpy(ptr, src, size); ttm_bo_kunmap(&map); out_unlock: From c0e2508cb1004fdb153fbbcf0101404abfefdddd Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 19 Jan 2024 09:48:26 +0530 Subject: [PATCH 293/768] drm/xe/xe2: Use XE_CACHE_WB pat index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pat table entry associated with XE_CACHE_WB is coherent whereas XE_CACHE_NONE is non coherent. Migration expects the coherency with cpu therefore use the coherent entry XE_CACHE_WB for buffers not supporting compression. For read/write to flat ccs region the issue is not related to coherency with cpu. The hardware expects the pat index associated with GPUVA for indirect access to be compression enabled hence use XE_CACHE_NONE_COMPRESSION. v2 - Fix the argument to emit_pte, pass the bool directly. (Thomas) v3 - Rebase - Update commit message (Matt) v4 - Add a Fixes: tag. (Thomas) Cc: Matt Roper Cc: Thomas Hellström Fixes: 65ef8dbad1db ("drm/xe/xe2: Update emit_pte to use compression enabled PAT index") Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240119041826.1670496-1-himal.prasad.ghimiray@intel.com (cherry picked from commit 6a02867560f77328ae5637b70b06704b140aafa6) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_migrate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index e05e9e7282b6..5c6c54624252 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -472,7 +472,7 @@ static void emit_pte(struct xe_migrate *m, /* Indirect access needs compression enabled uncached PAT index */ if (GRAPHICS_VERx100(xe) >= 2000) pat_index = is_comp_pte ? xe->pat.idx[XE_CACHE_NONE_COMPRESSION] : - xe->pat.idx[XE_CACHE_NONE]; + xe->pat.idx[XE_CACHE_WB]; else pat_index = xe->pat.idx[XE_CACHE_WB]; @@ -760,14 +760,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0, - src); + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); else - emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0, - dst); + emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs, + &dst_it, src_L0, dst); if (copy_system_ccs) emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); @@ -1009,8 +1009,8 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0, - dst); + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; From d186e51b0ed05a0cd94c7c9756740a855325c557 Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Mon, 22 Jan 2024 12:24:24 +0200 Subject: [PATCH 294/768] drm/xe/vm: bugfix in xe_vm_create_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix xe_vm_create_ioctl routine not freeing the vm-id allocated to it when the function fails. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Moti Haimovski Reviewed-by: Matthew Brost Reviewed-by: Tomer Tayar Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240122102424.4008095-1-mhaimovski@habana.ai (cherry picked from commit f6bf0424cadc27d7cf6a049d2db960e4b52fa513) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 10b6995fbf29..53833ab81424 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1855,10 +1855,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, mutex_lock(&xef->vm.lock); err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->vm.lock); - if (err) { - xe_vm_close_and_put(vm); - return err; - } + if (err) + goto err_close_and_put; if (xe->info.has_asid) { mutex_lock(&xe->usm.lock); @@ -1866,11 +1864,9 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); mutex_unlock(&xe->usm.lock); - if (err < 0) { - xe_vm_close_and_put(vm); - return err; - } - err = 0; + if (err < 0) + goto err_free_id; + vm->usm.asid = asid; } @@ -1888,6 +1884,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, #endif return 0; + +err_free_id: + mutex_lock(&xef->vm.lock); + xa_erase(&xef->vm.xa, id); + mutex_unlock(&xef->vm.lock); +err_close_and_put: + xe_vm_close_and_put(vm); + + return err; } int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, From 9e3a13f3eef6b14a26cc2660ca2f43f0e46b4318 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 22 Jan 2024 19:12:42 -0800 Subject: [PATCH 295/768] drm/xe: Remove PVC from xe_wa kunit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the PCI IDs for PVC weren't added to the xe driver, the xe_wa tests should not try to create a fake PVC device since they can't find the right PCI ID. Fix bugs when running kunit: # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:111 Expected ret == 0, but ret == -19 (0xffffffffffffffed) [FAILED] PVC (B0) # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:111 Expected ret == 0, but ret == -19 (0xffffffffffffffed) [FAILED] PVC (B1) # xe_wa_gt: ASSERTION FAILED at drivers/gpu/drm/xe/tests/xe_wa_test.c:111 Expected ret == 0, but ret == -19 (0xffffffffffffffed) [FAILED] PVC (C0) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240123031242.3548724-1-lucas.demarchi@intel.com (cherry picked from commit ab5ae65fb25d06c38a6617a628b964828adb4786) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/tests/xe_wa_test.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index a53c22a19582..b4715b78ef3b 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -74,9 +74,6 @@ static const struct platform_test_case cases[] = { SUBPLATFORM_CASE(DG2, G11, B1), SUBPLATFORM_CASE(DG2, G12, A0), SUBPLATFORM_CASE(DG2, G12, A1), - PLATFORM_CASE(PVC, B0), - PLATFORM_CASE(PVC, B1), - PLATFORM_CASE(PVC, C0), GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), From 56062d60f117dccfb5281869e0ab61e090baf864 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Wed, 10 Jan 2024 15:01:22 +0200 Subject: [PATCH 296/768] x86/entry/ia32: Ensure s32 is sign extended to s64 Presently ia32 registers stored in ptregs are unconditionally cast to unsigned int by the ia32 stub. They are then cast to long when passed to __se_sys*, but will not be sign extended. This takes the sign of the syscall argument into account in the ia32 stub. It still casts to unsigned int to avoid implementation specific behavior. However then casts to int or unsigned int as necessary. So that the following cast to long sign extends the value. This fixes the io_pgetevents02 LTP test when compiled with -m32. Presently the systemcall io_pgetevents_time64() unexpectedly accepts -1 for the maximum number of events. It doesn't appear other systemcalls with signed arguments are effected because they all have compat variants defined and wired up. Fixes: ebeb8c82ffaf ("syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32") Suggested-by: Arnd Bergmann Signed-off-by: Richard Palethorpe Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240110130122.3836513-1-nik.borisov@suse.com Link: https://lore.kernel.org/ltp/20210921130127.24131-1-rpalethorpe@suse.com/ --- arch/x86/include/asm/syscall_wrapper.h | 25 +++++++++++++++++++++---- include/linux/syscalls.h | 1 + 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 21f9407be5d3..7e88705e907f 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); ,,regs->di,,regs->si,,regs->dx \ ,,regs->r10,,regs->r8,,regs->r9) \ + +/* SYSCALL_PT_ARGS is Adapted from s390x */ +#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \ + SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp)) +#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \ + SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di)) +#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \ + SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si)) +#define SYSCALL_PT_ARG3(m, t1, t2, t3) \ + SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx)) +#define SYSCALL_PT_ARG2(m, t1, t2) \ + SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx)) +#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx)) +#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__) + +#define __SC_COMPAT_CAST(t, a) \ + (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \ + (unsigned int)a + /* Mapping of registers to parameters for syscalls on i386 */ #define SC_IA32_REGS_TO_ARGS(x, ...) \ - __MAP(x,__SC_ARGS \ - ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ - ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ - ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \ + __MAP(x, __SC_TYPE, __VA_ARGS__)) \ #define __SYS_STUB0(abi, name) \ long __##abi##_##name(const struct pt_regs *regs); \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cdba4d0c6d4a..77eb9b0e7685 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -128,6 +128,7 @@ struct mnt_id_req; #define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (__force t) a +#define __SC_TYPE(t, a) t #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) From b184c8c2889ceef0a137c7d0567ef9fe3d92276e Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Mon, 22 Jan 2024 16:57:15 +0800 Subject: [PATCH 297/768] genirq: Initialize resend_node hlist for all interrupt descriptors For a CONFIG_SPARSE_IRQ=n kernel, early_irq_init() is supposed to initialize all interrupt descriptors. It does except for irq_desc::resend_node, which ia only initialized for the first descriptor. Use the indexed decriptor and not the base pointer to address that. Fixes: bc06a9e08742 ("genirq: Use hlist for managing resend handlers") Signed-off-by: Dawei Li Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122085716.2999875-5-dawei.li@shingroup.cn --- kernel/irq/irqdesc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 27ca1c866f29..371eb1711d34 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -600,7 +600,7 @@ int __init early_irq_init(void) mutex_init(&desc[i].request_mutex); init_waitqueue_head(&desc[i].wait_for_threads); desc_set_defaults(i, &desc[i], node, NULL, NULL); - irq_resend_init(desc); + irq_resend_init(&desc[i]); } return arch_early_irq_init(); } From fcfc9f711d1e2fc7876ac12b1b16c509404b9625 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 24 Jan 2024 14:21:47 +0800 Subject: [PATCH 298/768] ALSA: hda/realtek - Add speaker pin verbtable for Dell dual speaker platform SSID 0x0c0d platform. It can't mute speaker when HP plugged. This patch add quirk to fill speaker pin verbtable. And disable speaker passthrough. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/38b82976a875451d833d514cee34ff6a@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 19f2eb26659d..bfefe9ccbc6b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -439,6 +439,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); fallthrough; case 0x10ec0215: + case 0x10ec0285: + case 0x10ec0289: + alc_update_coef_idx(codec, 0x36, 1<<13, 0); + fallthrough; case 0x10ec0230: case 0x10ec0233: case 0x10ec0235: @@ -452,9 +456,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: - case 0x10ec0285: case 0x10ec0298: - case 0x10ec0289: case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; @@ -9732,6 +9734,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0c0d, "Dell Oasis", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), From 7c4298534ce3c4b85099aba53442ec82ef17578b Mon Sep 17 00:00:00 2001 From: Jacob Siverskog Date: Wed, 24 Jan 2024 11:18:24 +0100 Subject: [PATCH 299/768] ALSA: usb-audio: fix typo fix typo in midi fallback log. Signed-off-by: Jacob Siverskog Fixes: ff49d1df79ae ("ALSA: usb-audio: USB MIDI 2.0 UMP support") Link: https://lore.kernel.org/r/20240124101827.35433-1-jacob@teenage.engineering Signed-off-by: Takashi Iwai --- sound/usb/midi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c index 1ec177fe284e..820d3e4b672a 100644 --- a/sound/usb/midi2.c +++ b/sound/usb/midi2.c @@ -1085,7 +1085,7 @@ int snd_usb_midi_v2_create(struct snd_usb_audio *chip, } if ((quirk && quirk->type != QUIRK_MIDI_STANDARD_INTERFACE) || iface->num_altsetting < 2) { - usb_audio_info(chip, "Quirk or no altest; falling back to MIDI 1.0\n"); + usb_audio_info(chip, "Quirk or no altset; falling back to MIDI 1.0\n"); goto fallback_to_midi1; } hostif = &iface->altsetting[1]; From d62ccb59afcd94e8d301433974672b156392289d Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 24 Jan 2024 20:08:34 +0800 Subject: [PATCH 300/768] ALSA: virtio: remove duplicate check if queue is broken virtqueue_enable_cb() will call virtqueue_poll() which will check if queue is broken at beginning, so remove the virtqueue_is_broken() call Signed-off-by: Li RongQing Reviewed-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20240124120834.49410-1-lirongqing@baidu.com Signed-off-by: Takashi Iwai --- sound/virtio/virtio_card.c | 2 -- sound/virtio/virtio_ctl_msg.c | 2 -- sound/virtio/virtio_pcm_msg.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index e2847c040f75..b158c3cb8e5f 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -91,8 +91,6 @@ static void virtsnd_event_notify_cb(struct virtqueue *vqueue) virtsnd_event_dispatch(snd, event); virtsnd_event_send(vqueue, event, true, GFP_ATOMIC); } - if (unlikely(virtqueue_is_broken(vqueue))) - break; } while (!virtqueue_enable_cb(vqueue)); spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/sound/virtio/virtio_ctl_msg.c b/sound/virtio/virtio_ctl_msg.c index 18dc5aca2e0c..9dabea01277f 100644 --- a/sound/virtio/virtio_ctl_msg.c +++ b/sound/virtio/virtio_ctl_msg.c @@ -303,8 +303,6 @@ void virtsnd_ctl_notify_cb(struct virtqueue *vqueue) virtqueue_disable_cb(vqueue); while ((msg = virtqueue_get_buf(vqueue, &length))) virtsnd_ctl_msg_complete(msg); - if (unlikely(virtqueue_is_broken(vqueue))) - break; } while (!virtqueue_enable_cb(vqueue)); spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c index 542446c4c7ba..8c32efaf4c52 100644 --- a/sound/virtio/virtio_pcm_msg.c +++ b/sound/virtio/virtio_pcm_msg.c @@ -358,8 +358,6 @@ static inline void virtsnd_pcm_notify_cb(struct virtio_snd_queue *queue) virtqueue_disable_cb(queue->vqueue); while ((msg = virtqueue_get_buf(queue->vqueue, &written_bytes))) virtsnd_pcm_msg_complete(msg, written_bytes); - if (unlikely(virtqueue_is_broken(queue->vqueue))) - break; } while (!virtqueue_enable_cb(queue->vqueue)); spin_unlock_irqrestore(&queue->lock, flags); } From d915a6850e27efb383cd4400caadfe47792623df Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Wed, 24 Jan 2024 16:02:39 +0300 Subject: [PATCH 301/768] ALSA: usb-audio: Add delay quirk for MOTU M Series 2nd revision Audio control requests that sets sampling frequency sometimes fail on this card. Adding delay between control messages eliminates that problem. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217601 Cc: Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240124130239.358298-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 43c14a81a1e2..be0ce1fc4f26 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2075,6 +2075,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x07fd, 0x000b, /* MOTU M Series 2nd hardware revision */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */ From edcf9725150e42beeca42d085149f4c88fa97afd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Jan 2024 14:58:16 +1100 Subject: [PATCH 302/768] nfsd: fix RELEASE_LOCKOWNER The test on so_count in nfsd4_release_lockowner() is nonsense and harmful. Revert to using check_for_locks(), changing that to not sleep. First: harmful. As is documented in the kdoc comment for nfsd4_release_lockowner(), the test on so_count can transiently return a false positive resulting in a return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is clearly a protocol violation and with the Linux NFS client it can cause incorrect behaviour. If RELEASE_LOCKOWNER is sent while some other thread is still processing a LOCK request which failed because, at the time that request was received, the given owner held a conflicting lock, then the nfsd thread processing that LOCK request can hold a reference (conflock) to the lock owner that causes nfsd4_release_lockowner() to return an incorrect error. The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so it knows that the error is impossible. It assumes the lock owner was in fact released so it feels free to use the same lock owner identifier in some later locking request. When it does reuse a lock owner identifier for which a previous RELEASE failed, it will naturally use a lock_seqid of zero. However the server, which didn't release the lock owner, will expect a larger lock_seqid and so will respond with NFS4ERR_BAD_SEQID. So clearly it is harmful to allow a false positive, which testing so_count allows. The test is nonsense because ... well... it doesn't mean anything. so_count is the sum of three different counts. 1/ the set of states listed on so_stateids 2/ the set of active vfs locks owned by any of those states 3/ various transient counts such as for conflicting locks. When it is tested against '2' it is clear that one of these is the transient reference obtained by find_lockowner_str_locked(). It is not clear what the other one is expected to be. In practice, the count is often 2 because there is precisely one state on so_stateids. If there were more, this would fail. In my testing I see two circumstances when RELEASE_LOCKOWNER is called. In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in all the lock states being removed, and so the lockowner being discarded (it is removed when there are no more references which usually happens when the lock state is discarded). When nfsd4_release_lockowner() finds that the lock owner doesn't exist, it returns success. The other case shows an so_count of '2' and precisely one state listed in so_stateid. It appears that the Linux client uses a separate lock owner for each file resulting in one lock state per lock owner, so this test on '2' is safe. For another client it might not be safe. So this patch changes check_for_locks() to use the (newish) find_any_file_locked() so that it doesn't take a reference on the nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With this check is it safe to restore the use of check_for_locks() rather than testing so_count against the mysterious '2'. Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org # v6.2+ Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2fa54cfd4882..6dc6340e2852 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf = find_any_file(fp); + struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; + spin_lock(&fp->fi_lock); + nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - return status; + goto out; } inode = file_inode(nf->nf_file); @@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } - nfsd_file_put(nf); +out: + spin_unlock(&fp->fi_lock); return status; } @@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * The lockowner's so_count is bumped when a lock record is added - * or when copying a conflicting lock. The latter case is brief, - * but can lead to fleeting false positives when looking for - * locks-in-use. + * Check if theree are any locks still held and if not - free the lockowner + * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found @@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, spin_unlock(&clp->cl_lock); return nfs_ok; } - if (atomic_read(&lo->lo_owner.so_count) != 2) { - spin_unlock(&clp->cl_lock); - nfs4_put_stateowner(&lo->lo_owner); - return nfserr_locks_held; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); + return nfserr_locks_held; + } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { From 2f8c7c3715f2c6fb51a4ecc0905c04dd78a3da29 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:24:24 +0000 Subject: [PATCH 303/768] spi: Raise limit on number of chip selects As reported by Guenter the limit we've got on the number of chip selects is set too low for some systems, raise the limit. We should really remove the hard coded limit but this is needed as a fix so let's do the simple thing and raise the limit for now. Fixes: 4d8ff6b0991d ("spi: Add multi-cs memories support in SPI core") Reported-by: Guenter Roeck Suggested-by: Guenter Roeck Signed-off-by: Mark Brown Link: https://msgid.link/r/20240124-spi-multi-cs-max-v2-1-df6fc5ab1abc@kernel.org Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 471fe2ff9066..600fbd5daf68 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 4 +#define SPI_CS_CNT_MAX 16 struct dma_chan; struct software_node; From 92b0b0ff0ba32e7b3f1e789cc96df1359db712ef Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 23 Jan 2024 14:13:40 -0800 Subject: [PATCH 304/768] nvme: add module description to stop warnings Add MODULE_DESCRIPTION() in order to remove warnings & get clean build:- WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-core.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-fabrics.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-rdma.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-fc.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-tcp.o Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/fabrics.c | 1 + drivers/nvme/host/fc.c | 1 + drivers/nvme/host/pci.c | 1 + drivers/nvme/host/rdma.c | 1 + drivers/nvme/host/tcp.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 85ab0fcf9e88..0810be0d1154 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4851,5 +4851,6 @@ static void __exit nvme_core_exit(void) MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("NVMe host core framework"); module_init(nvme_core_init); module_exit(nvme_core_exit); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index b5752a77ad98..373ed08e6b92 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1488,6 +1488,7 @@ static void __exit nvmf_exit(void) } MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("NVMe host fabrics library"); module_init(nvmf_init); module_exit(nvmf_exit); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 16847a316421..e2308119f8f0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -4004,4 +4004,5 @@ static void __exit nvme_fc_exit_module(void) module_init(nvme_fc_init_module); module_exit(nvme_fc_exit_module); +MODULE_DESCRIPTION("NVMe host FC transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c1d6357ec98a..25eb72779541 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3543,5 +3543,6 @@ static void __exit nvme_exit(void) MODULE_AUTHOR("Matthew Wilcox "); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("NVMe host PCIe transport driver"); module_init(nvme_init); module_exit(nvme_exit); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 11dde0d83044..0f48ead986ec 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2400,4 +2400,5 @@ static void __exit nvme_rdma_cleanup_module(void) module_init(nvme_rdma_init_module); module_exit(nvme_rdma_cleanup_module); +MODULE_DESCRIPTION("NVMe host RDMA transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d058d990532b..4393cf244025 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2826,4 +2826,5 @@ static void __exit nvme_tcp_cleanup_module(void) module_init(nvme_tcp_init_module); module_exit(nvme_tcp_cleanup_module); +MODULE_DESCRIPTION("NVMe host TCP transport driver"); MODULE_LICENSE("GPL v2"); From 41951f83ef9044e906e11f5ea7db35a30dc9f581 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 23 Jan 2024 14:13:41 -0800 Subject: [PATCH 305/768] nvmet: add module description to stop warnings Add MODULE_DESCRIPTION() in order to remove warnings & get clean build:- WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvme-loop.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet-rdma.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet-fc.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvme-fcloop.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet-tcp.o Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/fc.c | 1 + drivers/nvme/target/fcloop.c | 1 + drivers/nvme/target/loop.c | 1 + drivers/nvme/target/rdma.c | 1 + drivers/nvme/target/tcp.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index fa35e65915f0..8658e9c08534 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1705,4 +1705,5 @@ static void __exit nvmet_exit(void) module_init(nvmet_init); module_exit(nvmet_exit); +MODULE_DESCRIPTION("NVMe target core framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index bda7a3009e85..05e6a755b330 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2945,4 +2945,5 @@ static void __exit nvmet_fc_exit_module(void) module_init(nvmet_fc_init_module); module_exit(nvmet_fc_exit_module); +MODULE_DESCRIPTION("NVMe target FC transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index ead349af30f1..c1faeb1e9e55 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1650,4 +1650,5 @@ static void __exit fcloop_exit(void) module_init(fcloop_init); module_exit(fcloop_exit); +MODULE_DESCRIPTION("NVMe target FC loop transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9cb434c58075..2b2e0d00af85 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -688,5 +688,6 @@ static void __exit nvme_loop_cleanup_module(void) module_init(nvme_loop_init_module); module_exit(nvme_loop_cleanup_module); +MODULE_DESCRIPTION("NVMe target loop transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */ diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 667f9c04f35d..3a0f2c170f4c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -2104,5 +2104,6 @@ static void __exit nvmet_rdma_exit(void) module_init(nvmet_rdma_init); module_exit(nvmet_rdma_exit); +MODULE_DESCRIPTION("NVMe target RDMA transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */ diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 6a1e6bb80062..da0469978d6f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -2221,5 +2221,6 @@ static void __exit nvmet_tcp_exit(void) module_init(nvmet_tcp_init); module_exit(nvmet_tcp_exit); +MODULE_DESCRIPTION("NVMe target TCP transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */ From 7822baa844a87cbb93308c1032c3d47d4079bb8a Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 24 Jan 2024 15:15:24 +0000 Subject: [PATCH 306/768] ALSA: usb-audio: add quirk for RODE NT-USB+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RODE NT-USB+ is marketed as a professional usb microphone, however the usb audio interface is a mess: [ 1.130977] usb 1-5: new full-speed USB device number 2 using xhci_hcd [ 1.503906] usb 1-5: config 1 has an invalid interface number: 5 but max is 4 [ 1.503912] usb 1-5: config 1 has no interface number 4 [ 1.519689] usb 1-5: New USB device found, idVendor=19f7, idProduct=0035, bcdDevice= 1.09 [ 1.519695] usb 1-5: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 1.519697] usb 1-5: Product: RØDE NT-USB+ [ 1.519699] usb 1-5: Manufacturer: RØDE [ 1.519700] usb 1-5: SerialNumber: 1D773A1A [ 8.327495] usb 1-5: 1:1: cannot get freq at ep 0x82 [ 8.344500] usb 1-5: 1:2: cannot get freq at ep 0x82 [ 8.365499] usb 1-5: 2:1: cannot get freq at ep 0x2 Add QUIRK_FLAG_GET_SAMPLE_RATE to work around the broken sample rate get. I have asked Rode support to fix it, but they show no interest. Signed-off-by: Sean Young Cc: Link: https://lore.kernel.org/r/20240124151524.23314-1-sean@mess.org Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index be0ce1fc4f26..591c3a354696 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2183,6 +2183,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ From 15ade5bfa5abf0e02249239db91d5438fa796d18 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 24 Jan 2024 12:16:27 +0000 Subject: [PATCH 307/768] nvme-rdma: Fix transfer length when write_generate/read_verify are 0 When the block layer doesn't generate/verify metadata, the SG length is smaller than the transfer length. This is because the SG length doesn't include the metadata length that is added by the HW on the wire. The target failes those commands with "Data SGL Length Invalid" by comparing the transfer length and the SG length. Fix it by adding the metadata length to the transfer length when there is no metadata SGL. The bug reproduces when setting read_verify/write_generate configs to 0 at the child multipath device or at the primary device when NVMe multipath is disabled. Note that setting those configs to 0 on the multipath device (ns_head) doesn't have any impact on the I/Os. Fixes: 5ec5d3bddc6b ("nvme-rdma: add metadata/T10-PI support") Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0f48ead986ec..3f393ee20281 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1410,6 +1410,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, struct nvme_ns *ns = rq->q->queuedata; struct bio *bio = rq->bio; struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); + u32 xfer_len; int nr; req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); @@ -1422,8 +1424,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, if (unlikely(nr)) goto mr_put; - nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c, - req->mr->sig_attrs, ns->head->pi_type); + nvme_rdma_set_sig_attrs(bi, c, req->mr->sig_attrs, ns->head->pi_type); nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); @@ -1441,7 +1442,11 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_WRITE; sg->addr = cpu_to_le64(req->mr->iova); - put_unaligned_le24(req->mr->length, sg->length); + xfer_len = req->mr->length; + /* Check if PI is added by the HW */ + if (!pi_count) + xfer_len += (xfer_len >> bi->interval_exp) * ns->head->pi_size; + put_unaligned_le24(xfer_len, sg->length); put_unaligned_le32(req->mr->rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; From e169bd4fb2b36c4b2bee63c35c740c85daeb2e86 Mon Sep 17 00:00:00 2001 From: Maksim Kiselev Date: Wed, 24 Jan 2024 10:24:36 +0300 Subject: [PATCH 308/768] aoe: avoid potential deadlock at set_capacity Move set_capacity() outside of the section procected by (&d->lock). To avoid possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- [1] lock(&bdev->bd_size_lock); local_irq_disable(); [2] lock(&d->lock); [3] lock(&bdev->bd_size_lock); [4] lock(&d->lock); *** DEADLOCK *** Where [1](&bdev->bd_size_lock) hold by zram_add()->set_capacity(). [2]lock(&d->lock) hold by aoeblk_gdalloc(). And aoeblk_gdalloc() is trying to acquire [3](&bdev->bd_size_lock) at set_capacity() call. In this situation an attempt to acquire [4]lock(&d->lock) from aoecmd_cfg_rsp() will lead to deadlock. So the simplest solution is breaking lock dependency [2](&d->lock) -> [3](&bdev->bd_size_lock) by moving set_capacity() outside. Signed-off-by: Maksim Kiselev Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240124072436.3745720-2-bigunclemax@gmail.com Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index d2dbf8aaccb5..b1b47d88f5db 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct blk_mq_tag_set *set; + sector_t ssize; ulong flags; int late = 0; int err; @@ -396,7 +397,7 @@ aoeblk_gdalloc(void *vp) gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - set_capacity(gd, d->ssize); + ssize = d->ssize; snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); @@ -405,6 +406,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); + set_capacity(gd, ssize); + err = device_add_disk(NULL, gd, aoe_attr_groups); if (err) goto out_disk_cleanup; From 668abe6dc7b61941fa5c724c06797efb0b87f070 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Jan 2024 16:53:07 +0100 Subject: [PATCH 309/768] ALSA: usb-audio: Sort quirk table entries The quirk table entries should be put in the USB ID order, but some entries have been put in random places. Re-sort them. Fixes: bf990c102319 ("ALSA: usb-audio: add quirk to fix Hamedal C20 disconnect issue") Fixes: fd28941cff1c ("ALSA: usb-audio: Add new quirk FIXED_RATE for JBL Quantum810 Wireless") Fixes: dfd5fe19db7d ("ALSA: usb-audio: Add FIXED_RATE quirk for JBL Quantum610 Wireless") Fixes: 4a63e68a2951 ("ALSA: usb-audio: Fix microphone sound on Nexigo webcam.") Fixes: 7822baa844a8 ("ALSA: usb-audio: add quirk for RODE NT-USB+") Fixes: 4fb7c24f69c4 ("ALSA: usb-audio: Add quirk for Fiero SC-01") Fixes: 2307a0e1ca0b ("ALSA: usb-audio: Add quirk for Fiero SC-01 (fw v1.0.0)") Link: https://lore.kernel.org/r/20240124155307.16996-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 591c3a354696..09712e61c606 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2037,6 +2037,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_CTL_MSG_DELAY_5M), + DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ + QUIRK_FLAG_IFACE_SKIP_CLOSE), DEVICE_FLG(0x054c, 0x0b8c, /* Sony WALKMAN NW-A45 DAC */ QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */ @@ -2083,8 +2085,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ + QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ + QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */ @@ -2117,6 +2125,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x2040, 0x7201, /* Hauppauge HVR-950Q-MXL */ @@ -2159,6 +2171,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ @@ -2167,24 +2185,6 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), - DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ - QUIRK_FLAG_IFACE_SKIP_CLOSE), - DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ - QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ From d1b163aa0749706379055e40a52cf7a851abf9dc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 23 Jan 2024 13:09:26 +0100 Subject: [PATCH 310/768] Revert "drivers/firmware: Move sysfb_init() from device_initcall to subsys_initcall_sync" This reverts commit 60aebc9559492cea6a9625f514a8041717e3a2e4. Commit 60aebc9559492cea ("drivers/firmware: Move sysfb_init() from device_initcall to subsys_initcall_sync") messes up initialization order of the graphics drivers and leads to blank displays on some systems. So revert the commit. To make the display drivers fully independent from initialization order requires to track framebuffer memory by device and independently from the loaded drivers. The kernel currently lacks the infrastructure to do so. Reported-by: Jaak Ristioja Closes: https://lore.kernel.org/dri-devel/ZUnNi3q3yB3zZfTl@P70.localdomain/T/#t Reported-by: Huacai Chen Closes: https://lore.kernel.org/dri-devel/20231108024613.2898921-1-chenhuacai@loongson.cn/ Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10133 Signed-off-by: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Thorsten Leemhuis Cc: Jani Nikula Cc: stable@vger.kernel.org # v6.5+ Reviewed-by: Javier Martinez Canillas Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240123120937.27736-1-tzimmermann@suse.de --- drivers/firmware/sysfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 82fcfd29bc4d..3c197db42c9d 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -128,4 +128,4 @@ unlock_mutex: } /* must execute after PCI subsystem for EFI quirks */ -subsys_initcall_sync(sysfb_init); +device_initcall(sysfb_init); From 520d9708979349dcf6e044eac51efc43788b5cec Mon Sep 17 00:00:00 2001 From: Brandon Brnich Date: Mon, 11 Dec 2023 14:59:19 -0600 Subject: [PATCH 311/768] dt-bindings: media: Remove K3 Family Prefix from Compatible K3 family prefix is not included in other TI compatible strings. Remove this prefix to keep naming convention consistent. Fixes: de4b9f7e371a ("dt-bindings: media: wave5: add yaml devicetree bindings") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdUYOq=q1j=d+Eac28hthOUAaNUkuvxmRu-mUN1pLKq69g@mail.gmail.com/ Signed-off-by: Brandon Brnich Acked-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Signed-off-by: Hans Verkuil --- Documentation/devicetree/bindings/media/cnm,wave521c.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/media/cnm,wave521c.yaml b/Documentation/devicetree/bindings/media/cnm,wave521c.yaml index 6d5569e77b7a..6a11c1d11fb5 100644 --- a/Documentation/devicetree/bindings/media/cnm,wave521c.yaml +++ b/Documentation/devicetree/bindings/media/cnm,wave521c.yaml @@ -17,7 +17,7 @@ properties: compatible: items: - enum: - - ti,k3-j721s2-wave521c + - ti,j721s2-wave521c - const: cnm,wave521c reg: @@ -53,7 +53,7 @@ additionalProperties: false examples: - | vpu: video-codec@12345678 { - compatible = "ti,k3-j721s2-wave521c", "cnm,wave521c"; + compatible = "ti,j721s2-wave521c", "cnm,wave521c"; reg = <0x12345678 0x1000>; clocks = <&clks 42>; interrupts = <42>; From c14d17a32568679385d32fe7a23994560c12772c Mon Sep 17 00:00:00 2001 From: Brandon Brnich Date: Mon, 11 Dec 2023 14:59:20 -0600 Subject: [PATCH 312/768] media: chips-media: wave5: Remove K3 References Change compatible string to match dt bindings for TI devices. K3 family prefix should not be included as it deviates from naming convention. Fixes: 9707a6254a8a ("media: chips-media: wave5: Add the v4l2 layer") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdUYOq=q1j=d+Eac28hthOUAaNUkuvxmRu-mUN1pLKq69g@mail.gmail.com/ Signed-off-by: Brandon Brnich Reviewed-by: Nishanth Menon Signed-off-by: Hans Verkuil --- drivers/media/platform/chips-media/wave5/wave5-vpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu.c b/drivers/media/platform/chips-media/wave5/wave5-vpu.c index bfe4caa79cc9..0d90b5820bef 100644 --- a/drivers/media/platform/chips-media/wave5/wave5-vpu.c +++ b/drivers/media/platform/chips-media/wave5/wave5-vpu.c @@ -272,7 +272,7 @@ static const struct wave5_match_data ti_wave521c_data = { }; static const struct of_device_id wave5_dt_ids[] = { - { .compatible = "ti,k3-j721s2-wave521c", .data = &ti_wave521c_data }, + { .compatible = "ti,j721s2-wave521c", .data = &ti_wave521c_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, wave5_dt_ids); From 78e23c3e914a5e678a20286fb09bc218017af89a Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 18 Jan 2024 13:14:52 +0100 Subject: [PATCH 313/768] media: media videobuf2: Stop direct calls to queue num_buffers field Use vb2_get_num_buffers() to avoid using queue num_buffers field directly. This allows us to change how the number of buffers is computed in the future. Fixes: d055a76c0065 ("media: core: Report the maximum possible number of buffers for the queue") Signed-off-by: Benjamin Gaignard Acked-by: Tomasz Figa Signed-off-by: Hans Verkuil --- drivers/media/common/videobuf2/videobuf2-core.c | 2 +- drivers/media/common/videobuf2/videobuf2-v4l2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 41a832dd1426..b6bf8f232f48 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -989,7 +989,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, bool no_previous_buffers = !q_num_bufs; int ret = 0; - if (q->num_buffers == q->max_num_buffers) { + if (q_num_bufs == q->max_num_buffers) { dprintk(q, 1, "maximum number of buffers already allocated\n"); return -ENOBUFS; } diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index 54d572c3b515..6380155d8575 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -1029,7 +1029,7 @@ int vb2_ioctl_create_bufs(struct file *file, void *priv, int res = vb2_verify_memory_type(vdev->queue, p->memory, p->format.type); - p->index = vdev->queue->num_buffers; + p->index = vb2_get_num_buffers(vdev->queue); fill_buf_caps(vdev->queue, &p->capabilities); validate_memory_flags(vdev->queue, p->memory, &p->flags); /* From b32431b753217d8d45b018443b1a7aac215921fb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 19 Jan 2024 10:03:23 +0100 Subject: [PATCH 314/768] media: vb2: refactor setting flags and caps, fix missing cap Several functions implementing VIDIOC_REQBUFS and _CREATE_BUFS all use almost the same code to fill in the flags and capability fields. Refactor this into a new vb2_set_flags_and_caps() function that replaces the old fill_buf_caps() and validate_memory_flags() functions. This also fixes a bug where vb2_ioctl_create_bufs() would not set the V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS cap and also not fill in the max_num_buffers field. Fixes: d055a76c0065 ("media: core: Report the maximum possible number of buffers for the queue") Signed-off-by: Hans Verkuil Reviewed-by: Benjamin Gaignard Acked-by: Tomasz Figa --- .../media/common/videobuf2/videobuf2-v4l2.c | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index 6380155d8575..c575198e8354 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -671,8 +671,20 @@ int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b) } EXPORT_SYMBOL(vb2_querybuf); -static void fill_buf_caps(struct vb2_queue *q, u32 *caps) +static void vb2_set_flags_and_caps(struct vb2_queue *q, u32 memory, + u32 *flags, u32 *caps, u32 *max_num_bufs) { + if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) { + /* + * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only, + * but in order to avoid bugs we zero out all bits. + */ + *flags = 0; + } else { + /* Clear all unknown flags. */ + *flags &= V4L2_MEMORY_FLAG_NON_COHERENT; + } + *caps = V4L2_BUF_CAP_SUPPORTS_ORPHANED_BUFS; if (q->io_modes & VB2_MMAP) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP; @@ -686,21 +698,9 @@ static void fill_buf_caps(struct vb2_queue *q, u32 *caps) *caps |= V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS; if (q->supports_requests) *caps |= V4L2_BUF_CAP_SUPPORTS_REQUESTS; -} - -static void validate_memory_flags(struct vb2_queue *q, - int memory, - u32 *flags) -{ - if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) { - /* - * This needs to clear V4L2_MEMORY_FLAG_NON_COHERENT only, - * but in order to avoid bugs we zero out all bits. - */ - *flags = 0; - } else { - /* Clear all unknown flags. */ - *flags &= V4L2_MEMORY_FLAG_NON_COHERENT; + if (max_num_bufs) { + *max_num_bufs = q->max_num_buffers; + *caps |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS; } } @@ -709,8 +709,8 @@ int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req) int ret = vb2_verify_memory_type(q, req->memory, req->type); u32 flags = req->flags; - fill_buf_caps(q, &req->capabilities); - validate_memory_flags(q, req->memory, &flags); + vb2_set_flags_and_caps(q, req->memory, &flags, + &req->capabilities, NULL); req->flags = flags; return ret ? ret : vb2_core_reqbufs(q, req->memory, req->flags, &req->count); @@ -751,11 +751,9 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create) int ret = vb2_verify_memory_type(q, create->memory, f->type); unsigned i; - fill_buf_caps(q, &create->capabilities); - validate_memory_flags(q, create->memory, &create->flags); create->index = vb2_get_num_buffers(q); - create->max_num_buffers = q->max_num_buffers; - create->capabilities |= V4L2_BUF_CAP_SUPPORTS_MAX_NUM_BUFFERS; + vb2_set_flags_and_caps(q, create->memory, &create->flags, + &create->capabilities, &create->max_num_buffers); if (create->count == 0) return ret != -EBUSY ? ret : 0; @@ -1006,8 +1004,8 @@ int vb2_ioctl_reqbufs(struct file *file, void *priv, int res = vb2_verify_memory_type(vdev->queue, p->memory, p->type); u32 flags = p->flags; - fill_buf_caps(vdev->queue, &p->capabilities); - validate_memory_flags(vdev->queue, p->memory, &flags); + vb2_set_flags_and_caps(vdev->queue, p->memory, &flags, + &p->capabilities, NULL); p->flags = flags; if (res) return res; @@ -1026,12 +1024,11 @@ int vb2_ioctl_create_bufs(struct file *file, void *priv, struct v4l2_create_buffers *p) { struct video_device *vdev = video_devdata(file); - int res = vb2_verify_memory_type(vdev->queue, p->memory, - p->format.type); + int res = vb2_verify_memory_type(vdev->queue, p->memory, p->format.type); p->index = vb2_get_num_buffers(vdev->queue); - fill_buf_caps(vdev->queue, &p->capabilities); - validate_memory_flags(vdev->queue, p->memory, &p->flags); + vb2_set_flags_and_caps(vdev->queue, p->memory, &p->flags, + &p->capabilities, &p->max_num_buffers); /* * If count == 0, then just check if memory and type are valid. * Any -EBUSY result from vb2_verify_memory_type can be mapped to 0. From e787644caf7628ad3269c1fbd321c3255cf51710 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Dec 2023 00:19:15 +0100 Subject: [PATCH 315/768] rcu: Defer RCU kthreads wakeup when CPU is dying When the CPU goes idle for the last time during the CPU down hotplug process, RCU reports a final quiescent state for the current CPU. If this quiescent state propagates up to the top, some tasks may then be woken up to complete the grace period: the main grace period kthread and/or the expedited main workqueue (or kworker). If those kthreads have a SCHED_FIFO policy, the wake up can indirectly arm the RT bandwith timer to the local offline CPU. Since this happens after hrtimers have been migrated at CPUHP_AP_HRTIMERS_DYING stage, the timer gets ignored. Therefore if the RCU kthreads are waiting for RT bandwidth to be available, they may never be actually scheduled. This triggers TREE03 rcutorture hangs: rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 4-...!: (1 GPs behind) idle=9874/1/0x4000000000000000 softirq=0/0 fqs=20 rcuc=21071 jiffies(starved) rcu: (t=21035 jiffies g=938281 q=40787 ncpus=6) rcu: rcu_preempt kthread starved for 20964 jiffies! g938281 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0 rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior. rcu: RCU grace-period kthread stack dump: task:rcu_preempt state:R running task stack:14896 pid:14 tgid:14 ppid:2 flags:0x00004000 Call Trace: __schedule+0x2eb/0xa80 schedule+0x1f/0x90 schedule_timeout+0x163/0x270 ? __pfx_process_timeout+0x10/0x10 rcu_gp_fqs_loop+0x37c/0x5b0 ? __pfx_rcu_gp_kthread+0x10/0x10 rcu_gp_kthread+0x17c/0x200 kthread+0xde/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2b/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 The situation can't be solved with just unpinning the timer. The hrtimer infrastructure and the nohz heuristics involved in finding the best remote target for an unpinned timer would then also need to handle enqueues from an offline CPU in the most horrendous way. So fix this on the RCU side instead and defer the wake up to an online CPU if it's too late for the local one. Reported-by: Paul E. McKenney Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/tree.c | 34 +++++++++++++++++++++++++++++++++- kernel/rcu/tree_exp.h | 3 +-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1ae851777806..b2bccfd37c38 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1013,6 +1013,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } +static void swake_up_one_online_ipi(void *arg) +{ + struct swait_queue_head *wqh = arg; + + swake_up_one(wqh); +} + +static void swake_up_one_online(struct swait_queue_head *wqh) +{ + int cpu = get_cpu(); + + /* + * If called from rcutree_report_cpu_starting(), wake up + * is dangerous that late in the CPU-down hotplug process. The + * scheduler might queue an ignored hrtimer. Defer the wake up + * to an online CPU instead. + */ + if (unlikely(cpu_is_offline(cpu))) { + int target; + + target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), + cpu_online_mask); + + smp_call_function_single(target, swake_up_one_online_ipi, + wqh, 0); + put_cpu(); + } else { + put_cpu(); + swake_up_one(wqh); + } +} + /* * Awaken the grace-period kthread. Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1037,7 +1069,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one(&rcu_state.gp_wq); + swake_up_one_online(&rcu_state.gp_wq); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6d7cea5d591f..2ac440bc7e10 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -173,7 +173,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) return ret; } - /* * Report the exit from RCU read-side critical section for the last task * that queued itself during or before the current expedited preemptible-RCU @@ -201,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - swake_up_one(&rcu_state.expedited_wq); + swake_up_one_online(&rcu_state.expedited_wq); } break; } From f9f4b0c6425eb9ffd9bf62b8b8143e786b6ba695 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 24 Jan 2024 17:41:01 +0000 Subject: [PATCH 316/768] spi: cs42l43: Handle error from devm_pm_runtime_enable As it devm_pm_runtime_enable can fail due to memory allocations, it is best to handle the error. Suggested-by: Andy Shevchenko Signed-off-by: Charles Keepax Link: https://msgid.link/r/20240124174101.2270249-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index f13073e12593..b24190526ce9 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -244,7 +244,10 @@ static int cs42l43_spi_probe(struct platform_device *pdev) priv->ctlr->use_gpio_descriptors = true; priv->ctlr->auto_runtime_pm = true; - devm_pm_runtime_enable(priv->dev); + ret = devm_pm_runtime_enable(priv->dev); + if (ret) + return ret; + pm_runtime_idle(priv->dev); regmap_write(priv->regmap, CS42L43_TRAN_CONFIG6, CS42L43_FIFO_SIZE - 1); From b253d87fd78bf8d3e7efc5d149147765f044e89d Mon Sep 17 00:00:00 2001 From: George Guo Date: Tue, 26 Dec 2023 17:42:42 +0800 Subject: [PATCH 317/768] netfilter: nf_tables: cleanup documentation - Correct comments for nlpid, family, udlen and udata in struct nft_table, and afinfo is no longer a member of enum nft_set_class. - Add comment for data in struct nft_set_elem. - Add comment for flags in struct nft_ctx. - Add comments for timeout in struct nft_set_iter, and flags is not a member of struct nft_set_iter, remove the comment for it. - Add comments for commit, abort, estimate and gc_init in struct nft_set_ops. - Add comments for pending_update, num_exprs, exprs and catchall_list in struct nft_set. - Add comment for ext_len in struct nft_set_ext_tmpl. - Add comment for inner_ops in struct nft_expr_type. - Add comments for clone, destroy_clone, reduce, gc, offload, offload_action, offload_stats in struct nft_expr_ops. - Add comments for blob_gen_0, blob_gen_1, bound, genmask, udlen, udata, blob_next in struct nft_chain. - Add comment for flags in struct nft_base_chain. - Add comments for udlen, udata in struct nft_object. - Add comment for type in struct nft_object_ops. - Add comment for hook_list in struct nft_flowtable, and remove comments for dev_name and ops which are not members of struct nft_flowtable. Signed-off-by: George Guo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 49 ++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b157c5cafd14..4e1ea18eb5f0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -205,6 +205,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src, * @nla: netlink attributes * @portid: netlink portID of the original message * @seq: netlink sequence number + * @flags: modifiers to new request * @family: protocol family * @level: depth of the chains * @report: notify via unicast netlink message @@ -282,6 +283,7 @@ struct nft_elem_priv { }; * * @key: element key * @key_end: closing element key + * @data: element data * @priv: element private data and extensions */ struct nft_set_elem { @@ -325,10 +327,10 @@ struct nft_set_iter { * @dtype: data type * @dlen: data length * @objtype: object type - * @flags: flags * @size: number of set elements * @policy: set policy * @gc_int: garbage collector interval + * @timeout: element timeout * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element * @expr: set must support for expressions @@ -351,9 +353,9 @@ struct nft_set_desc { /** * enum nft_set_class - performance class * - * @NFT_LOOKUP_O_1: constant, O(1) - * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N) - * @NFT_LOOKUP_O_N: linear, O(N) + * @NFT_SET_CLASS_O_1: constant, O(1) + * @NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N) + * @NFT_SET_CLASS_O_N: linear, O(N) */ enum nft_set_class { NFT_SET_CLASS_O_1, @@ -422,9 +424,13 @@ struct nft_set_ext; * @remove: remove element from set * @walk: iterate over all set elements * @get: get set elements + * @commit: commit set elements + * @abort: abort set elements * @privsize: function to return size of set private data + * @estimate: estimate the required memory size and the lookup complexity class * @init: initialize private data of new set instance * @destroy: destroy private data of set instance + * @gc_init: initialize garbage collection * @elemsize: element private size * * Operations lookup, update and delete have simpler interfaces, are faster @@ -540,13 +546,16 @@ struct nft_set_elem_expr { * @policy: set parameterization (see enum nft_set_policies) * @udlen: user data length * @udata: user data - * @expr: stateful expression + * @pending_update: list of pending update set element * @ops: set ops * @flags: set flags * @dead: set will be freed, never cleared * @genmask: generation mask * @klen: key length * @dlen: data length + * @num_exprs: numbers of exprs + * @exprs: stateful expression + * @catchall_list: list of catch-all set element * @data: private set data */ struct nft_set { @@ -692,6 +701,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[]; * * @len: length of extension area * @offset: offsets of individual extension types + * @ext_len: length of the expected extension(used to sanity check) */ struct nft_set_ext_tmpl { u16 len; @@ -840,6 +850,7 @@ struct nft_expr_ops; * @select_ops: function to select nft_expr_ops * @release_ops: release nft_expr_ops * @ops: default ops, used when no select_ops functions is present + * @inner_ops: inner ops, used for inner packet operation * @list: used internally * @name: Identifier * @owner: module reference @@ -881,14 +892,22 @@ struct nft_offload_ctx; * struct nft_expr_ops - nf_tables expression operations * * @eval: Expression evaluation function + * @clone: Expression clone function * @size: full expression size, including private data size * @init: initialization function * @activate: activate expression in the next generation * @deactivate: deactivate expression in next generation * @destroy: destruction function, called after synchronize_rcu + * @destroy_clone: destruction clone function * @dump: function to dump parameters - * @type: expression type * @validate: validate expression, called during loop detection + * @reduce: reduce expression + * @gc: garbage collection expression + * @offload: hardware offload expression + * @offload_action: function to report true/false to allocate one slot or not in the flow + * offload array + * @offload_stats: function to synchronize hardware stats via updating the counter expression + * @type: expression type * @data: extra data to attach to this expression operation */ struct nft_expr_ops { @@ -1041,14 +1060,21 @@ struct nft_rule_blob { /** * struct nft_chain - nf_tables chain * + * @blob_gen_0: rule blob pointer to the current generation + * @blob_gen_1: rule blob pointer to the future generation * @rules: list of rules in the chain * @list: used internally * @rhlhead: used internally * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain - * @flags: bitmask of enum nft_chain_flags + * @flags: bitmask of enum NFTA_CHAIN_FLAGS + * @bound: bind or not + * @genmask: generation mask * @name: name of the chain + * @udlen: user data length + * @udata: user data in the chain + * @blob_next: rule blob pointer to the next in the chain */ struct nft_chain { struct nft_rule_blob __rcu *blob_gen_0; @@ -1146,6 +1172,7 @@ struct nft_hook { * @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family) * @type: chain type * @policy: default policy + * @flags: indicate the base chain disabled or not * @stats: per-cpu chain stats * @chain: the chain * @flow_block: flow block (for hardware offload) @@ -1274,11 +1301,13 @@ struct nft_object_hash_key { * struct nft_object - nf_tables stateful object * * @list: table stateful object list node - * @key: keys that identify this object * @rhlhead: nft_objname_ht node + * @key: keys that identify this object * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle + * @udlen: length of user data + * @udata: user data * @ops: object operations * @data: object data, layout depends on type */ @@ -1344,6 +1373,7 @@ struct nft_object_type { * @destroy: release existing stateful object * @dump: netlink dump stateful object * @update: update stateful object + * @type: pointer to object type */ struct nft_object_ops { void (*eval)(struct nft_object *obj, @@ -1379,9 +1409,8 @@ void nft_unregister_obj(struct nft_object_type *obj_type); * @genmask: generation mask * @use: number of references to this flow table * @handle: unique object handle - * @dev_name: array of device names + * @hook_list: hook list for hooks per net_device in flowtables * @data: rhashtable and garbage collector - * @ops: array of hooks */ struct nft_flowtable { struct list_head list; From 01acb2e8666a6529697141a6017edbf206921913 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Jan 2024 10:56:26 +0100 Subject: [PATCH 318/768] netfilter: nft_chain_filter: handle NETDEV_UNREGISTER for inet/ingress basechain Remove netdevice from inet/ingress basechain in case NETDEV_UNREGISTER event is reported, otherwise a stale reference to netdevice remains in the hook list. Fixes: 60a3815da702 ("netfilter: add inet ingress support") Cc: stable@vger.kernel.org Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_chain_filter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 680fe557686e..274b6f7e6bb5 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_base_chain *basechain; struct nftables_pernet *nft_net; - struct nft_table *table; struct nft_chain *chain, *nr; + struct nft_table *table; struct nft_ctx ctx = { .net = dev_net(dev), }; @@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { - if (table->family != NFPROTO_NETDEV) + if (table->family != NFPROTO_NETDEV && + table->family != NFPROTO_INET) continue; ctx.family = table->family; @@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this, if (!nft_is_base_chain(chain)) continue; + basechain = nft_base_chain(chain); + if (table->family == NFPROTO_INET && + basechain->ops.hooknum != NF_INET_INGRESS) + continue; + ctx.chain = chain; nft_netdev_event(event, dev, &ctx); } From c9d9eb9c53d37cdebbad56b91e40baf42d5a97aa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jan 2024 13:11:32 +0100 Subject: [PATCH 319/768] netfilter: nft_limit: reject configurations that cause integer overflow Reject bogus configs where internal token counter wraps around. This only occurs with very very large requests, such as 17gbyte/s. Its better to reject this rather than having incorrect ratelimit. Fixes: d2168e849ebf ("netfilter: nft_limit: add per-byte limiting") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 79039afde34e..cefa25e0dbb0 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -58,17 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost) static int nft_limit_init(struct nft_limit_priv *priv, const struct nlattr * const tb[], bool pkts) { + u64 unit, tokens, rate_with_burst; bool invert = false; - u64 unit, tokens; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + if (priv->rate == 0) + return -EINVAL; + unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->nsecs = unit * NSEC_PER_SEC; - if (priv->rate == 0 || priv->nsecs < unit) + if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs)) return -EOVERFLOW; if (tb[NFTA_LIMIT_BURST]) @@ -77,18 +79,25 @@ static int nft_limit_init(struct nft_limit_priv *priv, if (pkts && priv->burst == 0) priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT; - if (priv->rate + priv->burst < priv->rate) + if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst)) return -EOVERFLOW; if (pkts) { - tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst; + u64 tmp = div64_u64(priv->nsecs, priv->rate); + + if (check_mul_overflow(tmp, priv->burst, &tokens)) + return -EOVERFLOW; } else { + u64 tmp; + /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst), - priv->rate); + if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp)) + return -EOVERFLOW; + + tokens = div64_u64(tmp, priv->rate); } if (tb[NFTA_LIMIT_FLAGS]) { From b462579b2b86a8f5230543cadd3a4836be27baf7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jan 2024 13:34:32 +0100 Subject: [PATCH 320/768] netfilter: nf_tables: restrict anonymous set and map names to 16 bytes nftables has two types of sets/maps, one where userspace defines the name, and anonymous sets/maps, where userspace defines a template name. For the latter, kernel requires presence of exactly one "%d". nftables uses "__set%d" and "__map%d" for this. The kernel will expand the format specifier and replaces it with the smallest unused number. As-is, userspace could define a template name that allows to move the set name past the 256 bytes upperlimit (post-expansion). I don't see how this could be a problem, but I would prefer if userspace cannot do this, so add a limit of 16 bytes for the '%d' template name. 16 bytes is the old total upper limit for set names that existed when nf_tables was merged initially. Fixes: 387454901bd6 ("netfilter: nf_tables: Allow set names of up to 255 chars") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4b55533ce5ca..02f45424644b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -24,6 +24,7 @@ #include #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +#define NFT_SET_MAX_ANONLEN 16 unsigned int nf_tables_net_id __read_mostly; @@ -4413,6 +4414,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; + if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN) + return -EINVAL; + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); if (inuse == NULL) return -ENOMEM; From f342de4e2f33e0e39165d8639387aa6c19dff660 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Jan 2024 22:50:04 +0100 Subject: [PATCH 321/768] netfilter: nf_tables: reject QUEUE/DROP verdict parameters This reverts commit e0abdadcc6e1. core.c:nf_hook_slow assumes that the upper 16 bits of NF_DROP verdicts contain a valid errno, i.e. -EPERM, -EHOSTUNREACH or similar, or 0. Due to the reverted commit, its possible to provide a positive value, e.g. NF_ACCEPT (1), which results in use-after-free. Its not clear to me why this commit was made. NF_QUEUE is not used by nftables; "queue" rules in nftables will result in use of "nft_queue" expression. If we later need to allow specifiying errno values from userspace (do not know why), this has to call NF_DROP_GETERR and check that "err <= 0" holds true. Fixes: e0abdadcc6e1 ("netfilter: nf_tables: accept QUEUE/DROP verdict parameters") Cc: stable@vger.kernel.org Reported-by: Notselwyn Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 02f45424644b..c537104411e7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10992,16 +10992,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { - default: - switch (data->verdict.code & NF_VERDICT_MASK) { - case NF_ACCEPT: - case NF_DROP: - case NF_QUEUE: - break; - default: - return -EINVAL; - } - fallthrough; + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -11036,6 +11030,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.chain = chain; break; + default: + return -EINVAL; } desc->len = sizeof(data->verdict); From d0009effa8862c20a13af4cb7475d9771b905693 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 23 Jan 2024 16:38:25 +0100 Subject: [PATCH 322/768] netfilter: nf_tables: validate NFPROTO_* family Several expressions explicitly refer to NF_INET_* hook definitions from expr->ops->validate, however, family is not validated. Bail out with EOPNOTSUPP in case they are used from unsupported families. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Fixes: 2fa841938c64 ("netfilter: nf_tables: introduce routing expression") Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching") Fixes: ad49d86e07a4 ("netfilter: nf_tables: Add synproxy support") Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Fixes: 6c47260250fc ("netfilter: nf_tables: add xfrm expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 12 ++++++++++++ net/netfilter/nft_flow_offload.c | 5 +++++ net/netfilter/nft_nat.c | 5 +++++ net/netfilter/nft_rt.c | 5 +++++ net/netfilter/nft_socket.c | 5 +++++ net/netfilter/nft_synproxy.c | 7 +++++-- net/netfilter/nft_tproxy.c | 5 +++++ net/netfilter/nft_xfrm.c | 5 +++++ 8 files changed, 47 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 5284cd2ad532..f0eeda97bfcd 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -350,6 +350,12 @@ static int nft_target_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); @@ -595,6 +601,12 @@ static int nft_match_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_BRIDGE && + ctx->family != NFPROTO_ARP) + return -EOPNOTSUPP; + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ab3362c483b4..397351fa4d5f 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -384,6 +384,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx, { unsigned int hook_mask = (1 << NF_INET_FORWARD); + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, hook_mask); } diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 583885ce7232..808f5802c270 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -143,6 +143,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx, struct nft_nat *priv = nft_expr_priv(expr); int err; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 35a2c28caa60..24d977138572 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp const struct nft_rt *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->key) { case NFT_RT_NEXTHOP4: case NFT_RT_NEXTHOP6: diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 9ed85be79452..f30163e2ca62 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 13da882669a4..1d737f89dfc1 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: err = nf_synproxy_ipv4_init(snet, ctx->net); if (err) goto nf_ct_failure; @@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) break; #endif case NFPROTO_INET: - case NFPROTO_BRIDGE: nf_synproxy_ipv4_fini(snet, ctx->net); nf_synproxy_ipv6_fini(snet, ctx->net); break; @@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD)); } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index ae15cd693f0e..71412adb73d4 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); } diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 452f8587adda..1c866757db55 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e const struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int hooks; + if (ctx->family != NFPROTO_IPV4 && + ctx->family != NFPROTO_IPV6 && + ctx->family != NFPROTO_INET) + return -EOPNOTSUPP; + switch (priv->dir) { case XFRM_POLICY_IN: hooks = (1 << NF_INET_FORWARD) | From 4759ff71f23e1a9cba001009abab68cde6dc327a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 24 Jan 2024 11:22:32 -0800 Subject: [PATCH 323/768] exec: Check __FMODE_EXEC instead of in_execve for LSMs After commit 978ffcbf00d8 ("execve: open the executable file before doing anything else"), current->in_execve was no longer in sync with the open(). This broke AppArmor and TOMOYO which depend on this flag to distinguish "open" operations from being "exec" operations. Instead of moving around in_execve, switch to using __FMODE_EXEC, which is where the "is this an exec?" intent is stored. Note that TOMOYO still uses in_execve around cred handling. Reported-by: Kevin Locke Closes: https://lore.kernel.org/all/ZbE4qn9_h14OqADK@kevinlocke.name Suggested-by: Linus Torvalds Fixes: 978ffcbf00d8 ("execve: open the executable file before doing anything else") Cc: Josh Triplett Cc: John Johansen Cc: Paul Moore Cc: James Morris Cc: Serge E. Hallyn Cc: Kentaro Takeda Cc: Tetsuo Handa Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Eric Biederman Cc: Andrew Morton Cc: Sebastian Andrzej Siewior Cc: Cc: Cc: Cc: Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- security/apparmor/lsm.c | 4 +++- security/tomoyo/tomoyo.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 7717354ce095..98e1150bee9d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -469,8 +469,10 @@ static int apparmor_file_open(struct file *file) * Cache permissions granted by the previous exec check, with * implicit read and executable mmap which are required to * actually execute the image. + * + * Illogically, FMODE_EXEC is in f_flags, not f_mode. */ - if (current->in_execve) { + if (file->f_flags & __FMODE_EXEC) { fctx->allow = MAY_EXEC | MAY_READ | AA_EXEC_MMAP; return 0; } diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 3c3af149bf1c..04a92c3d65d4 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -328,7 +328,8 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, static int tomoyo_file_open(struct file *f) { /* Don't check read permission here if called from execve(). */ - if (current->in_execve) + /* Illogically, FMODE_EXEC is in f_flags, not f_mode. */ + if (f->f_flags & __FMODE_EXEC) return 0; return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, f->f_flags); From 90383cc07895183c75a0db2460301c2ffd912359 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 24 Jan 2024 11:15:33 -0800 Subject: [PATCH 324/768] exec: Distinguish in_execve from in_exec Just to help distinguish the fs->in_exec flag from the current->in_execve flag, add comments in check_unsafe_exec() and copy_fs() for more context. Also note that in_execve is only used by TOMOYO now. Cc: Kentaro Takeda Cc: Tetsuo Handa Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Eric Biederman Cc: Andrew Morton Cc: Sebastian Andrzej Siewior Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Kees Cook --- fs/exec.c | 1 + include/linux/sched.h | 2 +- kernel/fork.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 39d773021fff..d179abb78a1c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1633,6 +1633,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) } rcu_read_unlock(); + /* "users" and "in_exec" locked for copy_fs() */ if (p->fs->users > n_fs) bprm->unsafe |= LSM_UNSAFE_SHARE; else diff --git a/include/linux/sched.h b/include/linux/sched.h index cdb8ea53c365..ffe8f618ab86 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -920,7 +920,7 @@ struct task_struct { unsigned sched_rt_mutex:1; #endif - /* Bit to tell LSMs we're in execve(): */ + /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK diff --git a/kernel/fork.c b/kernel/fork.c index 47ff3b35352e..0d944e92a43f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1748,6 +1748,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ spin_lock(&fs->lock); + /* "users" and "in_exec" locked for check_unsafe_exec() */ if (fs->in_exec) { spin_unlock(&fs->lock); return -EAGAIN; From 443b349019f2d9461b23213a4308f9cf72e41c5e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 24 Jan 2024 11:52:40 -0800 Subject: [PATCH 325/768] samples/cgroup: add .gitignore file for generated samples Make 'git status' quietly happy again after a full allmodconfig build. Fixes: 60433a9d038d ("samples: introduce new samples subdir for cgroup") Signed-off-by: Linus Torvalds --- samples/cgroup/.gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 samples/cgroup/.gitignore diff --git a/samples/cgroup/.gitignore b/samples/cgroup/.gitignore new file mode 100644 index 000000000000..3a0161194cce --- /dev/null +++ b/samples/cgroup/.gitignore @@ -0,0 +1,3 @@ +/cgroup_event_listener +/memcg_event_listener + From a5f5eee282a0aae80227697e1d9c811b1726d31d Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Mon, 22 Jan 2024 19:19:09 +0100 Subject: [PATCH 326/768] net: stmmac: Wait a bit for the reset to take effect otherwise the synopsys_id value may be read out wrong, because the GMAC_VERSION register might still be in reset state, for at least 1 us after the reset is de-asserted. Add a wait for 10 us before continuing to be on the safe side. > From what have you got that delay value? Just try and error, with very old linux versions and old gcc versions the synopsys_id was read out correctly most of the time (but not always), with recent linux versions and recnet gcc versions it was read out wrongly most of the time, but again not always. I don't have access to the VHDL code in question, so I cannot tell why it takes so long to get the correct values, I also do not have more than a few hardware samples, so I cannot tell how long this timeout must be in worst case. Experimentally I can tell that the register is read several times as zero immediately after the reset is de-asserted, also adding several no-ops is not enough, adding a printk is enough, also udelay(1) seems to be enough but I tried that not very often, and I have not access to many hardware samples to be 100% sure about the necessary delay. And since the udelay here is only executed once per device instance, it seems acceptable to delay the boot for 10 us. BTW: my hardware's synopsys id is 0x37. Fixes: c5e4ddbdfa11 ("net: stmmac: Add support for optional reset control") Signed-off-by: Bernd Edlinger Reviewed-by: Jiri Pirko Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/AS8P193MB1285A810BD78C111E7F6AA34E4752@AS8P193MB1285.EURP193.PROD.OUTLOOK.COM Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a0e46369ae15..b334eb16da23 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7542,6 +7542,9 @@ int stmmac_dvr_probe(struct device *device, dev_err(priv->device, "unable to bring out of ahb reset: %pe\n", ERR_PTR(ret)); + /* Wait a bit for the reset to take effect */ + udelay(10); + /* Init MAC and get the capabilities */ ret = stmmac_hw_init(priv); if (ret) From 9f538b415db862e74b8c5d3abbccfc1b2b6caa38 Mon Sep 17 00:00:00 2001 From: Jenishkumar Maheshbhai Patel Date: Thu, 18 Jan 2024 19:59:14 -0800 Subject: [PATCH 327/768] net: mvpp2: clear BM pool before initialization Register value persist after booting the kernel using kexec which results in kernel panic. Thus clear the BM pool registers before initialisation to fix the issue. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Jenishkumar Maheshbhai Patel Reviewed-by: Maxime Chevallier Link: https://lore.kernel.org/r/20240119035914.2595665-1-jpatel2@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 820b1fabe297..23adf53c2aa1 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -614,12 +614,38 @@ static void mvpp23_bm_set_8pool_mode(struct mvpp2 *priv) mvpp2_write(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG, val); } +/* Cleanup pool before actual initialization in the OS */ +static void mvpp2_bm_pool_cleanup(struct mvpp2 *priv, int pool_id) +{ + unsigned int thread = mvpp2_cpu_to_thread(priv, get_cpu()); + u32 val; + int i; + + /* Drain the BM from all possible residues left by firmware */ + for (i = 0; i < MVPP2_BM_POOL_SIZE_MAX; i++) + mvpp2_thread_read(priv, thread, MVPP2_BM_PHY_ALLOC_REG(pool_id)); + + put_cpu(); + + /* Stop the BM pool */ + val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(pool_id)); + val |= MVPP2_BM_STOP_MASK; + mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(pool_id), val); +} + static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) { enum dma_data_direction dma_dir = DMA_FROM_DEVICE; int i, err, poolnum = MVPP2_BM_POOLS_NUM; struct mvpp2_port *port; + if (priv->percpu_pools) + poolnum = mvpp2_get_nrxqs(priv) * 2; + + /* Clean up the pool state in case it contains stale state */ + for (i = 0; i < poolnum; i++) + mvpp2_bm_pool_cleanup(priv, i); + if (priv->percpu_pools) { for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; @@ -629,7 +655,6 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) } } - poolnum = mvpp2_get_nrxqs(priv) * 2; for (i = 0; i < poolnum; i++) { /* the pool in use */ int pn = i / (poolnum / 2); From 1ed4b563100230ea68821a2b25a3d9f25388a3e6 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 24 Jan 2024 14:21:44 -0500 Subject: [PATCH 328/768] Revert "KEYS: encrypted: Add check for strsep" This reverts commit b4af096b5df5dd131ab796c79cedc7069d8f4882. New encrypted keys are created either from kernel-generated random numbers or user-provided decrypted data. Revert the change requiring user-provided decrypted data. Reported-by: Vishal Verma Signed-off-by: Mimi Zohar --- security/keys/encrypted-keys/encrypted.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 76f55dd13cb8..8af2136069d2 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -237,10 +237,6 @@ static int datablob_parse(char *datablob, const char **format, break; } *decrypted_data = strsep(&datablob, " \t"); - if (!*decrypted_data) { - pr_info("encrypted_key: decrypted_data is missing\n"); - break; - } ret = 0; break; case Opt_load: From 3eab830189d94f0f80f34cbff609b5bb54002679 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 24 Jan 2024 13:12:20 -0800 Subject: [PATCH 329/768] uselib: remove use of __FMODE_EXEC Jann Horn points out that uselib() really shouldn't trigger the new FMODE_EXEC logic introduced by commit 4759ff71f23e ("exec: __FMODE_EXEC instead of in_execve for LSMs"). In fact, it shouldn't even have ever triggered the old pre-existing logic for __FMODE_EXEC (like the NFS code that makes executables not need read permissions). Unlike a real execve(), that can work even with files that are purely executable by the user (not readable), uselib() has that MAY_READ requirement becasue it's really just a convenience wrapper around mmap() for legacy shared libraries. The whole FMODE_EXEC bit was originally introduced by commit b500531e6f5f ("[PATCH] Introduce FMODE_EXEC file flag"), primarily to give ETXTBUSY error returns for distributed filesystems. It has since grown a few other warts (like that NFS thing), but there really isn't any reason to use it for uselib(), and now that we are trying to use it to replace the horrid 'tsk->in_execve' flag, it's actively wrong. Of course, as Jann Horn also points out, nobody should be enabling CONFIG_USELIB in the first place in this day and age, but that's a different discussion entirely. Reported-by: Jann Horn Fixes: 4759ff71f23e ("exec: __FMODE_EXEC instead of in_execve for LSMs") Cc: Kees Cook Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 8cdd5b2dd09c..1a097c1c2f77 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) struct filename *tmp = getname(library); int error = PTR_ERR(tmp); static const struct open_flags uselib_flags = { - .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, + .open_flag = O_LARGEFILE | O_RDONLY, .acc_mode = MAY_READ | MAY_EXEC, .intent = LOOKUP_OPEN, .lookup_flags = LOOKUP_FOLLOW, From 0719b5338a0cbe80d1637a5fb03d8141b5bfc7a1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 22 Jan 2024 11:58:15 -0800 Subject: [PATCH 330/768] selftests: net: fix rps_default_mask with >32 CPUs If there is more than 32 cpus the bitmask will start to contain commas, leading to: ./rps_default_mask.sh: line 36: [: 00000000,00000000: integer expression expected Remove the commas, bash doesn't interpret leading zeroes as oct so that should be good enough. Switch to bash, Simon reports that not all shells support this type of substitution. Fixes: c12e0d5f267d ("self-tests: introduce self-tests for RPS default mask") Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240122195815.638997-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rps_default_mask.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index a26c5624429f..4287a8529890 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 readonly ksft_skip=4 @@ -33,6 +33,10 @@ chk_rps() { rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus) printf "%-60s" "$msg" + + # In case there is more than 32 CPUs we need to remove commas from masks + rps_mask=${rps_mask//,} + expected_rps_mask=${expected_rps_mask//,} if [ $rps_mask -eq $expected_rps_mask ]; then echo "[ ok ]" else From 096386a5bcf02e4053dc8b6cacb09a8493eeee4f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 22 Jan 2024 18:08:51 -0500 Subject: [PATCH 331/768] bcachefs: discard path uses unlock_long() Some (bad) devices can have really terrible discard latency; we don't want them blocking memory reclaim and causing warnings. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 10704f2d3af5..fd3e175d8342 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1715,7 +1715,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, * This works without any other locks because this is the only * thread that removes items from the need_discard tree */ - bch2_trans_unlock(trans); + bch2_trans_unlock_long(trans); blkdev_issue_discard(ca->disk_sb.bdev, k.k->p.offset * ca->mi.bucket_size, ca->mi.bucket_size, From 0879020a7817e7ce636372c016b4528f541c9f4d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 22 Jan 2024 22:05:29 -0800 Subject: [PATCH 332/768] selftests: netdevsim: fix the udp_tunnel_nic test This test is missing a whole bunch of checks for interface renaming and one ifup. Presumably it was only used on a system with renaming disabled and NetworkManager running. Fixes: 91f430b2c49d ("selftests: net: add a test for UDP tunnel info infra") Acked-by: Paolo Abeni Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123060529.1033912-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/netdevsim/udp_tunnel_nic.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 4855ef597a15..f98435c502f6 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -270,6 +270,7 @@ for port in 0 1; do echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` + ifconfig $NSIM_NETDEV up msg="new NIC device created" exp0=( 0 0 0 0 ) @@ -431,6 +432,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -488,6 +490,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "overflow NIC table" @@ -544,6 +547,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up overflow_table0 "destroy NIC" @@ -573,6 +577,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -633,6 +638,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error @@ -688,6 +694,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -747,6 +754,7 @@ for port in 0 1; do fi echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` ifconfig $NSIM_NETDEV up msg="create VxLANs v6" @@ -877,6 +885,7 @@ msg="re-add a port" echo 2 > $NSIM_DEV_SYS/del_port echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` check_tables msg="replace VxLAN in overflow table" From f5e414167be768b0373891d301478351f757ec65 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:22 -0800 Subject: [PATCH 333/768] net: fill in MODULE_DESCRIPTION()s for 8390 W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to all the good old 8390 modules and drivers. Signed-off-by: Breno Leitao CC: geert@linux-m68k.org Link: https://lore.kernel.org/r/20240123190332.677489-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/8390/8390.c | 1 + drivers/net/ethernet/8390/8390p.c | 1 + drivers/net/ethernet/8390/apne.c | 1 + drivers/net/ethernet/8390/hydra.c | 1 + drivers/net/ethernet/8390/stnic.c | 1 + drivers/net/ethernet/8390/zorro8390.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c index 0e0aa4016858..c5636245f1ca 100644 --- a/drivers/net/ethernet/8390/8390.c +++ b/drivers/net/ethernet/8390/8390.c @@ -100,4 +100,5 @@ static void __exit ns8390_module_exit(void) module_init(ns8390_module_init); module_exit(ns8390_module_exit); #endif /* MODULE */ +MODULE_DESCRIPTION("National Semiconductor 8390 core driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c index 6834742057b3..6d429b11e9c6 100644 --- a/drivers/net/ethernet/8390/8390p.c +++ b/drivers/net/ethernet/8390/8390p.c @@ -102,4 +102,5 @@ static void __exit NS8390p_cleanup_module(void) module_init(NS8390p_init_module); module_exit(NS8390p_cleanup_module); +MODULE_DESCRIPTION("National Semiconductor 8390 core for ISA driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index a09f383dd249..828edca8d30c 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -610,4 +610,5 @@ static int init_pcmcia(void) return 1; } +MODULE_DESCRIPTION("National Semiconductor 8390 Amiga PCMCIA ethernet driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c index 24f49a8ff903..fd9dcdc356e6 100644 --- a/drivers/net/ethernet/8390/hydra.c +++ b/drivers/net/ethernet/8390/hydra.c @@ -270,4 +270,5 @@ static void __exit hydra_cleanup_module(void) module_init(hydra_init_module); module_exit(hydra_cleanup_module); +MODULE_DESCRIPTION("Zorro-II Hydra 8390 ethernet driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c index 265976e3b64a..6cc0e190aa79 100644 --- a/drivers/net/ethernet/8390/stnic.c +++ b/drivers/net/ethernet/8390/stnic.c @@ -296,4 +296,5 @@ static void __exit stnic_cleanup(void) module_init(stnic_probe); module_exit(stnic_cleanup); +MODULE_DESCRIPTION("National Semiconductor DP83902AV ethernet driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c index d70390e9d03d..c24dd4fe7a10 100644 --- a/drivers/net/ethernet/8390/zorro8390.c +++ b/drivers/net/ethernet/8390/zorro8390.c @@ -443,4 +443,5 @@ static void __exit zorro8390_cleanup_module(void) module_init(zorro8390_init_module); module_exit(zorro8390_cleanup_module); +MODULE_DESCRIPTION("Zorro NS8390-based ethernet driver"); MODULE_LICENSE("GPL"); From 39535d7ff6c1e5bda0a3f3c87250bab63e910969 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:23 -0800 Subject: [PATCH 334/768] net: fill in MODULE_DESCRIPTION()s for Broadcom bgmac W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Broadcom iProc GBit driver. Signed-off-by: Breno Leitao Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20240123190332.677489-3-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcm4908_enet.c | 1 + drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c | 1 + drivers/net/ethernet/broadcom/bgmac-bcma.c | 1 + drivers/net/ethernet/broadcom/bgmac-platform.c | 1 + drivers/net/ethernet/broadcom/bgmac.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 3e7c8671cd11..72df1bb10172 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -793,5 +793,6 @@ static struct platform_driver bcm4908_enet_driver = { }; module_platform_driver(bcm4908_enet_driver); +MODULE_DESCRIPTION("Broadcom BCM4908 Gigabit Ethernet driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, bcm4908_enet_of_match); diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c index 9b83d5361699..50b8e97a811d 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c @@ -260,4 +260,5 @@ void bcma_mdio_mii_unregister(struct mii_bus *mii_bus) EXPORT_SYMBOL_GPL(bcma_mdio_mii_unregister); MODULE_AUTHOR("Rafał Miłecki"); +MODULE_DESCRIPTION("Broadcom iProc GBit BCMA MDIO helpers"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 6e4f36aaf5db..36f9bad28e6a 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -362,4 +362,5 @@ module_init(bgmac_init) module_exit(bgmac_exit) MODULE_AUTHOR("Rafał Miłecki"); +MODULE_DESCRIPTION("Broadcom iProc GBit BCMA interface driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index 0b21fd5bd457..77425c7a32db 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -298,4 +298,5 @@ static struct platform_driver bgmac_enet_driver = { }; module_platform_driver(bgmac_enet_driver); +MODULE_DESCRIPTION("Broadcom iProc GBit platform interface driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 448a1b90de5e..6ffdc4229407 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1626,4 +1626,5 @@ int bgmac_enet_resume(struct bgmac *bgmac) EXPORT_SYMBOL_GPL(bgmac_enet_resume); MODULE_AUTHOR("Rafał Miłecki"); +MODULE_DESCRIPTION("Broadcom iProc GBit driver"); MODULE_LICENSE("GPL"); From bb567fbbbbb41d61b685e224098806a90df8cef2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:24 -0800 Subject: [PATCH 335/768] net: fill in MODULE_DESCRIPTION()s for liquidio W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Cavium Liquidio. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240123190332.677489-4-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/liquidio/lio_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 9cc6303c82ff..f38d31bfab1b 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -27,6 +27,7 @@ #include "octeon_network.h" MODULE_AUTHOR("Cavium Networks, "); +MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Core"); MODULE_LICENSE("GPL"); /* OOM task polling interval */ From 53c83e2d36484f8df87792bb5368653bdb441014 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:25 -0800 Subject: [PATCH 336/768] net: fill in MODULE_DESCRIPTION()s for ep93xxx_eth W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Cirrus EP93xx ethernet driver. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240123190332.677489-5-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index 1c2a540db13d..1f495cfd7959 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -868,5 +868,6 @@ static struct platform_driver ep93xx_eth_driver = { module_platform_driver(ep93xx_eth_driver); +MODULE_DESCRIPTION("Cirrus EP93xx Ethernet driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:ep93xx-eth"); From 27881ca8c8e1b6179ac41dc787cbfc3cb4362ff8 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:26 -0800 Subject: [PATCH 337/768] net: fill in MODULE_DESCRIPTION()s for nps_enet W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the EZchip NPS ethernet driver. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240123190332.677489-6-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ezchip/nps_enet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 07c2b701b5fa..9ebe751c1df0 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -661,4 +661,5 @@ static struct platform_driver nps_enet_driver = { module_platform_driver(nps_enet_driver); MODULE_AUTHOR("EZchip Semiconductor"); +MODULE_DESCRIPTION("EZchip NPS Ethernet driver"); MODULE_LICENSE("GPL v2"); From 07c42d237567d47225499d0586b9b90a432a7b58 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:27 -0800 Subject: [PATCH 338/768] net: fill in MODULE_DESCRIPTION()s for enetc W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the NXP ENETC Ethernet driver. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240123190332.677489-7-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index cffbf27c4656..bfdbdab443ae 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -3216,4 +3216,5 @@ void enetc_pci_remove(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(enetc_pci_remove); +MODULE_DESCRIPTION("NXP ENETC Ethernet driver"); MODULE_LICENSE("Dual BSD/GPL"); From 2e87576488552aab742391b6c442beedffd31abe Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:28 -0800 Subject: [PATCH 339/768] net: fill in MODULE_DESCRIPTION()s for fec W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the FEC (MPC8xx) Ethernet controller. Signed-off-by: Breno Leitao Reviewed-by: Wei Fang Link: https://lore.kernel.org/r/20240123190332.677489-8-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d42594f32275..4b0259e9269a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -4769,4 +4769,5 @@ static struct platform_driver fec_driver = { module_platform_driver(fec_driver); +MODULE_DESCRIPTION("NXP Fast Ethernet Controller (FEC) driver"); MODULE_LICENSE("GPL"); From 8183c470c17602275ec1e3525d010f6c9cd383e9 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:29 -0800 Subject: [PATCH 340/768] net: fill in MODULE_DESCRIPTION()s for fsl_pq_mdio W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Freescale PQ MDIO driver. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240123190332.677489-9-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 70dd982a5edc..026f7270a54d 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -531,4 +531,5 @@ static struct platform_driver fsl_pq_mdio_driver = { module_platform_driver(fsl_pq_mdio_driver); +MODULE_DESCRIPTION("Freescale PQ MDIO helpers"); MODULE_LICENSE("GPL"); From 07d1e0ce874377a88c13bb56a336d6c544367837 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:30 -0800 Subject: [PATCH 341/768] net: fill in MODULE_DESCRIPTION()s for litex W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the LiteX Liteeth Ethernet device. Signed-off-by: Breno Leitao Acked-by: Gabriel Somlo Link: https://lore.kernel.org/r/20240123190332.677489-10-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/litex/litex_liteeth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index 5182fe737c37..ff54fbe41bcc 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -318,4 +318,5 @@ static struct platform_driver liteeth_driver = { module_platform_driver(liteeth_driver); MODULE_AUTHOR("Joel Stanley "); +MODULE_DESCRIPTION("LiteX Liteeth Ethernet driver"); MODULE_LICENSE("GPL"); From bdc6734115d7f66d8bea155454d4ce9259821660 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 23 Jan 2024 11:03:31 -0800 Subject: [PATCH 342/768] net: fill in MODULE_DESCRIPTION()s for rvu_mbox W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Marvel RVU mbox driver. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240123190332.677489-11-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/mbox.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 9690ac01f02c..b92264d0a77e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -413,4 +413,5 @@ const char *otx2_mbox_id2name(u16 id) EXPORT_SYMBOL(otx2_mbox_id2name); MODULE_AUTHOR("Marvell."); +MODULE_DESCRIPTION("Marvell RVU NIC Mbox helpers"); MODULE_LICENSE("GPL v2"); From 269009893146c495f41e9572dd9319e787c2eba9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:15:52 +0100 Subject: [PATCH 343/768] xsk: recycle buffer in case Rx queue was full Add missing xsk_buff_free() call when __xsk_rcv_zc() failed to produce descriptor to XSK Rx queue. Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Acked-by: Magnus Karlsson Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-2-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- net/xdp/xsk.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 9f13aa3353e3..1eadfac03cc4 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -167,8 +167,10 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) contd = XDP_PKT_CONTD; err = __xsk_rcv_zc(xs, xskb, len, contd); - if (err || likely(!frags)) - goto out; + if (err) + goto err; + if (likely(!frags)) + return 0; xskb_list = &xskb->pool->xskb_list; list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { @@ -177,11 +179,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) len = pos->xdp.data_end - pos->xdp.data; err = __xsk_rcv_zc(xs, pos, len, contd); if (err) - return err; + goto err; list_del(&pos->xskb_list_node); } -out: + return 0; +err: + xsk_buff_free(xdp); return err; } From f7f6aa8e24383fbb11ac55942e66da9660110f80 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:15:53 +0100 Subject: [PATCH 344/768] xsk: make xsk_buff_pool responsible for clearing xdp_buff::flags XDP multi-buffer support introduced XDP_FLAGS_HAS_FRAGS flag that is used by drivers to notify data path whether xdp_buff contains fragments or not. Data path looks up mentioned flag on first buffer that occupies the linear part of xdp_buff, so drivers only modify it there. This is sufficient for SKB and XDP_DRV modes as usually xdp_buff is allocated on stack or it resides within struct representing driver's queue and fragments are carried via skb_frag_t structs. IOW, we are dealing with only one xdp_buff. ZC mode though relies on list of xdp_buff structs that is carried via xsk_buff_pool::xskb_list, so ZC data path has to make sure that fragments do *not* have XDP_FLAGS_HAS_FRAGS set. Otherwise, xsk_buff_free() could misbehave if it would be executed against xdp_buff that carries a frag with XDP_FLAGS_HAS_FRAGS flag set. Such scenario can take place when within supplied XDP program bpf_xdp_adjust_tail() is used with negative offset that would in turn release the tail fragment from multi-buffer frame. Calling xsk_buff_free() on tail fragment with XDP_FLAGS_HAS_FRAGS would result in releasing all the nodes from xskb_list that were produced by driver before XDP program execution, which is not what is intended - only tail fragment should be deleted from xskb_list and then it should be put onto xsk_buff_pool::free_list. Such multi-buffer frame will never make it up to user space, so from AF_XDP application POV there would be no traffic running, however due to free_list getting constantly new nodes, driver will be able to feed HW Rx queue with recycled buffers. Bottom line is that instead of traffic being redirected to user space, it would be continuously dropped. To fix this, let us clear the mentioned flag on xsk_buff_pool side during xdp_buff initialization, which is what should have been done right from the start of XSK multi-buffer support. Fixes: 1bbc04de607b ("ice: xsk: add RX multi-buffer support") Fixes: 1c9ba9c14658 ("i40e: xsk: add RX multi-buffer support") Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-3-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 1 - drivers/net/ethernet/intel/ice/ice_xsk.c | 1 - include/net/xdp_sock_drv.h | 1 + net/xdp/xsk_buff_pool.c | 1 + 4 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index af7d5fa6cdc1..82aca0d16a3e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -498,7 +498,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog); i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets, &rx_bytes, xdp_res, &failure); - first->flags = 0; next_to_clean = next_to_process; if (failure) break; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 5d1ae8e4058a..d9073a618ad6 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -895,7 +895,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (!first) { first = xdp; - xdp_buff_clear_frags_flag(first); } else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) { break; } diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 526c1e7f505e..9819e2af0378 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -164,6 +164,7 @@ static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; xdp->data_meta = xdp->data; xdp->data_end = xdp->data + size; + xdp->flags = 0; } static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 28711cc44ced..ce60ecd48a4d 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -555,6 +555,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; xskb->xdp.data_meta = xskb->xdp.data; + xskb->xdp.flags = 0; if (pool->dma_need_sync) { dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, From c5114710c8ce86b8317e9b448f4fd15c711c2a82 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:15:54 +0100 Subject: [PATCH 345/768] xsk: fix usage of multi-buffer BPF helpers for ZC XDP Currently when packet is shrunk via bpf_xdp_adjust_tail() and memory type is set to MEM_TYPE_XSK_BUFF_POOL, null ptr dereference happens: [1136314.192256] BUG: kernel NULL pointer dereference, address: 0000000000000034 [1136314.203943] #PF: supervisor read access in kernel mode [1136314.213768] #PF: error_code(0x0000) - not-present page [1136314.223550] PGD 0 P4D 0 [1136314.230684] Oops: 0000 [#1] PREEMPT SMP NOPTI [1136314.239621] CPU: 8 PID: 54203 Comm: xdpsock Not tainted 6.6.0+ #257 [1136314.250469] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [1136314.265615] RIP: 0010:__xdp_return+0x6c/0x210 [1136314.274653] Code: ad 00 48 8b 47 08 49 89 f8 a8 01 0f 85 9b 01 00 00 0f 1f 44 00 00 f0 41 ff 48 34 75 32 4c 89 c7 e9 79 cd 80 ff 83 fe 03 75 17 41 34 01 0f 85 02 01 00 00 48 89 cf e9 22 cc 1e 00 e9 3d d2 86 [1136314.302907] RSP: 0018:ffffc900089f8db0 EFLAGS: 00010246 [1136314.312967] RAX: ffffc9003168aed0 RBX: ffff8881c3300000 RCX: 0000000000000000 [1136314.324953] RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffffc9003168c000 [1136314.336929] RBP: 0000000000000ae0 R08: 0000000000000002 R09: 0000000000010000 [1136314.348844] R10: ffffc9000e495000 R11: 0000000000000040 R12: 0000000000000001 [1136314.360706] R13: 0000000000000524 R14: ffffc9003168aec0 R15: 0000000000000001 [1136314.373298] FS: 00007f8df8bbcb80(0000) GS:ffff8897e0e00000(0000) knlGS:0000000000000000 [1136314.386105] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1136314.396532] CR2: 0000000000000034 CR3: 00000001aa912002 CR4: 00000000007706f0 [1136314.408377] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1136314.420173] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1136314.431890] PKRU: 55555554 [1136314.439143] Call Trace: [1136314.446058] [1136314.452465] ? __die+0x20/0x70 [1136314.459881] ? page_fault_oops+0x15b/0x440 [1136314.468305] ? exc_page_fault+0x6a/0x150 [1136314.476491] ? asm_exc_page_fault+0x22/0x30 [1136314.484927] ? __xdp_return+0x6c/0x210 [1136314.492863] bpf_xdp_adjust_tail+0x155/0x1d0 [1136314.501269] bpf_prog_ccc47ae29d3b6570_xdp_sock_prog+0x15/0x60 [1136314.511263] ice_clean_rx_irq_zc+0x206/0xc60 [ice] [1136314.520222] ? ice_xmit_zc+0x6e/0x150 [ice] [1136314.528506] ice_napi_poll+0x467/0x670 [ice] [1136314.536858] ? ttwu_do_activate.constprop.0+0x8f/0x1a0 [1136314.546010] __napi_poll+0x29/0x1b0 [1136314.553462] net_rx_action+0x133/0x270 [1136314.561619] __do_softirq+0xbe/0x28e [1136314.569303] do_softirq+0x3f/0x60 This comes from __xdp_return() call with xdp_buff argument passed as NULL which is supposed to be consumed by xsk_buff_free() call. To address this properly, in ZC case, a node that represents the frag being removed has to be pulled out of xskb_list. Introduce appropriate xsk helpers to do such node operation and use them accordingly within bpf_xdp_adjust_tail(). Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Acked-by: Magnus Karlsson # For the xsk header part Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-4-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- include/net/xdp_sock_drv.h | 26 +++++++++++++++++++++++ net/core/filter.c | 42 ++++++++++++++++++++++++++++++++------ 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 9819e2af0378..c9aec9ab6191 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -159,6 +159,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) return ret; } +static inline void xsk_buff_del_tail(struct xdp_buff *tail) +{ + struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); + + list_del(&xskb->xskb_list_node); +} + +static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) +{ + struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); + struct xdp_buff_xsk *frag; + + frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, + xskb_list_node); + return &frag->xdp; +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; @@ -351,6 +368,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) return NULL; } +static inline void xsk_buff_del_tail(struct xdp_buff *tail) +{ +} + +static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) +{ + return NULL; +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } diff --git a/net/core/filter.c b/net/core/filter.c index 24061f29c9dd..36fb5ae8af69 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -83,6 +83,7 @@ #include #include #include +#include #include "dev.h" @@ -4096,6 +4097,40 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) return 0; } +static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, + struct xdp_mem_info *mem_info, bool release) +{ + struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp); + + if (release) { + xsk_buff_del_tail(zc_frag); + __xdp_return(NULL, mem_info, false, zc_frag); + } else { + zc_frag->data_end -= shrink; + } +} + +static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, + int shrink) +{ + struct xdp_mem_info *mem_info = &xdp->rxq->mem; + bool release = skb_frag_size(frag) == shrink; + + if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) { + bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release); + goto out; + } + + if (release) { + struct page *page = skb_frag_page(frag); + + __xdp_return(page_address(page), mem_info, false, NULL); + } + +out: + return release; +} + static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); @@ -4110,12 +4145,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) len_free += shrink; offset -= shrink; - - if (skb_frag_size(frag) == shrink) { - struct page *page = skb_frag_page(frag); - - __xdp_return(page_address(page), &xdp->rxq->mem, - false, NULL); + if (bpf_xdp_shrink_data(xdp, frag, shrink)) { n_frags_free++; } else { skb_frag_size_sub(frag, shrink); From ad2047cf5d9313200e308612aed516548873d124 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:15:55 +0100 Subject: [PATCH 346/768] ice: work on pre-XDP prog frag count Fix an OOM panic in XDP_DRV mode when a XDP program shrinks a multi-buffer packet by 4k bytes and then redirects it to an AF_XDP socket. Since support for handling multi-buffer frames was added to XDP, usage of bpf_xdp_adjust_tail() helper within XDP program can free the page that given fragment occupies and in turn decrease the fragment count within skb_shared_info that is embedded in xdp_buff struct. In current ice driver codebase, it can become problematic when page recycling logic decides not to reuse the page. In such case, __page_frag_cache_drain() is used with ice_rx_buf::pagecnt_bias that was not adjusted after refcount of page was changed by XDP prog which in turn does not drain the refcount to 0 and page is never freed. To address this, let us store the count of frags before the XDP program was executed on Rx ring struct. This will be used to compare with current frag count from skb_shared_info embedded in xdp_buff. A smaller value in the latter indicates that XDP prog freed frag(s). Then, for given delta decrement pagecnt_bias for XDP_DROP verdict. While at it, let us also handle the EOP frag within ice_set_rx_bufs_act() to make our life easier, so all of the adjustments needed to be applied against freed frags are performed in the single place. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Acked-by: Magnus Karlsson Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-5-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/ice/ice_txrx.c | 14 ++++++--- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 31 +++++++++++++------ 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 74d13cc5a3a7..0c9b4aa8a049 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -603,9 +603,7 @@ out_failure: ret = ICE_XDP_CONSUMED; } exit: - rx_buf->act = ret; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, ret); + ice_set_rx_bufs_act(xdp, rx_ring, ret); } /** @@ -893,14 +891,17 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, } if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); return -ENOMEM; } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); sinfo->xdp_frags_size += size; + /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail() + * can pop off frags but driver has to handle it on its own + */ + rx_ring->nr_frags = sinfo->nr_frags; if (page_is_pfmemalloc(rx_buf->page)) xdp_buff_set_frag_pfmemalloc(xdp); @@ -1251,6 +1252,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp->data = NULL; rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; continue; construct_skb: if (likely(ice_ring_uses_build_skb(rx_ring))) @@ -1266,10 +1268,12 @@ construct_skb: ICE_XDP_CONSUMED); xdp->data = NULL; rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; break; } xdp->data = NULL; rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b3379ff73674..af955b0e5dc5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -358,6 +358,7 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; + u32 nr_frags; dma_addr_t dma; /* physical address of ring */ u16 rx_buf_len; u8 dcb_tc; /* Traffic class of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 762047508619..afcead4baef4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -12,26 +12,39 @@ * act: action to store onto Rx buffers related to XDP buffer parts * * Set action that should be taken before putting Rx buffer from first frag - * to one before last. Last one is handled by caller of this function as it - * is the EOP frag that is currently being processed. This function is - * supposed to be called only when XDP buffer contains frags. + * to the last. */ static inline void ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, const unsigned int act) { - const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); - u32 first = rx_ring->first_desc; - u32 nr_frags = sinfo->nr_frags; + u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; struct ice_rx_buf *buf; for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[first]; + buf = &rx_ring->rx_buf[idx]; buf->act = act; - if (++first == cnt) - first = 0; + if (++idx == cnt) + idx = 0; + } + + /* adjust pagecnt_bias on frags freed by XDP prog */ + if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { + u32 delta = rx_ring->nr_frags - sinfo_frags; + + while (delta) { + if (idx == 0) + idx = cnt - 1; + else + idx--; + buf = &rx_ring->rx_buf[idx]; + buf->pagecnt_bias--; + delta--; + } } } From 83014323c642b8faa2d64a5f303b41c019322478 Mon Sep 17 00:00:00 2001 From: Tirthendu Sarkar Date: Wed, 24 Jan 2024 20:15:56 +0100 Subject: [PATCH 347/768] i40e: handle multi-buffer packets that are shrunk by xdp prog XDP programs can shrink packets by calling the bpf_xdp_adjust_tail() helper function. For multi-buffer packets this may lead to reduction of frag count stored in skb_shared_info area of the xdp_buff struct. This results in issues with the current handling of XDP_PASS and XDP_DROP cases. For XDP_PASS, currently skb is being built using frag count of xdp_buffer before it was processed by XDP prog and thus will result in an inconsistent skb when frag count gets reduced by XDP prog. To fix this, get correct frag count while building the skb instead of using pre-obtained frag count. For XDP_DROP, current page recycling logic will not reuse the page but instead will adjust the pagecnt_bias so that the page can be freed. This again results in inconsistent behavior as the page refcnt has already been changed by the helper while freeing the frag(s) as part of shrinking the packet. To fix this, only adjust pagecnt_bias for buffers that are stillpart of the packet post-xdp prog run. Fixes: e213ced19bef ("i40e: add support for XDP multi-buffer Rx") Reported-by: Maciej Fijalkowski Signed-off-by: Tirthendu Sarkar Link: https://lore.kernel.org/r/20240124191602.566724-6-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 40 ++++++++++++--------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 971ba3322038..1f0a0f13a334 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2087,7 +2087,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, struct xdp_buff *xdp) { - u32 next = rx_ring->next_to_clean; + u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; + u32 next = rx_ring->next_to_clean, i = 0; struct i40e_rx_buffer *rx_buffer; xdp->flags = 0; @@ -2100,10 +2101,10 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, if (!rx_buffer->page) continue; - if (xdp_res == I40E_XDP_CONSUMED) - rx_buffer->pagecnt_bias++; - else + if (xdp_res != I40E_XDP_CONSUMED) i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz); + else if (i++ <= nr_frags) + rx_buffer->pagecnt_bias++; /* EOP buffer will be put in i40e_clean_rx_irq() */ if (next == rx_ring->next_to_process) @@ -2117,20 +2118,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res, * i40e_construct_skb - Allocate skb and populate it * @rx_ring: rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data - * @nr_frags: number of buffers for the packet * * This function allocates an skb. It then populates it with the page * data from the current receive descriptor, taking care to set up the * skb correctly. */ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, - struct xdp_buff *xdp, - u32 nr_frags) + struct xdp_buff *xdp) { unsigned int size = xdp->data_end - xdp->data; struct i40e_rx_buffer *rx_buffer; + struct skb_shared_info *sinfo; unsigned int headlen; struct sk_buff *skb; + u32 nr_frags = 0; /* prefetch first cache line of first page */ net_prefetch(xdp->data); @@ -2168,6 +2169,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long))); + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + } rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); /* update all of the pointers */ size -= headlen; @@ -2187,9 +2192,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, } if (unlikely(xdp_buff_has_frags(xdp))) { - struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb); + struct skb_shared_info *skinfo = skb_shinfo(skb); - sinfo = xdp_get_shared_info_from_buff(xdp); memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], sizeof(skb_frag_t) * nr_frags); @@ -2212,17 +2216,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, * i40e_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data - * @nr_frags: number of buffers for the packet * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. */ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, - struct xdp_buff *xdp, - u32 nr_frags) + struct xdp_buff *xdp) { unsigned int metasize = xdp->data - xdp->data_meta; + struct skb_shared_info *sinfo; struct sk_buff *skb; + u32 nr_frags; /* Prefetch first cache line of first page. If xdp->data_meta * is unused, this points exactly as xdp->data, otherwise we @@ -2231,6 +2235,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, */ net_prefetch(xdp->data_meta); + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + } + /* build an skb around the page buffer */ skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); if (unlikely(!skb)) @@ -2243,9 +2252,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, skb_metadata_set(skb, metasize); if (unlikely(xdp_buff_has_frags(xdp))) { - struct skb_shared_info *sinfo; - - sinfo = xdp_get_shared_info_from_buff(xdp); xdp_update_skb_shared_info(skb, nr_frags, sinfo->xdp_frags_size, nr_frags * xdp->frame_sz, @@ -2589,9 +2595,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, total_rx_bytes += size; } else { if (ring_uses_build_skb(rx_ring)) - skb = i40e_build_skb(rx_ring, xdp, nfrags); + skb = i40e_build_skb(rx_ring, xdp); else - skb = i40e_construct_skb(rx_ring, xdp, nfrags); + skb = i40e_construct_skb(rx_ring, xdp); /* drop if we failed to retrieve a buffer */ if (!skb) { From 2ee788c06493d02ee85855414cca39825e768aaf Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:15:57 +0100 Subject: [PATCH 348/768] ice: remove redundant xdp_rxq_info registration xdp_rxq_info struct can be registered by drivers via two functions - xdp_rxq_info_reg() and __xdp_rxq_info_reg(). The latter one allows drivers that support XDP multi-buffer to set up xdp_rxq_info::frag_size which in turn will make it possible to grow the packet via bpf_xdp_adjust_tail() BPF helper. Currently, ice registers xdp_rxq_info in two spots: 1) ice_setup_rx_ring() // via xdp_rxq_info_reg(), BUG 2) ice_vsi_cfg_rxq() // via __xdp_rxq_info_reg(), OK Cited commit under fixes tag took care of setting up frag_size and updated registration scheme in 2) but it did not help as 1) is called before 2) and as shown above it uses old registration function. This means that 2) sees that xdp_rxq_info is already registered and never calls __xdp_rxq_info_reg() which leaves us with xdp_rxq_info::frag_size being set to 0. To fix this misbehavior, simply remove xdp_rxq_info_reg() call from ice_setup_rx_ring(). Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Acked-by: Magnus Karlsson Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-7-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/ice/ice_txrx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 0c9b4aa8a049..97d41d6ebf1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -513,11 +513,6 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); - if (rx_ring->vsi->type == ICE_VSI_PF && - !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->q_index, rx_ring->q_vector->napi.napi_id)) - goto err; return 0; err: From 290779905d09d5fdf6caa4f58ddefc3f4db0c0a9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:15:58 +0100 Subject: [PATCH 349/768] intel: xsk: initialize skb_frag_t::bv_offset in ZC drivers Ice and i40e ZC drivers currently set offset of a frag within skb_shared_info to 0, which is incorrect. xdp_buffs that come from xsk_buff_pool always have 256 bytes of a headroom, so they need to be taken into account to retrieve xdp_buff::data via skb_frag_address(). Otherwise, bpf_xdp_frags_increase_tail() would be starting its job from xdp_buff::data_hard_start which would result in overwriting existing payload. Fixes: 1c9ba9c14658 ("i40e: xsk: add RX multi-buffer support") Fixes: 1bbc04de607b ("ice: xsk: add RX multi-buffer support") Acked-by: Magnus Karlsson Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-8-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 3 ++- drivers/net/ethernet/intel/ice/ice_xsk.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 82aca0d16a3e..11500003af0d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -414,7 +414,8 @@ i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first, } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, - virt_to_page(xdp->data_hard_start), 0, size); + virt_to_page(xdp->data_hard_start), + XDP_PACKET_HEADROOM, size); sinfo->xdp_frags_size += size; xsk_buff_add_frag(xdp); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index d9073a618ad6..8b81a1677045 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -825,7 +825,8 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, - virt_to_page(xdp->data_hard_start), 0, size); + virt_to_page(xdp->data_hard_start), + XDP_PACKET_HEADROOM, size); sinfo->xdp_frags_size += size; xsk_buff_add_frag(xdp); From 3de38c87174225487fc93befeea7d380db80aef6 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:15:59 +0100 Subject: [PATCH 350/768] ice: update xdp_rxq_info::frag_size for ZC enabled Rx queue Now that ice driver correctly sets up frag_size in xdp_rxq_info, let us make it work for ZC multi-buffer as well. ice_rx_ring::rx_buf_len for ZC is being set via xsk_pool_get_rx_frame_size() and this needs to be propagated up to xdp_rxq_info. Use a bigger hammer and instead of unregistering only xdp_rxq_info's memory model, unregister it altogether and register it again and have xdp_rxq_info with correct frag_size value. Fixes: 1bbc04de607b ("ice: xsk: add RX multi-buffer support") Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-9-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/ice/ice_base.c | 37 ++++++++++++++--------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 533b923cae2d..7ac847718882 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -547,19 +547,27 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) ring->rx_buf_len = ring->vsi->rx_buf_len; if (ring->vsi->type == ICE_VSI_PF) { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) - /* coverity[check_return] */ - __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->vsi->rx_buf_len); + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + } ring->xsk_pool = ice_xsk_pool(ring); if (ring->xsk_pool) { - xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + xdp_rxq_info_unreg(&ring->xdp_rxq); ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); @@ -571,13 +579,14 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { - if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) - /* coverity[check_return] */ - __xdp_rxq_info_reg(&ring->xdp_rxq, - ring->netdev, - ring->q_index, - ring->q_vector->napi.napi_id, - ring->vsi->rx_buf_len); + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + } err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, From fbadd83a612c3b7aad2987893faca6bd24aaebb3 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:16:00 +0100 Subject: [PATCH 351/768] xdp: reflect tail increase for MEM_TYPE_XSK_BUFF_POOL XSK ZC Rx path calculates the size of data that will be posted to XSK Rx queue via subtracting xdp_buff::data_end from xdp_buff::data. In bpf_xdp_frags_increase_tail(), when underlying memory type of xdp_rxq_info is MEM_TYPE_XSK_BUFF_POOL, add offset to data_end in tail fragment, so that later on user space will be able to take into account the amount of bytes added by XDP program. Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-10-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 36fb5ae8af69..ef3e78b6a39c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4093,6 +4093,8 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset); skb_frag_size_add(frag, offset); sinfo->xdp_frags_size += offset; + if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) + xsk_buff_get_tail(xdp)->data_end += offset; return 0; } From a045d2f2d03d23e7db6772dd83e0ba2705dfad93 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:16:01 +0100 Subject: [PATCH 352/768] i40e: set xdp_rxq_info::frag_size i40e support XDP multi-buffer so it is supposed to use __xdp_rxq_info_reg() instead of xdp_rxq_info_reg() and set the frag_size. It can not be simply converted at existing callsite because rx_buf_len could be un-initialized, so let us register xdp_rxq_info within i40e_configure_rx_ring(), which happen to be called with already initialized rx_buf_len value. Commit 5180ff1364bc ("i40e: use int for i40e_status") converted 'err' to int, so two variables to deal with return codes are not needed within i40e_configure_rx_ring(). Remove 'ret' and use 'err' to handle status from xdp_rxq_info registration. Fixes: e213ced19bef ("i40e: add support for XDP multi-buffer Rx") Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-11-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/i40e/i40e_main.c | 40 ++++++++++++--------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 ----- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ae8f9f135725..d3b00d8ed39a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3588,40 +3588,48 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) struct i40e_hmc_obj_rxq rx_ctx; int err = 0; bool ok; - int ret; bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); - if (ring->vsi->type == I40E_VSI_MAIN) - xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + ring->rx_buf_len = vsi->rx_buf_len; + + /* XDP RX-queue info only needed for RX rings exposed to XDP */ + if (ring->vsi->type != I40E_VSI_MAIN) + goto skip; + + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; + } ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { - ring->rx_buf_len = - xsk_pool_get_rx_frame_size(ring->xsk_pool); - ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); - if (ret) - return ret; + if (err) + return err; dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->queue_index); } else { - ring->rx_buf_len = vsi->rx_buf_len; - if (ring->vsi->type == I40E_VSI_MAIN) { - ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, - MEM_TYPE_PAGE_SHARED, - NULL); - if (ret) - return ret; - } + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + return err; } +skip: xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1f0a0f13a334..0d7177083708 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1548,7 +1548,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring) int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) { struct device *dev = rx_ring->dev; - int err; u64_stats_init(&rx_ring->syncp); @@ -1569,14 +1568,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->next_to_process = 0; rx_ring->next_to_use = 0; - /* XDP RX-queue info only needed for RX rings exposed to XDP */ - if (rx_ring->vsi->type == I40E_VSI_MAIN) { - err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, rx_ring->q_vector->napi.napi_id); - if (err < 0) - return err; - } - rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; rx_ring->rx_bi = From 0cbb08707c932b3f004bc1a8ec6200ef572c1f5f Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 24 Jan 2024 20:16:02 +0100 Subject: [PATCH 353/768] i40e: update xdp_rxq_info::frag_size for ZC enabled Rx queue Now that i40e driver correctly sets up frag_size in xdp_rxq_info, let us make it work for ZC multi-buffer as well. i40e_ring::rx_buf_len for ZC is being set via xsk_pool_get_rx_frame_size() and this needs to be propagated up to xdp_rxq_info. Fixes: 1c9ba9c14658 ("i40e: xsk: add RX multi-buffer support") Acked-by: Magnus Karlsson Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20240124191602.566724-12-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d3b00d8ed39a..6e7fd473abfd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3611,7 +3611,14 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { + xdp_rxq_info_unreg(&ring->xdp_rxq); ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); + err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->queue_index, + ring->q_vector->napi.napi_id, + ring->rx_buf_len); + if (err) + return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); From f6cc4b6a3ae53df425771000e9c9540cce9b7bb1 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Tue, 23 Jan 2024 01:24:42 +0800 Subject: [PATCH 354/768] fjes: fix memleaks in fjes_hw_setup In fjes_hw_setup, it allocates several memory and delay the deallocation to the fjes_hw_exit in fjes_probe through the following call chain: fjes_probe |-> fjes_hw_init |-> fjes_hw_setup |-> fjes_hw_exit However, when fjes_hw_setup fails, fjes_hw_exit won't be called and thus all the resources allocated in fjes_hw_setup will be leaked. In this patch, we free those resources in fjes_hw_setup and prevents such leaks. Fixes: 2fcbca687702 ("fjes: platform_driver's .probe and .remove routine") Signed-off-by: Zhipeng Lu Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240122172445.3841883-1-alexious@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/fjes/fjes_hw.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index 704e949484d0..b9b5554ea862 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -221,21 +221,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) mem_size = FJES_DEV_REQ_BUF_SIZE(hw->max_epid); hw->hw_info.req_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.req_buf)) - return -ENOMEM; + if (!(hw->hw_info.req_buf)) { + result = -ENOMEM; + goto free_ep_info; + } hw->hw_info.req_buf_size = mem_size; mem_size = FJES_DEV_RES_BUF_SIZE(hw->max_epid); hw->hw_info.res_buf = kzalloc(mem_size, GFP_KERNEL); - if (!(hw->hw_info.res_buf)) - return -ENOMEM; + if (!(hw->hw_info.res_buf)) { + result = -ENOMEM; + goto free_req_buf; + } hw->hw_info.res_buf_size = mem_size; result = fjes_hw_alloc_shared_status_region(hw); if (result) - return result; + goto free_res_buf; hw->hw_info.buffer_share_bit = 0; hw->hw_info.buffer_unshare_reserve_bit = 0; @@ -246,11 +250,11 @@ static int fjes_hw_setup(struct fjes_hw *hw) result = fjes_hw_alloc_epbuf(&buf_pair->tx); if (result) - return result; + goto free_epbuf; result = fjes_hw_alloc_epbuf(&buf_pair->rx); if (result) - return result; + goto free_epbuf; spin_lock_irqsave(&hw->rx_status_lock, flags); fjes_hw_setup_epbuf(&buf_pair->tx, mac, @@ -273,6 +277,25 @@ static int fjes_hw_setup(struct fjes_hw *hw) fjes_hw_init_command_registers(hw, ¶m); return 0; + +free_epbuf: + for (epidx = 0; epidx < hw->max_epid ; epidx++) { + if (epidx == hw->my_epid) + continue; + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].tx); + fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].rx); + } + fjes_hw_free_shared_status_region(hw); +free_res_buf: + kfree(hw->hw_info.res_buf); + hw->hw_info.res_buf = NULL; +free_req_buf: + kfree(hw->hw_info.req_buf); + hw->hw_info.req_buf = NULL; +free_ep_info: + kfree(hw->ep_shm_info); + hw->ep_shm_info = NULL; + return result; } static void fjes_hw_cleanup(struct fjes_hw *hw) From 97cf5d53b4812dcb52c13fda700dad5aa8d3446c Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Wed, 24 Jan 2024 11:19:45 +0800 Subject: [PATCH 355/768] erofs: get rid of unneeded GFP_NOFS Clean up some leftovers since there is no way for EROFS to be called again from a reclaim context. Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20240124031945.130782-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 2 +- fs/erofs/inode.c | 2 +- fs/erofs/utils.c | 2 +- fs/erofs/zdata.c | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index bc12030393b2..5ff90026fd43 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -459,7 +459,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &erofs_fscache_meta_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); inode->i_blkbits = EROFS_SB(sb)->blkszbits; inode->i_private = ctx; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 3d616dea55dc..36e638e8b53a 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, } else { const unsigned int gotten = sb->s_blocksize - *ofs; - copied = kmalloc(vi->inode_isize, GFP_NOFS); + copied = kmalloc(vi->inode_isize, GFP_KERNEL); if (!copied) { err = -ENOMEM; goto err_out; diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index 5dea308764b4..e146d09151af 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, repeat: xa_lock(&sbi->managed_pslots); pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, - NULL, grp, GFP_NOFS); + NULL, grp, GFP_KERNEL); if (pre) { if (xa_is_err(pre)) { pre = ERR_PTR(xa_err(pre)); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 692c0c39be63..583c062cd0e4 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -230,7 +230,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter, struct page *nextpage = *candidate_bvpage; if (!nextpage) { - nextpage = erofs_allocpage(pagepool, GFP_NOFS); + nextpage = erofs_allocpage(pagepool, GFP_KERNEL); if (!nextpage) return -ENOMEM; set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE); @@ -302,7 +302,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size) if (nrpages > pcs->maxpages) continue; - pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); + pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL); if (!pcl) return ERR_PTR(-ENOMEM); pcl->pclustersize = size; @@ -694,7 +694,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio, DBG_BUGON(stop > folio_size(folio) || stop < length); if (offset == 0 && stop == folio_size(folio)) - while (!z_erofs_cache_release_folio(folio, GFP_NOFS)) + while (!z_erofs_cache_release_folio(folio, 0)) cond_resched(); } @@ -713,7 +713,7 @@ int erofs_init_managed_cache(struct super_block *sb) set_nlink(inode, 1); inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &z_erofs_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); EROFS_SB(sb)->managed_cache = inode; return 0; } From d76779dd3681c01a4c6c3cae4d0627c9083e0ee6 Mon Sep 17 00:00:00 2001 From: Quanquan Cao Date: Wed, 24 Jan 2024 17:15:26 +0800 Subject: [PATCH 356/768] =?UTF-8?q?cxl/region=EF=BC=9AFix=20overflow=20iss?= =?UTF-8?q?ue=20in=20alloc=5Fhpa()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating a region with 16 memory devices caused a problem. The div_u64_rem function, used for dividing an unsigned 64-bit number by a 32-bit one, faced an issue when SZ_256M * p->interleave_ways. The result surpassed the maximum limit of the 32-bit divisor (4G), leading to an overflow and a remainder of 0. note: At this point, p->interleave_ways is 16, meaning 16 * 256M = 4G To fix this issue, I replaced the div_u64_rem function with div64_u64_rem and adjusted the type of the remainder. Signed-off-by: Quanquan Cao Reviewed-by: Dave Jiang Fixes: 23a22cd1c98b ("cxl/region: Allocate HPA capacity to regions") Cc: Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 0f05692bfec3..ce0e2d82bb2b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -525,7 +525,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_region_params *p = &cxlr->params; struct resource *res; - u32 remainder = 0; + u64 remainder = 0; lockdep_assert_held_write(&cxl_region_rwsem); @@ -545,7 +545,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid))) return -ENXIO; - div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder); + div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder); if (remainder) return -EINVAL; From ebeae8adf89d9a82359f6659b1663d09beec2faa Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 21 Jan 2024 15:35:06 +0800 Subject: [PATCH 357/768] ksmbd: fix global oob in ksmbd_nl_policy Similar to a reported issue (check the commit b33fb5b801c6 ("net: qualcomm: rmnet: fix global oob in rmnet_policy"), my local fuzzer finds another global out-of-bounds read for policy ksmbd_nl_policy. See bug trace below: ================================================================== BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:386 [inline] BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 Read of size 1 at addr ffffffff8f24b100 by task syz-executor.1/62810 CPU: 0 PID: 62810 Comm: syz-executor.1 Tainted: G N 6.1.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x8b/0xb3 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x172/0x475 mm/kasan/report.c:395 kasan_report+0xbb/0x1c0 mm/kasan/report.c:495 validate_nla lib/nlattr.c:386 [inline] __nla_validate_parse+0x24af/0x2750 lib/nlattr.c:600 __nla_parse+0x3e/0x50 lib/nlattr.c:697 __nlmsg_parse include/net/netlink.h:748 [inline] genl_family_rcv_msg_attrs_parse.constprop.0+0x1b0/0x290 net/netlink/genetlink.c:565 genl_family_rcv_msg_doit+0xda/0x330 net/netlink/genetlink.c:734 genl_family_rcv_msg net/netlink/genetlink.c:833 [inline] genl_rcv_msg+0x441/0x780 net/netlink/genetlink.c:850 netlink_rcv_skb+0x14f/0x410 net/netlink/af_netlink.c:2540 genl_rcv+0x24/0x40 net/netlink/genetlink.c:861 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x54e/0x800 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x930/0xe50 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0x154/0x190 net/socket.c:734 ____sys_sendmsg+0x6df/0x840 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fdd66a8f359 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fdd65e00168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fdd66bbcf80 RCX: 00007fdd66a8f359 RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000003 RBP: 00007fdd66ada493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc84b81aff R14: 00007fdd65e00300 R15: 0000000000022000 The buggy address belongs to the variable: ksmbd_nl_policy+0x100/0xa80 The buggy address belongs to the physical page: page:0000000034f47940 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1ccc4b flags: 0x200000000001000(reserved|node=0|zone=2) raw: 0200000000001000 ffffea00073312c8 ffffea00073312c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffffffff8f24b000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffffffff8f24b080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffffffff8f24b100: f9 f9 f9 f9 00 00 f9 f9 f9 f9 f9 f9 00 00 07 f9 ^ ffffffff8f24b180: f9 f9 f9 f9 00 05 f9 f9 f9 f9 f9 f9 00 00 00 05 ffffffff8f24b200: f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 00 00 04 f9 ================================================================== To fix it, add a placeholder named __KSMBD_EVENT_MAX and let KSMBD_EVENT_MAX to be its original value - 1 according to what other netlink families do. Also change two sites that refer the KSMBD_EVENT_MAX to correct value. Cc: stable@vger.kernel.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Lin Ma Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/ksmbd_netlink.h | 3 ++- fs/smb/server/transport_ipc.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index b7521e41402e..0ebf91ffa236 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -304,7 +304,8 @@ enum ksmbd_event { KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST, KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15, - KSMBD_EVENT_MAX + __KSMBD_EVENT_MAX, + KSMBD_EVENT_MAX = __KSMBD_EVENT_MAX - 1 }; /* diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index b49d47bdafc9..f29bb03f0dc4 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -74,7 +74,7 @@ static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info) static int handle_generic_event(struct sk_buff *skb, struct genl_info *info); static int ksmbd_ipc_heartbeat_request(void); -static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX] = { +static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = { [KSMBD_EVENT_UNSPEC] = { .len = 0, }, @@ -403,7 +403,7 @@ static int handle_generic_event(struct sk_buff *skb, struct genl_info *info) return -EPERM; #endif - if (type >= KSMBD_EVENT_MAX) { + if (type > KSMBD_EVENT_MAX) { WARN_ON(1); return -EINVAL; } From 9f3fe29d77ef4e7f7cb5c4c8c59f6dc373e57e78 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 17 Jan 2024 19:22:36 +0100 Subject: [PATCH 358/768] md: fix a suspicious RCU usage warning RCU protection was removed in the commit 2d32777d60de ("raid1: remove rcu protection to access rdev from conf"). However, the code in fix_read_error does rcu_dereference outside rcu_read_lock - this triggers the following warning. The warning is triggered by a LVM2 test shell/integrity-caching.sh. This commit removes rcu_dereference. ============================= WARNING: suspicious RCU usage 6.7.0 #2 Not tainted ----------------------------- drivers/md/raid1.c:2265 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 no locks held by mdX_raid1/1859. stack backtrace: CPU: 2 PID: 1859 Comm: mdX_raid1 Not tainted 6.7.0 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x60/0x70 lockdep_rcu_suspicious+0x153/0x1b0 raid1d+0x1732/0x1750 [raid1] ? lock_acquire+0x9f/0x270 ? finish_wait+0x3d/0x80 ? md_thread+0xf7/0x130 [md_mod] ? lock_release+0xaa/0x230 ? md_register_thread+0xd0/0xd0 [md_mod] md_thread+0xa0/0x130 [md_mod] ? housekeeping_test_cpu+0x30/0x30 kthread+0xdc/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x28/0x40 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 Signed-off-by: Mikulas Patocka Fixes: ca294b34aaf3 ("md/raid1: support read error check") Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/51539879-e1ca-fde3-b8b4-8934ddedcbc@redhat.com --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 24f0d799fd98..286f8b16c7bd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2262,7 +2262,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio) int sectors = r1_bio->sectors; int read_disk = r1_bio->read_disk; struct mddev *mddev = conf->mddev; - struct md_rdev *rdev = rcu_dereference(conf->mirrors[read_disk].rdev); + struct md_rdev *rdev = conf->mirrors[read_disk].rdev; if (exceed_read_errors(mddev, rdev)) { r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED; From a3bdcdd022c68942a774e8e63424cc11c85aab78 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 25 Jan 2024 14:32:26 +0800 Subject: [PATCH 359/768] HID: hidraw: fix a problem of memory leak in hidraw_release() 'struct hidraw_list' is a circular queue whose head can be smaller than tail. Using 'list->tail != list->head' to release all memory that should be released. Fixes: a5623a203cff ("HID: hidraw: fix memory leak in hidraw_release()") Signed-off-by: Su Hui Reviewed-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 13c8dd8cd350..2bc762d31ac7 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -357,8 +357,11 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); - for (int i = list->tail; i < list->head; i++) - kfree(list->buffer[i].value); + while (list->tail != list->head) { + kfree(list->buffer[list->tail].value); + list->buffer[list->tail].value = NULL; + list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); + } list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); From 644649553508b9bacf0fc7a5bdc4f9e0165576a5 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Mon, 22 Jan 2024 18:23:50 +0100 Subject: [PATCH 360/768] clocksource: Skip watchdog check for large watchdog intervals There have been reports of the watchdog marking clocksources unstable on machines with 8 NUMA nodes: clocksource: timekeeping watchdog on CPU373: Marking clocksource 'tsc' as unstable because the skew is too large: clocksource: 'hpet' wd_nsec: 14523447520 clocksource: 'tsc' cs_nsec: 14524115132 The measured clocksource skew - the absolute difference between cs_nsec and wd_nsec - was 668 microseconds: cs_nsec - wd_nsec = 14524115132 - 14523447520 = 667612 The kernel used 200 microseconds for the uncertainty_margin of both the clocksource and watchdog, resulting in a threshold of 400 microseconds (the md variable). Both the cs_nsec and the wd_nsec value indicate that the readout interval was circa 14.5 seconds. The observed behaviour is that watchdog checks failed for large readout intervals on 8 NUMA node machines. This indicates that the size of the skew was directly proportinal to the length of the readout interval on those machines. The measured clocksource skew, 668 microseconds, was evaluated against a threshold (the md variable) that is suited for readout intervals of roughly WATCHDOG_INTERVAL, i.e. HZ >> 1, which is 0.5 second. The intention of 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") was to tighten the threshold for evaluating skew and set the lower bound for the uncertainty_margin of clocksources to twice WATCHDOG_MAX_SKEW. Later in c37e85c135ce ("clocksource: Loosen clocksource watchdog constraints"), the WATCHDOG_MAX_SKEW constant was increased to 125 microseconds to fit the limit of NTP, which is able to use a clocksource that suffers from up to 500 microseconds of skew per second. Both the TSC and the HPET use default uncertainty_margin. When the readout interval gets stretched the default uncertainty_margin is no longer a suitable lower bound for evaluating skew - it imposes a limit that is far stricter than the skew with which NTP can deal. The root causes of the skew being directly proportinal to the length of the readout interval are: * the inaccuracy of the shift/mult pairs of clocksources and the watchdog * the conversion to nanoseconds is imprecise for large readout intervals Prevent this by skipping the current watchdog check if the readout interval exceeds 2 * WATCHDOG_INTERVAL. Considering the maximum readout interval of 2 * WATCHDOG_INTERVAL, the current default uncertainty margin (of the TSC and HPET) corresponds to a limit on clocksource skew of 250 ppm (microseconds of skew per second). To keep the limit imposed by NTP (500 microseconds of skew per second) for all possible readout intervals, the margins would have to be scaled so that the threshold value is proportional to the length of the actual readout interval. As for why the readout interval may get stretched: Since the watchdog is executed in softirq context the expiration of the watchdog timer can get severely delayed on account of a ksoftirqd thread not getting to run in a timely manner. Surely, a system with such belated softirq execution is not working well and the scheduling issue should be looked into but the clocksource watchdog should be able to deal with it accordingly. Fixes: 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold") Suggested-by: Feng Tang Signed-off-by: Jiri Wiesner Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Reviewed-by: Feng Tang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122172350.GA740@incl --- kernel/time/clocksource.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c108ed8a9804..3052b1f1168e 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -99,6 +99,7 @@ static u64 suspend_start; * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) +#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ)) /* * Threshold: 0.0312s, when doubled: 0.0625s. @@ -134,6 +135,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; +static int64_t watchdog_max_interval; static inline void clocksource_watchdog_lock(unsigned long *flags) { @@ -399,8 +401,8 @@ static inline void clocksource_reset_watchdog(void) static void clocksource_watchdog(struct timer_list *unused) { u64 csnow, wdnow, cslast, wdlast, delta; + int64_t wd_nsec, cs_nsec, interval; int next_cpu, reset_pending; - int64_t wd_nsec, cs_nsec; struct clocksource *cs; enum wd_read_status read_ret; unsigned long extra_wait = 0; @@ -470,6 +472,27 @@ static void clocksource_watchdog(struct timer_list *unused) if (atomic_read(&watchdog_reset_pending)) continue; + /* + * The processing of timer softirqs can get delayed (usually + * on account of ksoftirqd not getting to run in a timely + * manner), which causes the watchdog interval to stretch. + * Skew detection may fail for longer watchdog intervals + * on account of fixed margins being used. + * Some clocksources, e.g. acpi_pm, cannot tolerate + * watchdog intervals longer than a few seconds. + */ + interval = max(cs_nsec, wd_nsec); + if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) { + if (system_state > SYSTEM_SCHEDULING && + interval > 2 * watchdog_max_interval) { + watchdog_max_interval = interval; + pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n", + cs_nsec, wd_nsec); + } + watchdog_timer.expires = jiffies; + continue; + } + /* Check the deviation from the watchdog clocksource. */ md = cs->uncertainty_margin + watchdog->uncertainty_margin; if (abs(cs_nsec - wd_nsec) > md) { From f9f031dd21a7ce13a13862fa5281d32e1029c70f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jan 2024 23:21:31 +0200 Subject: [PATCH 361/768] drm/i915/psr: Only allow PSR in LPSP mode on HSW non-ULT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HSW non-ULT (or at least on Dell Latitude E6540) external displays start to flicker when we enable PSR on the eDP. We observe a much higher SR and PC6 residency than should be possible with an external display, and indeen much higher than what we observe with eDP disabled and only the external display enabled. Looks like the hardware is somehow ignoring the fact that the external display is active during PSR. I wasn't able to redproduce this on my HSW ULT machine, or BDW. So either there's something specific about this particular laptop (eg. some unknown firmware thing) or the issue is limited to just non-ULT HSW systems. All known registers that could affect this look perfectly reasonable on the affected machine. As a workaround let's unmask the LPSP event to prevent PSR entry except while in LPSP mode (only pipe A + eDP active). This will prevent PSR entry entirely when multiple pipes are active. The one slight downside is that we now also prevent PSR entry when driving eDP with pipe B or C, but I think that's a reasonable tradeoff to avoid having to implement a more complex workaround. Cc: stable@vger.kernel.org Fixes: 783d8b80871f ("drm/i915/psr: Re-enable PSR1 on hsw/bdw") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10092 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240118212131.31868-1-ville.syrjala@linux.intel.com Reviewed-by: Jouni Högander (cherry picked from commit 94501c3ca6400e463ff6cc0c9cf4a2feb6a9205d) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_psr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 8f702c3fc62d..57bbf3e3af92 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1525,8 +1525,18 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, * can rely on frontbuffer tracking. */ mask = EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP; + EDP_PSR_DEBUG_MASK_HPD; + + /* + * For some unknown reason on HSW non-ULT (or at least on + * Dell Latitude E6540) external displays start to flicker + * when PSR is enabled on the eDP. SR/PC6 residency is much + * higher than should be possible with an external display. + * As a workaround leave LPSP unmasked to prevent PSR entry + * when external displays are active. + */ + if (DISPLAY_VER(dev_priv) >= 8 || IS_HASWELL_ULT(dev_priv)) + mask |= EDP_PSR_DEBUG_MASK_LPSP; if (DISPLAY_VER(dev_priv) < 20) mask |= EDP_PSR_DEBUG_MASK_MAX_SLEEP; From a2933a8759a62269754e54733d993b19de870e84 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 23 Jan 2024 15:59:17 +0800 Subject: [PATCH 362/768] selftests: bonding: do not test arp/ns target with mode balance-alb/tlb The prio_arp/ns tests hard code the mode to active-backup. At the same time, The balance-alb/tlb modes do not support arp/ns target. So remove the prio_arp/ns tests from the loop and only test active-backup mode. Fixes: 481b56e0391e ("selftests: bonding: re-format bond option tests") Reported-by: Jay Vosburgh Closes: https://lore.kernel.org/netdev/17415.1705965957@famine/ Signed-off-by: Hangbin Liu Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20240123075917.1576360-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- .../testing/selftests/drivers/net/bonding/bond_options.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index c54d1697f439..d508486cc0bd 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -162,7 +162,7 @@ prio_arp() local mode=$1 for primary_reselect in 0 1 2; do - prio_test "mode active-backup arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect" + prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect" log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect" done } @@ -178,7 +178,7 @@ prio_ns() fi for primary_reselect in 0 1 2; do - prio_test "mode active-backup arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect" + prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect" log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect" done } @@ -194,9 +194,9 @@ prio() for mode in $modes; do prio_miimon $mode - prio_arp $mode - prio_ns $mode done + prio_arp "active-backup" + prio_ns "active-backup" } arp_validate_test() From 9a574ea9069be30b835a3da772c039993c43369b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Mon, 22 Jan 2024 15:35:34 -0800 Subject: [PATCH 363/768] tick/sched: Preserve number of idle sleeps across CPU hotplug events Commit 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug") preserved total idle sleep time and iowait sleeptime across CPU hotplug events. Similar reasoning applies to the number of idle calls and idle sleeps to get the proper average of sleep time per idle invocation. Preserve those fields too. Fixes: 71fee48f ("tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug") Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240122233534.3094238-1-tim.c.chen@linux.intel.com --- kernel/time/tick-sched.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d2501673028d..01fb50c1b17e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1577,6 +1577,7 @@ void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t idle_sleeptime, iowait_sleeptime; + unsigned long idle_calls, idle_sleeps; # ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) @@ -1585,9 +1586,13 @@ void tick_cancel_sched_timer(int cpu) idle_sleeptime = ts->idle_sleeptime; iowait_sleeptime = ts->iowait_sleeptime; + idle_calls = ts->idle_calls; + idle_sleeps = ts->idle_sleeps; memset(ts, 0, sizeof(*ts)); ts->idle_sleeptime = idle_sleeptime; ts->iowait_sleeptime = iowait_sleeptime; + ts->idle_calls = idle_calls; + ts->idle_sleeps = idle_sleeps; } #endif From f1cc6aceecd04964304786530eaa4ad87d90b972 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 22 Jan 2024 13:09:43 +0100 Subject: [PATCH 364/768] accel/ivpu: Fix dev open/close races with unbind - Add context_list_lock to synchronize user context addition/removal - Use drm_dev_enter() to prevent unbinding the device during ivpu_open() and vpu address allocation Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-2-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 107 +++++++++++++++++++++------------- drivers/accel/ivpu/ivpu_drv.h | 3 +- drivers/accel/ivpu/ivpu_gem.c | 18 +++--- drivers/accel/ivpu/ivpu_gem.h | 2 +- drivers/accel/ivpu/ivpu_job.c | 16 ++--- drivers/accel/ivpu/ivpu_job.h | 2 +- 6 files changed, 84 insertions(+), 64 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 546c0899bb9e..9418c73ee8ef 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -66,22 +67,20 @@ struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) return file_priv; } -struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id) +static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv) { - struct ivpu_file_priv *file_priv; + mutex_lock(&file_priv->lock); + if (file_priv->bound) { + ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id); - xa_lock_irq(&vdev->context_xa); - file_priv = xa_load(&vdev->context_xa, id); - /* file_priv may still be in context_xa during file_priv_release() */ - if (file_priv && !kref_get_unless_zero(&file_priv->ref)) - file_priv = NULL; - xa_unlock_irq(&vdev->context_xa); - - if (file_priv) - ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n", - file_priv->ctx.id, kref_read(&file_priv->ref)); - - return file_priv; + ivpu_cmdq_release_all_locked(file_priv); + ivpu_jsm_context_release(vdev, file_priv->ctx.id); + ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx); + ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); + file_priv->bound = false; + drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id)); + } + mutex_unlock(&file_priv->lock); } static void file_priv_release(struct kref *ref) @@ -89,13 +88,15 @@ static void file_priv_release(struct kref *ref) struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); struct ivpu_device *vdev = file_priv->vdev; - ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); + ivpu_dbg(vdev, FILE, "file_priv release: ctx %u bound %d\n", + file_priv->ctx.id, (bool)file_priv->bound); + + pm_runtime_get_sync(vdev->drm.dev); + mutex_lock(&vdev->context_list_lock); + file_priv_unbind(vdev, file_priv); + mutex_unlock(&vdev->context_list_lock); + pm_runtime_put_autosuspend(vdev->drm.dev); - ivpu_cmdq_release_all(file_priv); - ivpu_jsm_context_release(vdev, file_priv->ctx.id); - ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx); - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); - drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); mutex_destroy(&file_priv->lock); kfree(file_priv); } @@ -232,49 +233,53 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) struct ivpu_device *vdev = to_ivpu_device(dev); struct ivpu_file_priv *file_priv; u32 ctx_id; - void *old; - int ret; + int idx, ret; - ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL); - if (ret) { - ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); - return ret; - } + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); if (!file_priv) { ret = -ENOMEM; - goto err_xa_erase; + goto err_dev_exit; } file_priv->vdev = vdev; + file_priv->bound = true; kref_init(&file_priv->ref); mutex_init(&file_priv->lock); + mutex_lock(&vdev->context_list_lock); + + ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, file_priv, + vdev->context_xa_limit, GFP_KERNEL); + if (ret) { + ivpu_err(vdev, "Failed to allocate context id: %d\n", ret); + goto err_unlock; + } + ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); if (ret) - goto err_mutex_destroy; + goto err_xa_erase; - old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); - if (xa_is_err(old)) { - ret = xa_err(old); - ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret); - goto err_ctx_fini; - } + mutex_unlock(&vdev->context_list_lock); + drm_dev_exit(idx); + + file->driver_priv = file_priv; ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n", ctx_id, current->comm, task_pid_nr(current)); - file->driver_priv = file_priv; return 0; -err_ctx_fini: - ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); -err_mutex_destroy: - mutex_destroy(&file_priv->lock); - kfree(file_priv); err_xa_erase: xa_erase_irq(&vdev->context_xa, ctx_id); +err_unlock: + mutex_unlock(&vdev->context_list_lock); + mutex_destroy(&file_priv->lock); + kfree(file_priv); +err_dev_exit: + drm_dev_exit(idx); return ret; } @@ -531,6 +536,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list); + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); + if (ret) + goto err_xa_destroy; + ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock); if (ret) goto err_xa_destroy; @@ -602,14 +611,30 @@ err_xa_destroy: return ret; } +static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + file_priv_unbind(vdev, file_priv); + + mutex_unlock(&vdev->context_list_lock); +} + static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_pm_disable(vdev); ivpu_shutdown(vdev); if (IVPU_WA(d3hot_after_power_off)) pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); + + ivpu_jobs_abort_all(vdev); ivpu_job_done_consumer_fini(vdev); ivpu_pm_cancel_recovery(vdev); + ivpu_bo_unbind_all_user_contexts(vdev); ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 7a6bc1918780..069ace4adb2d 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -115,6 +115,7 @@ struct ivpu_device { struct ivpu_mmu_context gctx; struct ivpu_mmu_context rctx; + struct mutex context_list_lock; /* Protects user context addition/removal */ struct xarray context_xa; struct xa_limit context_xa_limit; @@ -147,6 +148,7 @@ struct ivpu_file_priv { struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; struct ivpu_mmu_context ctx; bool has_mmu_faults; + bool bound; }; extern int ivpu_dbg_mask; @@ -162,7 +164,6 @@ extern bool ivpu_disable_mmu_cont_pages; extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); -struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id); void ivpu_file_priv_put(struct ivpu_file_priv **link); int ivpu_boot(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 16f3035b91c0..e9ddbe9f50eb 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -77,7 +77,10 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - int ret; + int idx, ret; + + if (!drm_dev_enter(&vdev->drm, &idx)) + return -ENODEV; mutex_lock(&bo->lock); @@ -93,6 +96,8 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, mutex_unlock(&bo->lock); + drm_dev_exit(idx); + return ret; } @@ -128,14 +133,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) dma_resv_unlock(bo->base.base.resv); } -static void ivpu_bo_unbind(struct ivpu_bo *bo) -{ - mutex_lock(&bo->lock); - ivpu_bo_unbind_locked(bo); - mutex_unlock(&bo->lock); -} - -void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) +void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { struct ivpu_bo *bo; @@ -239,7 +237,7 @@ static void ivpu_bo_free(struct drm_gem_object *obj) drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); - ivpu_bo_unbind(bo); + ivpu_bo_unbind_locked(bo); mutex_destroy(&bo->lock); drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1); diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 5cb1dda3e58e..a8559211c70d 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -25,7 +25,7 @@ struct ivpu_bo { }; int ivpu_bo_pin(struct ivpu_bo *bo); -void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); +void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size); struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 82e40bb4803c..4fed0c05e051 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -112,16 +112,14 @@ static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engin } } -void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) { int i; - mutex_lock(&file_priv->lock); + lockdep_assert_held(&file_priv->lock); for (i = 0; i < IVPU_NUM_ENGINES; i++) ivpu_cmdq_release_locked(file_priv, i); - - mutex_unlock(&file_priv->lock); } /* @@ -161,15 +159,13 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) struct ivpu_file_priv *file_priv; unsigned long ctx_id; - xa_for_each(&vdev->context_xa, ctx_id, file_priv) { - file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); - if (!file_priv) - continue; + mutex_lock(&vdev->context_list_lock); + xa_for_each(&vdev->context_xa, ctx_id, file_priv) ivpu_cmdq_reset_all(file_priv); - ivpu_file_priv_put(&file_priv); - } + mutex_unlock(&vdev->context_list_lock); + } static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index 45a2f2ec82e5..bd22cf8e39e7 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -56,7 +56,7 @@ struct ivpu_job { int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); +void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv); void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); void ivpu_job_done_consumer_init(struct ivpu_device *vdev); From 264b271d12d0af794f3d1dc3793e6589fae8c66c Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 22 Jan 2024 13:09:44 +0100 Subject: [PATCH 365/768] accel/ivpu: Improve stability of ivpu_submit_ioctl() - Wake up the device as late as possible - Remove job reference counting in order to simplify the code - Don't put jobs that are not fully submitted on submitted_jobs_xa in order to avoid potential races with reset/recovery Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-3-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 139 +++++++++++++++------------------- drivers/accel/ivpu/ivpu_job.h | 1 - 2 files changed, 62 insertions(+), 78 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 4fed0c05e051..d9b47a04b35f 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -125,7 +125,7 @@ void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv) /* * Mark the doorbell as unregistered and reset job queue pointers. * This function needs to be called when the VPU hardware is restarted - * and FW looses job queue state. The next time job queue is used it + * and FW loses job queue state. The next time job queue is used it * will be registered again. */ static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) @@ -239,60 +239,32 @@ static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) return &fence->base; } -static void job_get(struct ivpu_job *job, struct ivpu_job **link) +static void ivpu_job_destroy(struct ivpu_job *job) { struct ivpu_device *vdev = job->vdev; - - kref_get(&job->ref); - *link = job; - - ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); -} - -static void job_release(struct kref *ref) -{ - struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); - struct ivpu_device *vdev = job->vdev; u32 i; + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", + job->job_id, job->file_priv->ctx.id, job->engine_idx); + for (i = 0; i < job->bo_count; i++) if (job->bos[i]) drm_gem_object_put(&job->bos[i]->base.base); dma_fence_put(job->done_fence); ivpu_file_priv_put(&job->file_priv); - - ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); kfree(job); - - /* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */ - ivpu_rpm_put(vdev); -} - -static void job_put(struct ivpu_job *job) -{ - struct ivpu_device *vdev = job->vdev; - - ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); - kref_put(&job->ref, job_release); } static struct ivpu_job * -ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) +ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) { struct ivpu_device *vdev = file_priv->vdev; struct ivpu_job *job; - int ret; - - ret = ivpu_rpm_get(vdev); - if (ret < 0) - return NULL; job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL); if (!job) - goto err_rpm_put; - - kref_init(&job->ref); + return NULL; job->vdev = vdev; job->engine_idx = engine_idx; @@ -306,17 +278,14 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) job->file_priv = ivpu_file_priv_get(file_priv); ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); - return job; err_free_job: kfree(job); -err_rpm_put: - ivpu_rpm_put(vdev); return NULL; } -static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) +static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) { struct ivpu_job *job; @@ -333,9 +302,10 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); - job_put(job); + ivpu_rpm_put(vdev); return 0; } @@ -345,10 +315,10 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev) unsigned long id; xa_for_each(&vdev->submitted_jobs_xa, id, job) - ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); + ivpu_job_signal_and_destroy(vdev, id, VPU_JSM_STATUS_ABORTED); } -static int ivpu_direct_job_submission(struct ivpu_job *job) +static int ivpu_job_submit(struct ivpu_job *job) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; @@ -356,53 +326,65 @@ static int ivpu_direct_job_submission(struct ivpu_job *job) struct ivpu_cmdq *cmdq; int ret; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); if (!cmdq) { - ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", - file_priv->ctx.id, job->engine_idx); + ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n", + file_priv->ctx.id, job->engine_idx); ret = -EINVAL; - goto err_unlock; + goto err_unlock_file_priv; } job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; - job_get(job, &job); - ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + xa_lock(&vdev->submitted_jobs_xa); + ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); if (ret) { - ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); - goto err_job_put; + ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", + file_priv->ctx.id); + ret = -EBUSY; + goto err_unlock_submitted_jobs_xa; } ret = ivpu_cmdq_push_job(cmdq, job); if (ret) - goto err_xa_erase; + goto err_erase_xa; ivpu_start_job_timeout_detection(vdev); - ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n", - job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id, - job->engine_idx, cmdq->jobq->header.tail); - - if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) { - ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { cmdq->jobq->header.head = cmdq->jobq->header.tail; wmb(); /* Flush WC buffer for jobq header */ } else { ivpu_cmdq_ring_db(vdev, cmdq); } + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, job->engine_idx, + job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); + + xa_unlock(&vdev->submitted_jobs_xa); + mutex_unlock(&file_priv->lock); + + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) + ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + return 0; -err_xa_erase: - xa_erase(&vdev->submitted_jobs_xa, job->job_id); -err_job_put: - job_put(job); -err_unlock: +err_erase_xa: + __xa_erase(&vdev->submitted_jobs_xa, job->job_id); +err_unlock_submitted_jobs_xa: + xa_unlock(&vdev->submitted_jobs_xa); +err_unlock_file_priv: mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); return ret; } @@ -508,44 +490,47 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) params->buffer_count * sizeof(u32)); if (ret) { ret = -EFAULT; - goto free_handles; + goto err_free_handles; } if (!drm_dev_enter(&vdev->drm, &idx)) { ret = -ENODEV; - goto free_handles; + goto err_free_handles; } ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, params->buffer_count); - job = ivpu_create_job(file_priv, params->engine, params->buffer_count); + job = ivpu_job_create(file_priv, params->engine, params->buffer_count); if (!job) { ivpu_err(vdev, "Failed to create job\n"); ret = -ENOMEM; - goto dev_exit; + goto err_exit_dev; } ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, params->commands_offset); if (ret) { - ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); - goto job_put; + ivpu_err(vdev, "Failed to prepare job: %d\n", ret); + goto err_destroy_job; } - ret = ivpu_direct_job_submission(job); - if (ret) { - dma_fence_signal(job->done_fence); - ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); - } + ret = ivpu_job_submit(job); + if (ret) + goto err_signal_fence; -job_put: - job_put(job); -dev_exit: drm_dev_exit(idx); -free_handles: kfree(buf_handles); + return ret; +err_signal_fence: + dma_fence_signal(job->done_fence); +err_destroy_job: + ivpu_job_destroy(job); +err_exit_dev: + drm_dev_exit(idx); +err_free_handles: + kfree(buf_handles); return ret; } @@ -567,7 +552,7 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, } payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; - ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); + ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); if (!ret && !xa_empty(&vdev->submitted_jobs_xa)) ivpu_start_job_timeout_detection(vdev); } diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index bd22cf8e39e7..ca4984071cc7 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -43,7 +43,6 @@ struct ivpu_cmdq { will update the job status */ struct ivpu_job { - struct kref ref; struct ivpu_device *vdev; struct ivpu_file_priv *file_priv; struct dma_fence *done_fence; From 27d19268cf394f2c78db732be0cb31852eeadb0a Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 22 Jan 2024 13:09:45 +0100 Subject: [PATCH 366/768] accel/ivpu: Improve recovery and reset support - Synchronize job submission with reset/recovery using reset_lock - Always print recovery reason and call diagnose_failure() - Don't allow for autosupend during recovery - Prevent immediate autosuspend after reset/recovery - Prevent force_recovery for issuing TDR when device is suspended - Reset VPU instead triggering recovery after changing debugfs params Signed-off-by: Jacek Lawrynowicz Reviewed-by: Wachowski, Karol Link: https://patchwork.freedesktop.org/patch/msgid/20240122120945.1150728-4-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_debugfs.c | 20 ++++++++++--- drivers/accel/ivpu/ivpu_hw_37xx.c | 14 +++------ drivers/accel/ivpu/ivpu_hw_40xx.c | 8 +++--- drivers/accel/ivpu/ivpu_ipc.c | 6 ++-- drivers/accel/ivpu/ivpu_job.c | 2 ++ drivers/accel/ivpu/ivpu_mmu.c | 14 ++++----- drivers/accel/ivpu/ivpu_pm.c | 48 ++++++++++++++++++++----------- drivers/accel/ivpu/ivpu_pm.h | 6 ++-- 8 files changed, 70 insertions(+), 48 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 19035230563d..7cb962e21453 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -102,7 +102,7 @@ static int reset_pending_show(struct seq_file *s, void *v) { struct ivpu_device *vdev = seq_to_ivpu(s); - seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset)); + seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_pending)); return 0; } @@ -130,7 +130,9 @@ dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size fw->dvfs_mode = dvfs_mode; - ivpu_pm_schedule_recovery(vdev); + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + return ret; return size; } @@ -190,7 +192,10 @@ fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf, return ret; ivpu_hw_profiling_freq_drive(vdev, enable); - ivpu_pm_schedule_recovery(vdev); + + ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); + if (ret) + return ret; return size; } @@ -301,11 +306,18 @@ static ssize_t ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { struct ivpu_device *vdev = file->private_data; + int ret; if (!size) return -EINVAL; - ivpu_pm_schedule_recovery(vdev); + ret = ivpu_rpm_get(vdev); + if (ret) + return ret; + + ivpu_pm_trigger_recovery(vdev, "debugfs"); + flush_work(&vdev->pm->recovery_work); + ivpu_rpm_put(vdev); return size; } diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 574cdeefb66b..f15a93d83057 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -875,24 +875,18 @@ static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev) static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev) { - ivpu_err_ratelimited(vdev, "WDT NCE irq\n"); - - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); } static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev) { - ivpu_err_ratelimited(vdev, "WDT MSS irq\n"); - ivpu_hw_wdt_disable(vdev); - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); } static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev) { - ivpu_err_ratelimited(vdev, "NOC Firewall irq\n"); - - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); } /* Handler for IRQs from VPU core (irqV) */ @@ -970,7 +964,7 @@ static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq) REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status); if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); return true; } diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index c02061f299e2..704288084f37 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -1049,18 +1049,18 @@ static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev) static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev) { /* TODO: For LNN hang consider engine reset instead of full recovery */ - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ"); } static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev) { ivpu_hw_wdt_disable(vdev); - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ"); } static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev) { - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ"); } /* Handler for IRQs from VPU core (irqV) */ @@ -1154,7 +1154,7 @@ static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status); if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "Buttress IRQ"); return true; } diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index e86621f16f85..fa66c39b57ec 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -343,10 +343,8 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); - if (hb_ret == -ETIMEDOUT) { - ivpu_hw_diagnose_failure(vdev); - ivpu_pm_schedule_recovery(vdev); - } + if (hb_ret == -ETIMEDOUT) + ivpu_pm_trigger_recovery(vdev, "IPC timeout"); return ret; } diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index d9b47a04b35f..0440bee3ecaf 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -515,7 +515,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_destroy_job; } + down_read(&vdev->pm->reset_lock); ret = ivpu_job_submit(job); + up_read(&vdev->pm->reset_lock); if (ret) goto err_signal_fence; diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 1f813625aab3..9a3122ffce03 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -887,7 +887,6 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) { - bool schedule_recovery = false; u32 *event; u32 ssid; @@ -897,14 +896,13 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) ivpu_mmu_dump_event(vdev, event); ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]); - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) - schedule_recovery = true; - else - ivpu_mmu_user_context_mark_invalid(vdev, ssid); - } + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) { + ivpu_pm_trigger_recovery(vdev, "MMU event"); + return; + } - if (schedule_recovery) - ivpu_pm_schedule_recovery(vdev); + ivpu_mmu_user_context_mark_invalid(vdev, ssid); + } } void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 8407f1d8c99c..f501f27ebafd 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -112,6 +112,14 @@ static void ivpu_pm_recovery_work(struct work_struct *work) char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; int ret; + ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ret = pm_runtime_resume_and_get(vdev->drm.dev); + if (ret) + ivpu_err(vdev, "Failed to resume VPU: %d\n", ret); + + ivpu_fw_log_dump(vdev); + retry: ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { @@ -123,11 +131,13 @@ retry: ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); + pm_runtime_mark_last_busy(vdev->drm.dev); + pm_runtime_put_autosuspend(vdev->drm.dev); } -void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) +void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) { - struct ivpu_pm_info *pm = vdev->pm; + ivpu_err(vdev, "Recovery triggered by %s\n", reason); if (ivpu_disable_recovery) { ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n"); @@ -139,10 +149,11 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) return; } - /* Schedule recovery if it's not in progress */ - if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) { - ivpu_hw_irq_disable(vdev); - queue_work(system_long_wq, &pm->recovery_work); + /* Trigger recovery if it's not in progress */ + if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) { + ivpu_hw_diagnose_failure(vdev); + ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */ + queue_work(system_long_wq, &vdev->pm->recovery_work); } } @@ -150,12 +161,8 @@ static void ivpu_job_timeout_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); struct ivpu_device *vdev = pm->vdev; - unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; - ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms); - ivpu_hw_diagnose_failure(vdev); - - ivpu_pm_schedule_recovery(vdev); + ivpu_pm_trigger_recovery(vdev, "TDR"); } void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) @@ -228,6 +235,9 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) bool hw_is_idle = true; int ret; + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work)); + ivpu_dbg(vdev, PM, "Runtime suspend..\n"); if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) { @@ -310,11 +320,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - pm_runtime_get_sync(vdev->drm.dev); - ivpu_dbg(vdev, PM, "Pre-reset..\n"); atomic_inc(&vdev->pm->reset_counter); - atomic_set(&vdev->pm->in_reset, 1); + atomic_set(&vdev->pm->reset_pending, 1); + + pm_runtime_get_sync(vdev->drm.dev); + down_write(&vdev->pm->reset_lock); ivpu_prepare_for_reset(vdev); ivpu_hw_reset(vdev); ivpu_pm_prepare_cold_boot(vdev); @@ -331,9 +342,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev) ret = ivpu_resume(vdev); if (ret) ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - atomic_set(&vdev->pm->in_reset, 0); + up_write(&vdev->pm->reset_lock); + atomic_set(&vdev->pm->reset_pending, 0); ivpu_dbg(vdev, PM, "Post-reset done.\n"); + pm_runtime_mark_last_busy(vdev->drm.dev); pm_runtime_put_autosuspend(vdev->drm.dev); } @@ -346,7 +359,10 @@ void ivpu_pm_init(struct ivpu_device *vdev) pm->vdev = vdev; pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; - atomic_set(&pm->in_reset, 0); + init_rwsem(&pm->reset_lock); + atomic_set(&pm->reset_pending, 0); + atomic_set(&pm->reset_counter, 0); + INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work); diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index 97c6e0b0aa42..ec60fbeefefc 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -6,6 +6,7 @@ #ifndef __IVPU_PM_H__ #define __IVPU_PM_H__ +#include #include struct ivpu_device; @@ -14,8 +15,9 @@ struct ivpu_pm_info { struct ivpu_device *vdev; struct delayed_work job_timeout_work; struct work_struct recovery_work; - atomic_t in_reset; + struct rw_semaphore reset_lock; atomic_t reset_counter; + atomic_t reset_pending; bool is_warmboot; u32 suspend_reschedule_counter; }; @@ -37,7 +39,7 @@ int __must_check ivpu_rpm_get(struct ivpu_device *vdev); int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev); void ivpu_rpm_put(struct ivpu_device *vdev); -void ivpu_pm_schedule_recovery(struct ivpu_device *vdev); +void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason); void ivpu_start_job_timeout_detection(struct ivpu_device *vdev); void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev); From 5e344807735023cd3a67c37a1852b849caa42620 Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Tue, 23 Jan 2024 10:51:41 -0600 Subject: [PATCH 367/768] net: fec: fix the unhandled context fault from smmu When repeatedly changing the interface link speed using the command below: ethtool -s eth0 speed 100 duplex full ethtool -s eth0 speed 1000 duplex full The following errors may sometimes be reported by the ARM SMMU driver: [ 5395.035364] fec 5b040000.ethernet eth0: Link is Down [ 5395.039255] arm-smmu 51400000.iommu: Unhandled context fault: fsr=0x402, iova=0x00000000, fsynr=0x100001, cbfrsynra=0x852, cb=2 [ 5398.108460] fec 5b040000.ethernet eth0: Link is Up - 100Mbps/Full - flow control off It is identified that the FEC driver does not properly stop the TX queue during the link speed transitions, and this results in the invalid virtual I/O address translations from the SMMU and causes the context faults. Fixes: dbc64a8ea231 ("net: fec: move calls to quiesce/resume packet processing out of fec_restart()") Signed-off-by: Shenwei Wang Link: https://lore.kernel.org/r/20240123165141.2008104-1-shenwei.wang@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 4b0259e9269a..432523b2c789 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2036,6 +2036,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) /* if any of the above changed restart the FEC */ if (status_change) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); @@ -2045,6 +2046,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) } } else { if (fep->link) { + netif_stop_queue(ndev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_stop(ndev); From 50bad6f797d4d501c5ef416a6f92e1912ab5aa8b Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Tue, 23 Jan 2024 21:09:17 +0100 Subject: [PATCH 368/768] tsnep: Remove FCS for XDP data path The RX data buffer includes the FCS. The FCS is already stripped for the normal data path. But for the XDP data path the FCS is included and acts like additional/useless data. Remove the FCS from the RX data buffer also for XDP. Fixes: 65b28c810035 ("tsnep: Add XDP RX support") Fixes: 3fc2333933fd ("tsnep: Add XDP socket zero-copy RX support") Signed-off-by: Gerhard Engleder Signed-off-by: Paolo Abeni --- drivers/net/ethernet/engleder/tsnep_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index df40c720e7b2..456e0336f3f6 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1485,7 +1485,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi, xdp_prepare_buff(&xdp, page_address(entry->page), XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE, - length, false); + length - ETH_FCS_LEN, false); consume = tsnep_xdp_run_prog(rx, prog, &xdp, &xdp_status, tx_nq, tx); @@ -1568,7 +1568,7 @@ static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi, prefetch(entry->xdp->data); length = __le32_to_cpu(entry->desc_wb->properties) & TSNEP_DESC_LENGTH_MASK; - xsk_buff_set_size(entry->xdp, length); + xsk_buff_set_size(entry->xdp, length - ETH_FCS_LEN); xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool); /* RX metadata with timestamps is in front of actual data, From 9a91c05f4bd6f6bdd6b8f90445e0da92e3ac956c Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Tue, 23 Jan 2024 21:09:18 +0100 Subject: [PATCH 369/768] tsnep: Fix XDP_RING_NEED_WAKEUP for empty fill ring The fill ring of the XDP socket may contain not enough buffers to completey fill the RX queue during socket creation. In this case the flag XDP_RING_NEED_WAKEUP is not set as this flag is only set if the RX queue is not completely filled during polling. Set XDP_RING_NEED_WAKEUP flag also if RX queue is not completely filled during XDP socket creation. Fixes: 3fc2333933fd ("tsnep: Add XDP socket zero-copy RX support") Signed-off-by: Gerhard Engleder Signed-off-by: Paolo Abeni --- drivers/net/ethernet/engleder/tsnep_main.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 456e0336f3f6..9aeff2b37a61 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1762,6 +1762,19 @@ static void tsnep_rx_reopen_xsk(struct tsnep_rx *rx) allocated--; } } + + /* set need wakeup flag immediately if ring is not filled completely, + * first polling would be too late as need wakeup signalisation would + * be delayed for an indefinite time + */ + if (xsk_uses_need_wakeup(rx->xsk_pool)) { + int desc_available = tsnep_rx_desc_available(rx); + + if (desc_available) + xsk_set_rx_need_wakeup(rx->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx->xsk_pool); + } } static bool tsnep_pending(struct tsnep_queue *queue) From b9328fd636bd50da89e792e135b234ba8e6fe59f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 24 Jan 2024 16:07:49 -0600 Subject: [PATCH 370/768] x86/CPU/AMD: Add more models to X86_FEATURE_ZEN5 Add model ranges starting at 0x20, 0x40 and 0x70 to the synthetic feature flag X86_FEATURE_ZEN5. Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240124220749.2983-1-mario.limonciello@amd.com --- arch/x86/kernel/cpu/amd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bc49e3b3aae0..f3abca334199 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -573,6 +573,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) case 0x1a: switch (c->x86_model) { case 0x00 ... 0x0f: + case 0x20 ... 0x2f: + case 0x40 ... 0x4f: + case 0x70 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; default: From 20730e9b277873deeb6637339edcba64468f3da3 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 25 Jan 2024 17:04:01 +0200 Subject: [PATCH 371/768] ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers With one of the on-board ASM1061 AHCI controllers (1b21:0612) on an ASUSTeK Pro WS WRX80E-SAGE SE WIFI mainboard, a controller hang was observed that was immediately preceded by the following kernel messages: ahci 0000:28:00.0: Using 64-bit DMA addresses ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00000 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00300 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00380 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00400 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00680 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00700 flags=0x0000] The first message is produced by code in drivers/iommu/dma-iommu.c which is accompanied by the following comment that seems to apply: /* * Try to use all the 32-bit PCI addresses first. The original SAC vs. * DAC reasoning loses relevance with PCIe, but enough hardware and * firmware bugs are still lurking out there that it's safest not to * venture into the 64-bit space until necessary. * * If your device goes wrong after seeing the notice then likely either * its driver is not setting DMA masks accurately, the hardware has * some inherent bug in handling >32-bit addresses, or not all the * expected address bits are wired up between the device and the IOMMU. */ Asking the ASM1061 on a discrete PCIe card to DMA from I/O virtual address 0xffffffff00000000 produces the following I/O page faults: vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000000 flags=0x0010] vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000500 flags=0x0010] Note that the upper 21 bits of the logged DMA address are zero. (When asking a different PCIe device in the same PCIe slot to DMA to the same I/O virtual address, we do see all the upper 32 bits of the DMA address as 1, so this is not an issue with the chipset or IOMMU configuration on the test system.) Also, hacking libahci to always set the upper 21 bits of all DMA addresses to 1 produces no discernible effect on the behavior of the ASM1061, and mkfs/mount/scrub/etc work as without this hack. This all strongly suggests that the ASM1061 has a 43 bit DMA address limit, and this commit therefore adds a quirk to deal with this limit. This issue probably applies to (some of) the other supported ASMedia parts as well, but we limit it to the PCI IDs known to refer to ASM1061 parts, as that's the only part we know for sure to be affected by this issue at this point. Link: https://lore.kernel.org/linux-ide/ZaZ2PIpEId-rl6jv@wantstofly.org/ Signed-off-by: Lennert Buytenhek [cassel: drop date from error messages in commit log] Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 29 +++++++++++++++++++++++------ drivers/ata/ahci.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 762c5d8b7c1a..d2460fa985b7 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -48,6 +48,7 @@ enum { enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, + board_ahci_43bit_dma, board_ahci_ign_iferr, board_ahci_low_power, board_ahci_no_debounce_delay, @@ -128,6 +129,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_43bit_dma] = { + AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_ign_iferr] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -597,11 +605,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ - /* Asmedia */ + /* ASMedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */ - { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ + { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ @@ -954,11 +962,20 @@ static int ahci_pci_device_resume(struct device *dev) #endif /* CONFIG_PM */ -static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac) +static int ahci_configure_dma_masks(struct pci_dev *pdev, + struct ahci_host_priv *hpriv) { - const int dma_bits = using_dac ? 64 : 32; + int dma_bits; int rc; + if (hpriv->cap & HOST_CAP_64) { + dma_bits = 64; + if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY) + dma_bits = 43; + } else { + dma_bits = 32; + } + /* * If the device fixup already set the dma_mask to some non-standard * value, don't extend it here. This happens on STA2X11, for example. @@ -1931,7 +1948,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_gtf_filter_workaround(host); /* initialize adapter */ - rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64); + rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) return rc; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 4bae95b06ae3..df8f8a1a3a34 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -247,6 +247,7 @@ enum { AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ + AHCI_HFLAG_43BIT_ONLY = BIT(29), /* 43bit DMA addr limit */ /* ap->flags bits */ From 8412c47d68436b9f9a260039a4a773daa6824925 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 13 Jan 2024 10:03:51 +0100 Subject: [PATCH 372/768] ARM: dts: Fix TPM schema violations Since commit 26c9d152ebf3 ("dt-bindings: tpm: Consolidate TCG TIS bindings"), several issues are reported by "make dtbs_check" for ARM devicetrees: The nodename needs to be "tpm@0" rather than "tpmdev@0" and the compatible property needs to contain the chip's name in addition to the generic "tcg,tpm_tis-spi" or "tcg,tpm-tis-i2c": tpmdev@0: $nodename:0: 'tpmdev@0' does not match '^tpm(@[0-9a-f]+)?$' from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm_tis-spi.yaml# tpm@2e: compatible: 'oneOf' conditional failed, one must be fixed: ['tcg,tpm-tis-i2c'] is too short from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm-tis-i2c.yaml# Fix these schema violations. Aspeed Facebook BMCs use an Infineon SLB9670: https://lore.kernel.org/all/ZZSmMJ%2F%2Fl972Qbxu@fedora/ https://lore.kernel.org/all/ZZT4%2Fw2eVzMhtsPx@fedora/ https://lore.kernel.org/all/ZZTS0p1hdAchIbKp@heinlein.vulture-banana.ts.net/ Aspeed Tacoma uses a Nuvoton NPCT75X per commit 39d8a73c53a2 ("ARM: dts: aspeed: tacoma: Add TPM"). phyGATE-Tauri uses an Infineon SLB9670: https://lore.kernel.org/all/ab45c82485fa272f74adf560cbb58ee60cc42689.camel@phytec.de/ A single schema violation remains in am335x-moxa-uc-2100-common.dtsi because it is unknown which chip is used on the board. The devicetree's author has been asked for clarification but has not responded so far: https://lore.kernel.org/all/20231220090910.GA32182@wunner.de/ Signed-off-by: Lukas Wunner Reviewed-by: Patrick Williams Reviewed-by: Tao Ren Reviewed-by: Bruno Thomsen --- arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts | 4 ++-- arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts | 4 ++-- arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts | 2 +- arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi | 4 ++-- arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi | 2 +- arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts | 2 +- arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts index e899de681f47..5be0e8fd2633 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts @@ -45,8 +45,8 @@ num-chipselects = <1>; cs-gpios = <&gpio0 ASPEED_GPIO(Z, 0) GPIO_ACTIVE_LOW>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts index a677c827e758..5a8169bbda87 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts @@ -80,8 +80,8 @@ gpio-miso = <&gpio ASPEED_GPIO(R, 5) GPIO_ACTIVE_HIGH>; num-chipselects = <1>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts index 3f6010ef2b86..213023bc5aec 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts @@ -456,7 +456,7 @@ status = "okay"; tpm: tpm@2e { - compatible = "tcg,tpm-tis-i2c"; + compatible = "nuvoton,npct75x", "tcg,tpm-tis-i2c"; reg = <0x2e>; }; }; diff --git a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi index 31590d3186a2..00e5887c926f 100644 --- a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi +++ b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi @@ -35,8 +35,8 @@ gpio-mosi = <&gpio0 ASPEED_GPIO(X, 4) GPIO_ACTIVE_HIGH>; gpio-miso = <&gpio0 ASPEED_GPIO(X, 5) GPIO_ACTIVE_HIGH>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi index 44cc4ff1d0df..d12fb44aeb14 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi @@ -116,7 +116,7 @@ tpm_tis: tpm@1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_tpm>; - compatible = "tcg,tpm_tis-spi"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; reg = <1>; spi-max-frequency = <20000000>; interrupt-parent = <&gpio5>; diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts index 3a723843d562..9984b343cdf0 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts @@ -130,7 +130,7 @@ * TCG specification - Section 6.4.1 Clocking: * TPM shall support a SPI clock frequency range of 10-24 MHz. */ - st33htph: tpm-tis@0 { + st33htph: tpm@0 { compatible = "st,st33htpm-spi", "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <24000000>; diff --git a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi index b8730aa52ce6..a59331aa58e5 100644 --- a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi +++ b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi @@ -217,7 +217,7 @@ pinctrl-names = "default"; pinctrl-0 = <&spi1_pins>; - tpm_spi_tis@0 { + tpm@0 { compatible = "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <500000>; From 5e2400f11d4deec444ac00b73e96267e057fbb37 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 13 Jan 2024 19:06:56 +0100 Subject: [PATCH 373/768] arm64: dts: Fix TPM schema violations Since commit 26c9d152ebf3 ("dt-bindings: tpm: Consolidate TCG TIS bindings"), several issues are reported by "make dtbs_check" for arm64 devicetrees: The compatible property needs to contain the chip's name in addition to the generic "tcg,tpm_tis-spi" and the nodename needs to be "tpm@0" rather than "cr50@0": tpm@1: compatible: ['tcg,tpm_tis-spi'] is too short from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm_tis-spi.yaml# cr50@0: $nodename:0: 'cr50@0' does not match '^tpm(@[0-9a-f]+)?$' from schema $id: http://devicetree.org/schemas/tpm/google,cr50.yaml# Fix these schema violations. phyGATE-Tauri uses an Infineon SLB9670: https://lore.kernel.org/all/ab45c82485fa272f74adf560cbb58ee60cc42689.camel@phytec.de/ Gateworks Venice uses an Atmel ATTPM20P: https://trac.gateworks.com/wiki/tpm Signed-off-by: Lukas Wunner Acked-by: Heiko Stuebner Reviewed-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts | 2 +- arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts | 2 +- arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi | 2 +- arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts | 2 +- arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts | 2 +- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 2 +- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts index 968f475b9a96..27a902569e2a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts @@ -120,7 +120,7 @@ }; tpm: tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpio2>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi index 3f3f2a2c89cd..752caa38eb03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi @@ -89,7 +89,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi index 06fed9376996..2aa6c1090fc7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi @@ -109,7 +109,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts index feae77e03835..a08057410bde 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts @@ -234,7 +234,7 @@ status = "okay"; tpm: tpm@0 { - compatible = "infineon,slb9670"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; reg = <0>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_tpm>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index c24587c895e1..41c79d2ebdd6 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -103,7 +103,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index 628ffba69862..d5c400b355af 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -115,7 +115,7 @@ status = "okay"; tpm@1 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x1>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts index 9caf7ca25444..cae586cd45bd 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts @@ -196,7 +196,7 @@ status = "okay"; tpm@0 { - compatible = "tcg,tpm_tis-spi"; + compatible = "atmel,attpm20p", "tcg,tpm_tis-spi"; reg = <0x0>; spi-max-frequency = <36000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts index 6376417e918c..d8cf1f27c3ec 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts @@ -65,7 +65,7 @@ status = "okay"; tpm@0 { - compatible = "infineon,slb9670"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <43000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 5506de83f61d..1b3396b1cee3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -888,7 +888,7 @@ status = "okay"; cs-gpios = <&pio 86 GPIO_ACTIVE_LOW>; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; spi-max-frequency = <1000000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index f2281250ac35..d87aab8d7a79 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -1402,7 +1402,7 @@ pinctrl-names = "default"; pinctrl-0 = <&spi5_pins>; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; interrupts-extended = <&pio 171 IRQ_TYPE_EDGE_RISING>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts index 0f9cc042d9bf..1cba1d857c96 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts @@ -70,7 +70,7 @@ &spi0 { status = "okay"; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; interrupt-parent = <&gpio0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index c5e7de60c121..5846a11f0e84 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -706,7 +706,7 @@ camera: &i2c7 { &spi2 { status = "okay"; - cr50@0 { + tpm@0 { compatible = "google,cr50"; reg = <0>; interrupt-parent = <&gpio1>; From fdaca31a7668cb17f70df5c32b6a9b90e82fc9b5 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 19 Jan 2024 11:32:41 +0800 Subject: [PATCH 374/768] drm/amd/pm: udpate smu v13.0.6 message permission update smu v13.0.6 message to allow guest driver set gfx clock. Signed-off-by: Yang Wang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3c98a8a0386a..7e1941cf1796 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -160,8 +160,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxDpmFreq, 1), MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxDpmFreq, 1), - MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 0), - MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 0), + MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 1), + MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 1), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0), MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0), MSG_MAP(GetThermalLimit, PPSMC_MSG_ReadThrottlerLimit, 0), From a58371d632ebab9ea63f10893a6b6731196b6f8d Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 17 Jan 2024 08:41:52 +0530 Subject: [PATCH 375/768] drm/amd/display: Fix uninitialized variable usage in core_link_ 'read_dpcd() & write_dpcd()' functions The 'status' variable in 'core_link_read_dpcd()' & 'core_link_write_dpcd()' was uninitialized. Thus, initializing 'status' variable to 'DC_ERROR_UNEXPECTED' by default. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:226 core_link_read_dpcd() error: uninitialized symbol 'status'. drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dpcd.c:248 core_link_write_dpcd() error: uninitialized symbol 'status'. Cc: stable@vger.kernel.org Cc: Jerry Zuo Cc: Jun Lei Cc: Wayne Lin Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c index 5c9a30211c10..fc50931c2aec 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c @@ -205,7 +205,7 @@ enum dc_status core_link_read_dpcd( uint32_t extended_size; /* size of the remaining partitioned address space */ uint32_t size_left_to_read; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; /* size of the next partition to be read from */ uint32_t partition_size; uint32_t data_index = 0; @@ -234,7 +234,7 @@ enum dc_status core_link_write_dpcd( { uint32_t partition_size; uint32_t data_index = 0; - enum dc_status status; + enum dc_status status = DC_ERROR_UNEXPECTED; while (size) { partition_size = dpcd_get_next_partition_size(address, size); From ca1ffb174f16b699c536734fc12a4162097c49f4 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 17 Jan 2024 14:35:29 +0800 Subject: [PATCH 376/768] drm/amdgpu/pm: Fix the power source flag error The power source flag should be updated when [1] System receives an interrupt indicating that the power source has changed. [2] System resumes from suspend or runtime suspend Signed-off-by: Ma Jun Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 13 +++---------- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 ++ 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index c16703868e5c..a54663f2e2ab 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -24,6 +24,7 @@ #include #include +#include #include #include "amdgpu.h" @@ -817,16 +818,8 @@ static int smu_late_init(void *handle) * handle the switch automatically. Driver involvement * is unnecessary. */ - if (!smu->dc_controlled_by_gpio) { - ret = smu_set_power_source(smu, - adev->pm.ac_power ? SMU_POWER_SOURCE_AC : - SMU_POWER_SOURCE_DC); - if (ret) { - dev_err(adev->dev, "Failed to switch to %s mode!\n", - adev->pm.ac_power ? "AC" : "DC"); - return ret; - } - } + adev->pm.ac_power = power_supply_is_system_supplied() > 0; + smu_set_ac_dc(smu); if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 1)) || (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 3))) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5a314d0316c1..c7bfa68bf00f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1442,10 +1442,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); schedule_work(&smu->interrupt_work); + adev->pm.ac_power = false; break; case 0x7: /* diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 771a3d457c33..c486182ff275 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1379,10 +1379,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = true; break; case 0x4: dev_dbg(adev->dev, "Switched to DC mode!\n"); smu_v13_0_ack_ac_dc_interrupt(smu); + adev->pm.ac_power = false; break; case 0x7: /* From 90751bdeee4e3ac87ebf814bf282b0fa97edfeab Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 20 Jan 2024 13:32:51 +0530 Subject: [PATCH 377/768] drm/amdgpu: Avoid fetching vram vendor information For GFX 9.4.3 APUs, the current method of fetching vram vendor information is not reliable. Avoid fetching the information. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f9039d64ff2d..17b7a25121b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1950,7 +1950,8 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; u32 vram_info; - if (!amdgpu_sriov_vf(adev)) { + /* Only for dGPU, vendor informaton is reliable */ + if (!amdgpu_sriov_vf(adev) && !(adev->flags & AMD_IS_APU)) { vram_info = RREG32(regBIF_BIOS_SCRATCH_4); adev->gmc.vram_vendor = vram_info & 0xF; } From 30269954745c6cac730352829ac9850918457440 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 19 Jan 2024 16:12:00 +0800 Subject: [PATCH 378/768] drm/amd/pm: update the power cap setting update the power cap setting for smu_v13.0.0/smu_v13.0.7 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2356 Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 54 ++++++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 54 ++++++++++++++++++- 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a9b25faa63e4..4fdf34fffa9a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2357,6 +2357,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; uint32_t power_limit, od_percent_upper, od_percent_lower; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2380,7 +2381,7 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, od_percent_upper, od_percent_lower, power_limit); if (max_power_limit) { - *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit = msg_limit * (100 + od_percent_upper); *max_power_limit /= 100; } @@ -2959,6 +2960,55 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) } } +static int smu_v13_0_0_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_0_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v13_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_0_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -3013,7 +3063,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .set_fan_control_mode = smu_v13_0_set_fan_control_mode, .enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_0_get_power_limit, - .set_power_limit = smu_v13_0_set_power_limit, + .set_power_limit = smu_v13_0_0_set_power_limit, .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_0_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_0_set_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 59606a19e3d2..7c3e162e2d81 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2321,6 +2321,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; uint32_t power_limit, od_percent_upper, od_percent_lower; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v13_0_get_current_power_limit(smu, &power_limit)) power_limit = smu->adev->pm.ac_power ? @@ -2344,7 +2345,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, od_percent_upper, od_percent_lower, power_limit); if (max_power_limit) { - *max_power_limit = power_limit * (100 + od_percent_upper); + *max_power_limit = msg_limit * (100 + od_percent_upper); *max_power_limit /= 100; } @@ -2545,6 +2546,55 @@ static bool smu_v13_0_7_wbrf_support_check(struct smu_context *smu) return smu->smc_fw_version > 0x00524600; } +static int smu_v13_0_7_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + uint32_t msg_limit = skutable->MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_7_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v13_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v13_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v13_0_7_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -2596,7 +2646,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .set_fan_control_mode = smu_v13_0_set_fan_control_mode, .enable_mgpu_fan_boost = smu_v13_0_7_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_7_get_power_limit, - .set_power_limit = smu_v13_0_set_power_limit, + .set_power_limit = smu_v13_0_7_set_power_limit, .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_7_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_7_set_power_profile_mode, From 89a7c0bd74918f723c94c10452265e25063cba9b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 20 Jan 2024 13:48:09 +0530 Subject: [PATCH 379/768] drm/amdgpu: Show vram vendor only if available Ony if vram vendor info is available, show in sysfs. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 08916538a615..8db880244324 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -221,8 +221,23 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; +static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (attr == &dev_attr_mem_info_vram_vendor.attr && + !adev->gmc.vram_vendor) + return 0; + + return attr->mode; +} + const struct attribute_group amdgpu_vram_mgr_attr_group = { - .attrs = amdgpu_vram_mgr_attributes + .attrs = amdgpu_vram_mgr_attributes, + .is_visible = amdgpu_vram_attrs_is_visible }; /** From bc8f6d42b1334f486980d57c8d12f3128d30c2e3 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Mon, 22 Jan 2024 17:38:23 +0800 Subject: [PATCH 380/768] drm/amdgpu: Fix null pointer dereference amdgpu_reg_state_sysfs_fini could be invoked at the time when asic_func is even not initialized, i.e., amdgpu_discovery_init fails for some reason. Signed-off-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/include/amdgpu_reg_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h index be519c8edf49..335980e2afbf 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h +++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h @@ -138,7 +138,7 @@ static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size, } #define amdgpu_asic_get_reg_state_supported(adev) \ - ((adev)->asic_funcs->get_reg_state ? 1 : 0) + (((adev)->asic_funcs && (adev)->asic_funcs->get_reg_state) ? 1 : 0) #define amdgpu_asic_get_reg_state(adev, state, buf, size) \ ((adev)->asic_funcs->get_reg_state ? \ From f1807682de0edbff6c1e46b19642a517d2e15c57 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 18 Jan 2024 14:25:35 +0530 Subject: [PATCH 381/768] drm/amd/pm: Fetch current power limit from FW Power limit of SMUv13.0.6 SOCs can be updated by out-of-band ways. Fetch the limit from firmware instead of using cached values. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index a54663f2e2ab..7ffad3eb0a01 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2703,6 +2703,7 @@ int smu_get_power_limit(void *handle, case SMU_PPT_LIMIT_CURRENT: switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 0, 12): From e7a8594cc2af920a905db15653c19c362d4ebd3f Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 17 Jan 2024 12:47:37 -0500 Subject: [PATCH 382/768] drm/amd/amdgpu: Assign GART pages to AMD device mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows kernel mapped pages like the PDB and PTB to be read via the iomem debugfs when there is no vram in the system. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.7.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 73b8cca35bab..c623e23049d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -121,6 +121,7 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) struct amdgpu_bo_param bp; dma_addr_t dma_addr; struct page *p; + unsigned long x; int ret; if (adev->gart.bo != NULL) @@ -130,6 +131,10 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) if (!p) return -ENOMEM; + /* assign pages to this device */ + for (x = 0; x < (1UL << order); x++) + p[x].mapping = adev->mman.bdev.dev_mapping; + /* If the hardware does not support UTCL2 snooping of the CPU caches * then set_memory_wc() could be used as a workaround to mark the pages * as write combine memory. @@ -223,6 +228,7 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) unsigned int order = get_order(adev->gart.table_size); struct sg_table *sg = adev->gart.bo->tbo.sg; struct page *p; + unsigned long x; int ret; ret = amdgpu_bo_reserve(adev->gart.bo, false); @@ -234,6 +240,8 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) sg_free_table(sg); kfree(sg); p = virt_to_page(adev->gart.ptr); + for (x = 0; x < (1UL << order); x++) + p[x].mapping = NULL; __free_pages(p, order); adev->gart.ptr = NULL; From 03ff6d7238b77e5fb2b85dc5fe01d2db9eb893bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Jan 2024 12:23:55 -0500 Subject: [PATCH 383/768] drm/amdgpu/gfx10: set UNORD_DISPATCH in compute MQDs This needs to be set to 1 to avoid a potential deadlock in the GC 10.x and newer. On GC 9.x and older, this needs to be set to 0. This can lead to hangs in some mixed graphics and compute workloads. Updated firmware is also required for AQL. Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d63cab294883..ecb622b7f970 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6589,7 +6589,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 8b7fed913526..22cbfa1bdadd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -170,6 +170,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); From 3380fcad2c906872110d31ddf7aa1fdea57f9df6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Jan 2024 12:32:59 -0500 Subject: [PATCH 384/768] drm/amdgpu/gfx11: set UNORD_DISPATCH in compute MQDs This needs to be set to 1 to avoid a potential deadlock in the GC 10.x and newer. On GC 9.x and older, this needs to be set to 0. This can lead to hangs in some mixed graphics and compute workloads. Updated firmware is also required for AQL. Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0ea0866c261f..d9cf9fd03d30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -3846,7 +3846,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 15277f1d5cf0..d722cbd31783 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -224,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); From ff8caade7429f28217c293672ab64323031f350e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 7 Dec 2023 14:12:03 -0500 Subject: [PATCH 385/768] drm/amd/display: Allow IPS2 during Replay [Why & How] Add regkey to block video playback in IPS2 by default Allow idle optimizations in the same spot we allow Replay for video playback usecases. Avoid sending it when there's an external display connected by modifying the allow idle checks to check for active non-eDP screens. Reviewed-by: Charlene Liu Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 9 ++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d4f525b66a09..5552d30a6d97 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1717,6 +1717,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + init_data.flags.disable_ips_in_vpb = 1; + /* Enable DWB for tested platforms only */ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5d7aa882416b..c9317ea0258e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -434,6 +434,7 @@ struct dc_config { bool EnableMinDispClkODM; bool enable_auto_dpm_test_logs; unsigned int disable_ips; + unsigned int disable_ips_in_vpb; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 9c806385ecbd..8b6c49622f3b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -680,7 +680,7 @@ void dcn35_power_down_on_boot(struct dc *dc) bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) { struct dc_link *edp_links[MAX_NUM_EDP]; - int edp_num; + int i, edp_num; if (dc->debug.dmcub_emulation) return true; @@ -688,6 +688,13 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable) dc_get_edp_links(dc, edp_links, &edp_num); if (edp_num == 0 || edp_num > 1) return false; + + for (i = 0; i < dc->current_state->stream_count; ++i) { + struct dc_stream_state *stream = dc->current_state->streams[i]; + + if (!stream->dpms_off && !dc_is_embedded_signal(stream->signal)) + return false; + } } // TODO: review other cases when idle optimization is allowed From 955406e6fd241b2936e7f033a03b2956922c8f32 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 2 Jan 2024 12:02:39 -0500 Subject: [PATCH 386/768] drm/amd/display: Add Replay IPS register for DMUB command table - Introduce a new Replay mode for DMUB version 0.0.199.0 Reviewed-by: Martin Leung Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index c64b6c848ef7..bcd3c361cca5 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2832,6 +2832,7 @@ struct dmub_rb_cmd_psr_set_power_opt { #define REPLAY_RESIDENCY_MODE_MASK (0x1 << REPLAY_RESIDENCY_MODE_SHIFT) # define REPLAY_RESIDENCY_MODE_PHY (0x0 << REPLAY_RESIDENCY_MODE_SHIFT) # define REPLAY_RESIDENCY_MODE_ALPM (0x1 << REPLAY_RESIDENCY_MODE_SHIFT) +# define REPLAY_RESIDENCY_MODE_IPS 0x10 #define REPLAY_RESIDENCY_ENABLE_MASK (0x1 << REPLAY_RESIDENCY_ENABLE_SHIFT) # define REPLAY_RESIDENCY_DISABLE (0x0 << REPLAY_RESIDENCY_ENABLE_SHIFT) From 196107eb1e1557df25e1425bbfb53e0f7588b80a Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 9 Jan 2024 17:31:33 -0500 Subject: [PATCH 387/768] drm/amd/display: Add IPS checks before dcn register access [Why] With IPS enabled a system hangs once PSR is active. PSR active triggers transition to IPS2 state. While in IPS2 an access to dcn registers results in hard hang. Existing check doesn't cover for PSR sequence. [How] Safeguard register access by disabling idle optimization in atomic commit and crtc scanout. It will be re-enabled on next vblank. Reviewed-by: Nicholas Kazlauskas Acked-by: Roman Li Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5552d30a6d97..d8f85df2bc66 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -272,6 +272,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, { u32 v_blank_start, v_blank_end, h_position, v_position; struct amdgpu_crtc *acrtc = NULL; + struct dc *dc = adev->dm.dc; if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc)) return -EINVAL; @@ -284,6 +285,9 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, return 0; } + if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dc, false); + /* * TODO rework base driver to use values directly. * for now parse it back into reg-format @@ -8978,16 +8982,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) trace_amdgpu_dm_atomic_commit_tail_begin(state); - if (dm->dc->caps.ips_support) { - for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { - if (new_con_state->crtc && - new_con_state->crtc->state->active && - drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) { - dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false); - break; - } - } - } + if (dm->dc->caps.ips_support && dm->dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dm->dc, false); drm_atomic_helper_update_legacy_modeset_state(dev, state); drm_dp_mst_atomic_wait_for_dependencies(state); From f37f7979202d45489d84469838f5352cda3557bc Mon Sep 17 00:00:00 2001 From: ChunTao Tso Date: Mon, 8 Jan 2024 13:46:59 +0800 Subject: [PATCH 388/768] drm/amd/display: Replay + IPS + ABM in Full Screen VPB [Why] Because ABM will wait VStart to start getting histogram data, it will cause we can't enter IPS while full screnn video playing. [How] Modify the panel refresh rate to the maximun multiple of current refresh rate. Reviewed-by: Dennis Chan Acked-by: Roman Li Signed-off-by: ChunTao Tso Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 5 ++ .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 46 +++++++++++++++++++ .../amd/display/modules/power/power_helpers.c | 5 ++ .../amd/display/modules/power/power_helpers.h | 1 + 4 files changed, 57 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b08ccb8c68bc..9900dda2eef5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1034,6 +1034,7 @@ enum replay_FW_Message_type { Replay_Msg_Not_Support = -1, Replay_Set_Timing_Sync_Supported, Replay_Set_Residency_Frameupdate_Timer, + Replay_Set_Pseudo_VTotal, }; union replay_error_status { @@ -1089,6 +1090,10 @@ struct replay_settings { uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; /* Maximum link off frame count */ enum replay_link_off_frame_count_level link_off_frame_count_level; + /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */ + uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal; + /* Replay last pseudo vtotal set to DMUB */ + uint16_t last_pseudo_vtotal; }; /* To split out "global" and "per-panel" config settings. diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index bcd3c361cca5..e699731ee68e 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2895,6 +2895,10 @@ enum dmub_cmd_replay_type { * Set Residency Frameupdate Timer. */ DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6, + /** + * Set pseudo vtotal + */ + DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL = 7, }; /** @@ -3077,6 +3081,26 @@ struct dmub_cmd_replay_set_timing_sync_data { uint8_t pad[2]; }; +/** + * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ +struct dmub_cmd_replay_set_pseudo_vtotal { + /** + * Panel Instance. + * Panel isntance to identify which replay_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * Source Vtotal that Replay + IPS + ABM full screen video src vtotal + */ + uint16_t vtotal; + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad; +}; + /** * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command. */ @@ -3157,6 +3181,20 @@ struct dmub_rb_cmd_replay_set_timing_sync { struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data; }; +/** + * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ +struct dmub_rb_cmd_replay_set_pseudo_vtotal { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ + struct dmub_cmd_replay_set_pseudo_vtotal data; +}; + /** * Data passed from driver to FW in DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. */ @@ -3208,6 +3246,10 @@ union dmub_replay_cmd_set { * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data. */ struct dmub_cmd_replay_frameupdate_timer_data timer_data; + /** + * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command data. + */ + struct dmub_cmd_replay_set_pseudo_vtotal pseudo_vtotal_data; }; /** @@ -4359,6 +4401,10 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command. */ struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer; + /** + * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command. + */ + struct dmub_rb_cmd_replay_set_pseudo_vtotal replay_set_pseudo_vtotal; }; /** diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index ad98e504c00d..e304e8435fb8 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -980,6 +980,11 @@ void set_replay_coasting_vtotal(struct dc_link *link, link->replay_settings.coasting_vtotal_table[type] = vtotal; } +void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal) +{ + link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal; +} + void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, uint16_t htotal) { diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index c17bbc6fb38c..bef4815e1703 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -57,6 +57,7 @@ void init_replay_config(struct dc_link *link, struct replay_config *pr_config); void set_replay_coasting_vtotal(struct dc_link *link, enum replay_coasting_vtotal_type type, uint16_t vtotal); +void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal); void calculate_replay_link_off_frame_count(struct dc_link *link, uint16_t vtotal, uint16_t htotal); From 8894b9283afd35b8d22ae07a0c118eb5f7d2e78b Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 22 Jan 2024 17:45:41 -0500 Subject: [PATCH 389/768] drm/amd/display: Disable ips before dc interrupt setting [Why] While in IPS2 an access to dcn registers is not allowed. If interrupt results in dc call, we should disable IPS. [How] Safeguard register access in IPS2 by disabling idle optimization before calling dc interrupt setting api. Signed-off-by: Roman Li Tested-by: Mark Broadworth Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 58b880acb087..3390f0d8420a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -711,7 +711,7 @@ static inline int dm_irq_state(struct amdgpu_device *adev, { bool st; enum dc_irq_source irq_source; - + struct dc *dc = adev->dm.dc; struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc_id]; if (!acrtc) { @@ -729,6 +729,9 @@ static inline int dm_irq_state(struct amdgpu_device *adev, st = (state == AMDGPU_IRQ_STATE_ENABLE); + if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed) + dc_allow_idle_optimizations(dc, false); + dc_interrupt_set(adev->dm.dc, irq_source, st); return 0; } From d45669eb5e68c052d0d890cd88c33a65c115d9f3 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 23 Jan 2024 15:14:28 -0500 Subject: [PATCH 390/768] drm/amd: Add a DC debug mask for IPS For debugging IPS-related issues, expose a new debug mask that allows to disable IPS. Usage: amdgpu.dcdebugmask=0x800 Signed-off-by: Roman Li Tested-by: Mark Broadworth Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1dc5dd9b7bf7..df2c7ffe190f 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -258,6 +258,7 @@ enum DC_DEBUG_MASK { DC_ENABLE_DML2 = 0x100, DC_DISABLE_PSR_SU = 0x200, DC_DISABLE_REPLAY = 0x400, + DC_DISABLE_IPS = 0x800, }; enum amd_dpm_forced_level; From c82eb25c5f005b33aebb1415a8472fc2eeea0deb Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 23 Jan 2024 15:18:24 -0500 Subject: [PATCH 391/768] drm/amd/display: "Enable IPS by default" [Why] IPS was temporary disabled due to instability. It was fixed in dmub firmware and with: - "drm/amd/display: Add IPS checks before dcn register access" - "drm/amd/display: Disable ips before dc interrupt setting" [How] Enable IPS by default. Disable IPS if 0x800 bit set in amdgpu.dcdebugmask module params Signed-off-by: Roman Li Tested-by: Mark Broadworth Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d8f85df2bc66..6cda5b536362 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1719,7 +1719,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0]; init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0]; - init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; + if (amdgpu_dc_debug_mask & DC_DISABLE_IPS) + init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; init_data.flags.disable_ips_in_vpb = 1; From 8c2ae772fe08e33f3d7a83849e85539320701abd Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 25 Jan 2024 14:53:09 -0600 Subject: [PATCH 392/768] spi: fix finalize message on error return In __spi_pump_transfer_message(), the message was not finalized in the first error return as it is in the other error return paths. Not finalizing the message could cause anything waiting on the message to complete to hang forever. This adds the missing call to spi_finalize_current_message(). Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Signed-off-by: David Lechner Link: https://msgid.link/r/20240125205312.3458541-2-dlechner@baylibre.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 7477a11e12be..f2170f4b5077 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1717,6 +1717,10 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr, pm_runtime_put_noidle(ctlr->dev.parent); dev_err(&ctlr->dev, "Failed to power device: %d\n", ret); + + msg->status = ret; + spi_finalize_current_message(ctlr); + return ret; } } From b64787840080bdbd048bb9c68222ad17236cbd7e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Jan 2024 11:25:50 -0800 Subject: [PATCH 393/768] selftests: tcp_ao: add a config file Still a bit unclear whether each directory should have its own config file, but assuming they should lets add one for tcp_ao. The following tests still fail with this config in place: - rst_ipv4, - rst_ipv6, - bench-lookups_ipv6. other 21 pass. Fixes: d11301f65977 ("selftests/net: Add TCP-AO ICMPs accept test") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20240124192550.1865743-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/config | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/config diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config new file mode 100644 index 000000000000..d3277a9de987 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_RMD160=y +CONFIG_CRYPTO_SHA1=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_TCP_AO=y +CONFIG_TCP_MD5SIG=y +CONFIG_VETH=m From 98cb12eb52a780e682bea8372fdb2912c08132dd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:20 +0100 Subject: [PATCH 394/768] selftests: net: remove dependency on ebpf tests Several net tests requires an XDP program build under the ebpf directory, and error out if such program is not available. That makes running successful net test hard, let's duplicate into the net dir the [very small] program, re-using the existing rules to build it, and finally dropping the bogus dependency. Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/28e7af7c031557f691dc8045ee41dd549dd5e74c.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 5 +++-- tools/testing/selftests/net/udpgro.sh | 4 ++-- tools/testing/selftests/net/udpgro_bench.sh | 4 ++-- tools/testing/selftests/net/udpgro_frglist.sh | 6 +++--- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- tools/testing/selftests/net/veth.sh | 4 ++-- tools/testing/selftests/net/xdp_dummy.c | 13 +++++++++++++ 7 files changed, 26 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/net/xdp_dummy.c diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 50818075e566..304d8b852ef0 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -84,6 +84,7 @@ TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o +TEST_GEN_FILES += xdp_dummy.o TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard TEST_PROGS += test_vxlan_mdb.sh @@ -104,7 +105,7 @@ $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread $(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ -# Rules to generate bpf obj nat6to4.o +# Rules to generate bpf objs CLANG ?= clang SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build @@ -139,7 +140,7 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) -$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS) +$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index af5dc57c8ce9..8802604148dd 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" # set global exit status, but never reset nonzero one. check_err() @@ -197,7 +197,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index cb664679b434..7080eae5312b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -84,7 +84,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index dd47fa96f6b3..e1ff645bd3d1 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -85,12 +85,12 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi if [ ! -f nat6to4.o ]; then - echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first" + echo "Missing nat6to4 helper. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index c079565add39..5fa8659ab13d 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 2d073595c620..27574bbf2d63 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -218,7 +218,7 @@ while getopts "hs:" option; do done if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit 1 fi diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c new file mode 100644 index 000000000000..d988b2e0cee8 --- /dev/null +++ b/tools/testing/selftests/net/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include +#include + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; From f5173fe3e13b2cbd25d0d73f40acd923d75add55 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:21 +0100 Subject: [PATCH 395/768] selftests: net: included needed helper in the install targets The blamed commit below introduce a dependency in some net self-tests towards a newly introduce helper script. Such script is currently not included into the TEST_PROGS_EXTENDED list and thus is not installed, causing failure for the relevant tests when executed from the install dir. Fix the issue updating the install targets. Fixes: 3bdd9fd29cb0 ("selftests/net: synchronize udpgro tests' tx and rx connection") Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/076e8758e21ff2061cc9f81640e7858df775f0a9.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 304d8b852ef0..48c6f93b8149 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -55,6 +55,7 @@ TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh +TEST_PROGS_EXTENDED += net_helper.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite From 4acffb66630a0e4800880baa61a54ef18047ccd3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:22 +0100 Subject: [PATCH 396/768] selftests: net: explicitly wait for listener ready The UDP GRO forwarding test still hard-code an arbitrary pause to wait for the UDP listener becoming ready in background. That causes sporadic failures depending on the host load. Replace the sleep with the existing helper waiting for the desired port being exposed. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/4d58900fb09cef42749cfcf2ad7f4b91a97d225c.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 5fa8659ab13d..d6b9c759043c 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source net_helper.sh + BPF_FILE="xdp_dummy.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 @@ -119,7 +121,7 @@ run_test() { ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst local retc=$? wait $spid @@ -168,7 +170,7 @@ run_bench() { ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst local retc=$? wait $spid From 39b383d77961a544d896242051168b8129cf5be7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Jan 2024 15:36:30 -0800 Subject: [PATCH 397/768] selftests: tcp_ao: set the timeout to 2 minutes The default timeout for tests is 45sec, bench-lookups_ipv6 seems to take around 50sec when running in a VM without HW acceleration. Give it a 2x margin and set the timeout to 120sec. Fixes: d1066c9c58d4 ("selftests/net: Add test/benchmark for removing MKTs") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20240124233630.1977708-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/net/tcp_ao/settings diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/settings @@ -0,0 +1 @@ +timeout=120 From d2fda304bb739b97c1a3e46e39700eb49f07a62c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 24 Jan 2024 17:26:33 -0500 Subject: [PATCH 398/768] bcachefs: __lookup_dirent() works in snapshot, not subvol Add a new helper, bch2_hash_lookup_in_snapshot(), for when we're not operating in a subvolume and already have a snapshot ID, and then use it in lookup_lostfound() -> __lookup_dirent(). This is a bugfix - lookup_lostfound() doesn't take a subvolume ID, we were passing a nonsense subvolume ID before, and don't have one to pass since we may be operating in an interior snapshot node that doesn't have a subvolume ID. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 23 ++++++++++++----------- fs/bcachefs/str_hash.h | 22 +++++++++++++++------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 4f0ecd605675..6a760777bafb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -119,22 +119,19 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, if (!ret) *snapshot = iter.pos.snapshot; err: - bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot); bch2_trans_iter_exit(trans, &iter); return ret; } -static int __lookup_dirent(struct btree_trans *trans, +static int lookup_dirent_in_snapshot(struct btree_trans *trans, struct bch_hash_info hash_info, subvol_inum dir, struct qstr *name, - u64 *target, unsigned *type) + u64 *target, unsigned *type, u32 snapshot) { struct btree_iter iter; struct bkey_s_c_dirent d; - int ret; - - ret = bch2_hash_lookup(trans, &iter, bch2_dirent_hash_desc, - &hash_info, dir, name, 0); + int ret = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, + &hash_info, dir, name, 0, snapshot); if (ret) return ret; @@ -225,15 +222,16 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; - ret = lookup_inode(trans, root_inum.inum, &root_inode, &snapshot); + u32 root_inode_snapshot = snapshot; + ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); bch_err_msg(c, ret, "looking up root inode"); if (ret) return ret; root_hash_info = bch2_hash_info_init(c, &root_inode); - ret = __lookup_dirent(trans, root_hash_info, root_inum, - &lostfound_str, &inum, &d_type); + ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum, + &lostfound_str, &inum, &d_type, snapshot); if (bch2_err_matches(ret, ENOENT)) goto create_lostfound; @@ -250,7 +248,10 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ - return lookup_inode(trans, inum, lostfound, &snapshot); + ret = lookup_inode(trans, inum, lostfound, &snapshot); + bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", + inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); + return ret; create_lostfound: /* diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 89fdb7c21134..fcaa5a888744 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -160,21 +160,16 @@ static inline bool is_visible_key(struct bch_hash_desc desc, subvol_inum inum, s } static __always_inline int -bch2_hash_lookup(struct btree_trans *trans, +bch2_hash_lookup_in_snapshot(struct btree_trans *trans, struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, subvol_inum inum, const void *key, - unsigned flags) + unsigned flags, u32 snapshot) { struct bkey_s_c k; - u32 snapshot; int ret; - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); - if (ret) - return ret; - for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), POS(inum.inum, U64_MAX), @@ -194,6 +189,19 @@ bch2_hash_lookup(struct btree_trans *trans, return ret ?: -BCH_ERR_ENOENT_str_hash_lookup; } +static __always_inline int +bch2_hash_lookup(struct btree_trans *trans, + struct btree_iter *iter, + const struct bch_hash_desc desc, + const struct bch_hash_info *info, + subvol_inum inum, const void *key, + unsigned flags) +{ + u32 snapshot; + return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?: + bch2_hash_lookup_in_snapshot(trans, iter, desc, info, inum, key, flags, snapshot); +} + static __always_inline int bch2_hash_hole(struct btree_trans *trans, struct btree_iter *iter, From fc836129f708407502632107e58d48f54b1caf75 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 24 Jan 2024 14:13:44 +0800 Subject: [PATCH 399/768] selftests/net/lib: update busywait timeout value The busywait timeout value is a millisecond, not a second. So the current setting 2 is too small. On slow/busy host (or VMs) the current timeout can expire even on "correct" execution, causing random failures. Let's copy the WAIT_TIMEOUT from forwarding/lib.sh and set BUSYWAIT_TIMEOUT here. Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240124061344.1864484-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index dca549443801..f9fe182dfbd4 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -4,6 +4,9 @@ ############################################################################## # Defines +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 # namespace list created by setup_ns @@ -48,7 +51,7 @@ cleanup_ns() for ns in "$@"; do ip netns delete "${ns}" &> /dev/null - if ! busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" ret=1 fi From 534326711000c318fe1523c77308450522baa499 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 24 Jan 2024 08:10:25 -0800 Subject: [PATCH 400/768] gve: Fix skb truesize underestimation For a skb frag with a newly allocated copy page, the true size is incorrectly set to packet buffer size. It should be set to PAGE_SIZE instead. Fixes: 82fd151d38d9 ("gve: Reduce alloc and copy costs in the GQ rx path") Signed-off-by: Praveen Kaligineedi Link: https://lore.kernel.org/r/20240124161025.1819836-1-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_rx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 7a8dc5386fff..76615d47e055 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -356,7 +356,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, - u16 packet_buffer_size, u16 len, + unsigned int truesize, u16 len, struct gve_rx_ctx *ctx) { u32 offset = page_info->page_offset + page_info->pad; @@ -389,10 +389,10 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, if (skb != ctx->skb_head) { ctx->skb_head->len += len; ctx->skb_head->data_len += len; - ctx->skb_head->truesize += packet_buffer_size; + ctx->skb_head->truesize += truesize; } skb_add_rx_frag(skb, num_frags, page_info->page, - offset, len, packet_buffer_size); + offset, len, truesize); return ctx->skb_head; } @@ -486,7 +486,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, memcpy(alloc_page_info.page_address, src, page_info->pad + len); skb = gve_rx_add_frags(napi, &alloc_page_info, - rx->packet_buffer_size, + PAGE_SIZE, len, ctx); u64_stats_update_begin(&rx->statss); From cefa98e806fd4e2a5e2047457a11ae5f17b8f621 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:08 +0200 Subject: [PATCH 401/768] nfp: flower: add hardware offload check for post ct entry The nfp offload flow pay will not allocate a mask id when the out port is openvswitch internal port. This is because these flows are used to configure the pre_tun table and are never actually send to the firmware as an add-flow message. When a tc rule which action contains ct and the post ct entry's out port is openvswitch internal port, the merge offload flow pay with the wrong mask id of 0 will be send to the firmware. Actually, the nfp can not support hardware offload for this situation, so return EOPNOTSUPP. Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows") CC: stable@vger.kernel.org # 5.14+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 2967bab72505..726d8cdf0b9c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1864,10 +1864,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; - struct flow_action_entry *ct_goto; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { From 3a007b8009b5f8af021021b7a590a6da0dc4c6e0 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:09 +0200 Subject: [PATCH 402/768] nfp: flower: fix hardware offload for the transfer layer port The nfp driver will merge the tp source port and tp destination port into one dword which the offset must be zero to do hardware offload. However, the mangle action for the tp source port and tp destination port is separated for tc ct action. Modify the mangle action for the FLOW_ACT_MANGLE_HDR_TYPE_TCP and FLOW_ACT_MANGLE_HDR_TYPE_UDP to satisfy the nfp driver offload check for the tp port. The mangle action provides a 4B value for source, and a 4B value for the destination, but only 2B of each contains the useful information. For offload the 2B of each is combined into a single 4B word. Since the incoming mask for the source is '0xFFFF' the shift-left will throw away the 0xFFFF part. When this gets combined together in the offload it will clear the destination field. Fix this by setting the lower bits back to 0xFFFF, effectively doing a rotate-left operation on the mask. Fixes: 5cee92c6f57a ("nfp: flower: support hw offload for ct nat action") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-3-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 726d8cdf0b9c..15180538b80a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1424,10 +1424,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: From cae1f1c36661f28c92a1db9113961a9ebd61dbaa Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 16:22:09 +0000 Subject: [PATCH 403/768] net: ethernet: mtk_eth_soc: set DMA coherent mask to get PPE working Set DMA coherent mask to 32-bit which makes PPE offloading engine start working on BPi-R4 which got 4 GiB of RAM. Fixes: 2d75891ebc09 ("net: ethernet: mtk_eth_soc: support 36-bit DMA addressing on MT7988") Suggested-by: Elad Yifee Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/97e90925368b405f0974b9b15f1b7377c4a329ad.1706113251.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a6e91573f8da..de123350bd46 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4761,7 +4761,10 @@ static int mtk_probe(struct platform_device *pdev) } if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36)); + if (!err) + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { dev_err(&pdev->dev, "Wrong DMA config\n"); return -EINVAL; From ff63cc2e95065bea978d2db01f7e7356cca3d021 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 05:18:23 +0000 Subject: [PATCH 404/768] net: phy: mediatek-ge-soc: sync driver with MediaTek SDK Sync initialization and calibration routines with MediaTek's reference driver. Improves compliance and resolves link stability issues with CH340 IoT devices connected to MT798x built-in PHYs. Fixes: 98c485eaf509 ("net: phy: add driver for MediaTek SoC built-in GE PHYs") Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/f2195279c234c0f618946424b8236026126bc595.1706071311.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek-ge-soc.c | 147 ++++++++++++++++-------------- 1 file changed, 81 insertions(+), 66 deletions(-) diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c index 8a20d9889f10..0f3a1538a8b8 100644 --- a/drivers/net/phy/mediatek-ge-soc.c +++ b/drivers/net/phy/mediatek-ge-soc.c @@ -489,7 +489,7 @@ static int tx_r50_fill_result(struct phy_device *phydev, u16 tx_r50_cal_val, u16 reg, val; if (phydev->drv->phy_id == MTK_GPHY_ID_MT7988) - bias = -2; + bias = -1; val = clamp_val(bias + tx_r50_cal_val, 0, 63); @@ -705,6 +705,11 @@ restore: static void mt798x_phy_common_finetune(struct phy_device *phydev) { phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ + __phy_write(phydev, 0x11, 0xc71); + __phy_write(phydev, 0x12, 0xc); + __phy_write(phydev, 0x10, 0x8fae); + /* EnabRandUpdTrig = 1 */ __phy_write(phydev, 0x11, 0x2f00); __phy_write(phydev, 0x12, 0xe); @@ -715,15 +720,56 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x83aa); - /* TrFreeze = 0 */ + /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */ + __phy_write(phydev, 0x11, 0x240); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x9680); + + /* TrFreeze = 0 (mt7988 default) */ __phy_write(phydev, 0x11, 0x0); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9686); + /* SSTrKp100 = 5 */ + /* SSTrKf100 = 6 */ + /* SSTrKp1000Mas = 5 */ + /* SSTrKf1000Mas = 6 */ /* SSTrKp1000Slv = 5 */ + /* SSTrKf1000Slv = 6 */ __phy_write(phydev, 0x11, 0xbaef); __phy_write(phydev, 0x12, 0x2e); __phy_write(phydev, 0x10, 0x968c); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); +} + +static void mt7981_phy_finetune(struct phy_device *phydev) +{ + u16 val[8] = { 0x01ce, 0x01c1, + 0x020f, 0x0202, + 0x03d0, 0x03c0, + 0x0013, 0x0005 }; + int i, k; + + /* 100M eye finetune: + * Keep middle level of TX MLT3 shapper as default. + * Only change TX MLT3 overshoot level here. + */ + for (k = 0, i = 1; i < 12; i++) { + if (i % 3 == 0) + continue; + phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); + } + + phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* ResetSyncOffset = 6 */ + __phy_write(phydev, 0x11, 0x600); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x8fc0); + + /* VgaDecRate = 1 */ + __phy_write(phydev, 0x11, 0x4c2a); + __phy_write(phydev, 0x12, 0x3e); + __phy_write(phydev, 0x10, 0x8fa4); /* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2, * MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2 @@ -738,7 +784,7 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x10, 0x8ec0); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); - /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9*/ + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0x9)); @@ -771,48 +817,6 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_LDO_OUTPUT_V, 0x2222); } -static void mt7981_phy_finetune(struct phy_device *phydev) -{ - u16 val[8] = { 0x01ce, 0x01c1, - 0x020f, 0x0202, - 0x03d0, 0x03c0, - 0x0013, 0x0005 }; - int i, k; - - /* 100M eye finetune: - * Keep middle level of TX MLT3 shapper as default. - * Only change TX MLT3 overshoot level here. - */ - for (k = 0, i = 1; i < 12; i++) { - if (i % 3 == 0) - continue; - phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); - } - - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ - __phy_write(phydev, 0x11, 0xc71); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - - /* ResetSyncOffset = 6 */ - __phy_write(phydev, 0x11, 0x600); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); - - /* VgaDecRate = 1 */ - __phy_write(phydev, 0x11, 0x4c2a); - __phy_write(phydev, 0x12, 0x3e); - __phy_write(phydev, 0x10, 0x8fa4); - - /* FfeUpdGainForce = 4 */ - __phy_write(phydev, 0x11, 0x240); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9680); - - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); -} - static void mt7988_phy_finetune(struct phy_device *phydev) { u16 val[12] = { 0x0187, 0x01cd, 0x01c8, 0x0182, @@ -827,17 +831,7 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* TCT finetune */ phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5); - /* Disable TX power saving */ - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, - MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 12 */ - __phy_write(phydev, 0x11, 0x671); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - /* ResetSyncOffset = 5 */ __phy_write(phydev, 0x11, 0x500); __phy_write(phydev, 0x12, 0x0); @@ -845,13 +839,27 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* VgaDecRate is 1 at default on mt7988 */ + /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7, + * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7 + */ + __phy_write(phydev, 0x11, 0xb90a); + __phy_write(phydev, 0x12, 0x6f); + __phy_write(phydev, 0x10, 0x8f82); + + /* RemAckCntLimitCtrl = 1 */ + __phy_write(phydev, 0x11, 0xfbba); + __phy_write(phydev, 0x12, 0xc3); + __phy_write(phydev, 0x10, 0x87f8); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_2A30); - /* TxClkOffset = 2 */ - __phy_modify(phydev, MTK_PHY_ANARG_RG, MTK_PHY_TCLKOFFSET_MASK, - FIELD_PREP(MTK_PHY_TCLKOFFSET_MASK, 0x2)); - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, + MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, + BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0xa)); + + /* rg_tr_lpf_cnt_val = 1023 */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_LPF_CNT_VAL, 0x3ff); } static void mt798x_phy_eee(struct phy_device *phydev) @@ -884,11 +892,11 @@ static void mt798x_phy_eee(struct phy_device *phydev) MTK_PHY_LPI_SLV_SEND_TX_EN, FIELD_PREP(MTK_PHY_LPI_SLV_SEND_TX_TIMER_MASK, 0x120)); - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, - MTK_PHY_LPI_SEND_LOC_TIMER_MASK | - MTK_PHY_LPI_TXPCS_LOC_RCV, - FIELD_PREP(MTK_PHY_LPI_SEND_LOC_TIMER_MASK, 0x117)); + /* Keep MTK_PHY_LPI_SEND_LOC_TIMER as 375 */ + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, + MTK_PHY_LPI_TXPCS_LOC_RCV); + /* This also fixes some IoT issues, such as CH340 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG2C7, MTK_PHY_MAX_GAIN_MASK | MTK_PHY_MIN_GAIN_MASK, FIELD_PREP(MTK_PHY_MAX_GAIN_MASK, 0x8) | @@ -922,7 +930,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9690); - /* REG_EEE_st2TrKf1000 = 3 */ + /* REG_EEE_st2TrKf1000 = 2 */ __phy_write(phydev, 0x11, 0x114f); __phy_write(phydev, 0x12, 0x2); __phy_write(phydev, 0x10, 0x969a); @@ -947,7 +955,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96b8); - /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 1 */ + /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */ __phy_write(phydev, 0x11, 0x1463); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96ca); @@ -1459,6 +1467,13 @@ static int mt7988_phy_probe(struct phy_device *phydev) if (err) return err; + /* Disable TX power saving at probing to: + * 1. Meet common mode compliance test criteria + * 2. Make sure that TX-VCM calibration works fine + */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, + MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); + return mt798x_phy_calibration(phydev); } From 66dbd9004a55073c5931f5f65f5fe2bbd414bdaa Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jan 2024 13:08:11 -0800 Subject: [PATCH 405/768] drm/sched: Drain all entities in DRM sched run job worker All entities must be drained in the DRM scheduler run job worker to avoid the following case. An entity found that is ready, no job found ready on entity, and run job worker goes idle with other entities + jobs ready. Draining all ready entities (i.e. loop over all ready entities) in the run job worker ensures all job that are ready will be scheduled. Cc: Thorsten Leemhuis Reported-by: Mikhail Gavrilov Closes: https://lore.kernel.org/all/CABXGCsM2VLs489CH-vF-1539-s3in37=bwuOWtoeeE+q26zE+Q@mail.gmail.com/ Reported-and-tested-by: Mario Limonciello Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3124 Link: https://lore.kernel.org/all/20240123021155.2775-1-mario.limonciello@amd.com/ Reported-and-tested-by: Vlastimil Babka Closes: https://lore.kernel.org/dri-devel/05ddb2da-b182-4791-8ef7-82179fd159a8@amd.com/T/#m0c31d4d1b9ae9995bb880974c4f1dbaddc33a48a Signed-off-by: Matthew Brost Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240124210811.1639040-1-matthew.brost@intel.com --- drivers/gpu/drm/scheduler/sched_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 550492a7a031..85f082396d42 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1178,21 +1178,20 @@ static void drm_sched_run_job_work(struct work_struct *w) struct drm_sched_entity *entity; struct dma_fence *fence; struct drm_sched_fence *s_fence; - struct drm_sched_job *sched_job; + struct drm_sched_job *sched_job = NULL; int r; if (READ_ONCE(sched->pause_submit)) return; - entity = drm_sched_select_entity(sched); - if (!entity) - return; - - sched_job = drm_sched_entity_pop_job(entity); - if (!sched_job) { - complete_all(&entity->entity_idle); - return; /* No more work */ + /* Find entity with a ready job */ + while (!sched_job && (entity = drm_sched_select_entity(sched))) { + sched_job = drm_sched_entity_pop_job(entity); + if (!sched_job) + complete_all(&entity->entity_idle); } + if (!entity) + return; /* No more work */ s_fence = sched_job->s_fence; From ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 Jan 2024 09:58:39 +0100 Subject: [PATCH 406/768] readahead: avoid multiple marked readahead pages ra_alloc_folio() marks a page that should trigger next round of async readahead. However it rounds up computed index to the order of page being allocated. This can however lead to multiple consecutive pages being marked with readahead flag. Consider situation with index == 1, mark == 1, order == 0. We insert order 0 page at index 1 and mark it. Then we bump order to 1, index to 2, mark (still == 1) is rounded up to 2 so page at index 2 is marked as well. Then we bump order to 2, index is incremented to 4, mark gets rounded to 4 so page at index 4 is marked as well. The fact that multiple pages get marked within a single readahead window confuses the readahead logic and results in readahead window being trimmed back to 1. This situation is triggered in particular when maximum readahead window size is not a power of two (in the observed case it was 768 KB) and as a result sequential read throughput suffers. Fix the problem by rounding 'mark' down instead of up. Because the index is naturally aligned to 'order', we are guaranteed 'rounded mark' == index iff 'mark' is within the page we are allocating at 'index' and thus exactly one page is marked with readahead flag as required by the readahead code and sequential read performance is restored. This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios"). The commit changed the rounding with the rationale: "... we were setting the readahead flag on the folio which contains the last byte read from the block. This is wrong because we will trigger readahead at the end of the read without waiting to see if a subsequent read is going to use the pages we just read." Although this is true, the fact is this was always the case with read sizes not aligned to folio boundaries and large folios in the page cache just make the situation more obvious (and frequent). Also for sequential read workloads it is better to trigger the readahead earlier rather than later. It is true that the difference in the rounding and thus earlier triggering of the readahead can result in reading more for semi-random workloads. However workloads really suffering from this seem to be rare. In particular I have verified that the workload described in commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading random 100k blocks from a file like: [reader] bs=100k rw=randread numjobs=1 size=64g runtime=60s is not impacted by the rounding change and achieves ~70MB/s in both cases. [jack@suse.cz: fix one more place where mark rounding was done as well] Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") Signed-off-by: Jan Kara Cc: Matthew Wilcox Cc: Guo Xuenan Cc: Signed-off-by: Andrew Morton --- mm/readahead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index 23620c57c122..2648ec4f0494 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -575,7 +575,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; From 19d3e221807772f8443e565234a6fdc5a2b09d26 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 12 Jan 2024 10:08:40 -0800 Subject: [PATCH 407/768] fs/hugetlbfs/inode.c: mm/memory-failure.c: fix hugetlbfs hwpoison handling has_extra_refcount() makes the assumption that the page cache adds a ref count of 1 and subtracts this in the extra_pins case. Commit a08c7193e4f1 (mm/filemap: remove hugetlb special casing in filemap.c) modifies __filemap_add_folio() by calling folio_ref_add(folio, nr); for all cases (including hugtetlb) where nr is the number of pages in the folio. We should adjust the number of references coming from the page cache by subtracing the number of pages rather than 1. In hugetlbfs_read_iter(), folio_test_has_hwpoisoned() is testing the wrong flag as, in the hugetlb case, memory-failure code calls folio_test_set_hwpoison() to indicate poison. folio_test_hwpoison() is the correct function to test for that flag. After these fixes, the hugetlb hwpoison read selftest passes all cases. Link: https://lkml.kernel.org/r/20240112180840.367006-1-sidhartha.kumar@oracle.com Fixes: a08c7193e4f1 ("mm/filemap: remove hugetlb special casing in filemap.c") Signed-off-by: Sidhartha Kumar Closes: https://lore.kernel.org/linux-mm/20230713001833.3778937-1-jiaqiyan@google.com/T/#m8e1469119e5b831bbd05d495f96b842e4a1c5519 Reported-by: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Acked-by: Miaohe Lin Acked-by: Muchun Song Cc: James Houghton Cc: Jiaqi Yan Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: [6.7+] Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 2 +- mm/memory-failure.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ea5b8e57d904..671664fed307 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -340,7 +340,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } else { folio_unlock(folio); - if (!folio_test_has_hwpoisoned(folio)) + if (!folio_test_hwpoison(folio)) want = nr; else { /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4f9b61f4a668..636280d04008 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -982,7 +982,7 @@ static bool has_extra_refcount(struct page_state *ps, struct page *p, int count = page_count(p) - 1; if (extra_pins) - count -= 1; + count -= folio_nr_pages(page_folio(p)); if (count > 0) { pr_err("%#lx: %s still referenced by %d users\n", From f8ee4361b7be3f0c5bd21ee47561473ddf3aa17b Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 12 Jan 2024 12:18:50 +0500 Subject: [PATCH 408/768] selftests/mm: mremap_test: fix build warning Use 2 separate variables of types int and unsigned long long instead of confusing them. This corrects the correct print format for each of them and removes the build warning: warning: format `%d' expects argument of type `int', but argument 2 has type `long long unsigned int' Link: https://lkml.kernel.org/r/20240112071851.612930-1-usama.anjum@collabora.com Fixes: a4cb3b243343 ("selftests: mm: add a test for remapping to area immediately after existing mapping") Signed-off-by: Muhammad Usama Anjum Cc: Joel Fernandes (Google) Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 27 ++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1d4c1589c305..2f8b991f78cb 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -360,7 +360,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, char pattern_seed) { void *addr, *src_addr, *dest_addr, *dest_preamble_addr; - unsigned long long i; + int d; + unsigned long long t; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; unsigned long long threshold; @@ -378,8 +379,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for source block. */ srand(pattern_seed); - for (i = 0; i < threshold; i++) - memset((char *) src_addr + i, (char) rand(), 1); + for (t = 0; t < threshold; t++) + memset((char *) src_addr + t, (char) rand(), 1); /* Mask to zero out lower bits of address for alignment */ align_mask = ~(c.dest_alignment - 1); @@ -420,8 +421,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for the dest preamble block. */ srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) - memset((char *) dest_preamble_addr + i, (char) rand(), 1); + for (d = 0; d < c.dest_preamble_size; d++) + memset((char *) dest_preamble_addr + d, (char) rand(), 1); } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -437,14 +438,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify byte pattern after remapping */ srand(pattern_seed); - for (i = 0; i < threshold; i++) { + for (t = 0; t < threshold; t++) { char c = (char) rand(); - if (((char *) dest_addr)[i] != c) { + if (((char *) dest_addr)[t] != c) { ksft_print_msg("Data after remap doesn't match at offset %llu\n", - i); + t); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_addr)[i] & 0xff); + ((char *) dest_addr)[t] & 0xff); ret = -1; goto clean_up_dest; } @@ -453,14 +454,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify the dest preamble byte pattern after remapping */ if (c.dest_preamble_size) { srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) { + for (d = 0; d < c.dest_preamble_size; d++) { char c = (char) rand(); - if (((char *) dest_preamble_addr)[i] != c) { + if (((char *) dest_preamble_addr)[d] != c) { ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", - i); + d); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_preamble_addr)[i] & 0xff); + ((char *) dest_preamble_addr)[d] & 0xff); ret = -1; goto clean_up_dest; } From 4dca82d14174fe53f656a6bc32398db1bdd8f481 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 15 Jan 2024 11:07:31 +0100 Subject: [PATCH 409/768] uprobes: use pagesize-aligned virtual address when replacing pages uprobes passes an unaligned page mapping address to folio_add_new_anon_rmap(), which ends up triggering a VM_BUG_ON() we recently extended in commit 372cbd4d5a066 ("mm: non-pmd-mappable, large folios for folio_add_new_anon_rmap()"). Arguably, this is uprobes code doing something wrong; however, for the time being it would have likely worked in rmap code because __folio_set_anon() would set folio->index to the same value. Looking at __replace_page(), we'd also pass slightly wrong values to mmu_notifier_range_init(), page_vma_mapped_walk(), flush_cache_page(), ptep_clear_flush() and set_pte_at_notify(). I suspect most of them are fine, but let's just mark the introducing commit as the one needed fixing. I don't think CC stable is warranted. We'll add more sanity checks in rmap code separately, to make sure that we always get properly aligned addresses. Link: https://lkml.kernel.org/r/20240115100731.91007-1-david@redhat.com Fixes: c517ee744b96 ("uprobes: __replace_page() should not use page_address_in_vma()") Signed-off-by: David Hildenbrand Reported-by: Jiri Olsa Closes: https://lkml.kernel.org/r/ZaMR2EWN-HvlCfUl@krava Tested-by: Jiri Olsa Reviewed-by: Ryan Roberts Acked-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Ian Rogers Cc: Adrian Hunter Signed-off-by: Andrew Morton --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 485bb0389b48..929e98c62965 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -537,7 +537,7 @@ retry: } } - ret = __replace_page(vma, vaddr, old_page, new_page); + ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page); if (new_page) put_page(new_page); put_old: From c4608d1bf7c6536d1a3d233eb21e50678681564e Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 20 Dec 2023 22:59:43 -0800 Subject: [PATCH 410/768] mm: mmap: map MAP_STACK to VM_NOHUGEPAGE commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") incured regression for stress-ng pthread benchmark [1]. It is because THP get allocated to pthread's stack area much more possible than before. Pthread's stack area is allocated by mmap without VM_GROWSDOWN or VM_GROWSUP flag, so kernel can't tell whether it is a stack area or not. The MAP_STACK flag is used to mark the stack area, but it is a no-op on Linux. Mapping MAP_STACK to VM_NOHUGEPAGE to prevent from allocating THP for such stack area. With this change the stack area looks like: fffd18e10000-fffd19610000 rw-p 00000000 00:00 0 Size: 8192 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 12 kB Pss: 12 kB Pss_Dirty: 12 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 12 kB Referenced: 12 kB Anonymous: 12 kB KSM: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd wr mr mw me ac nh The "nh" flag is set. [1] https://lore.kernel.org/linux-mm/202312192310.56367035-oliver.sang@intel.com/ Link: https://lkml.kernel.org/r/20231221065943.2803551-2-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: kernel test robot Tested-by: Oliver Sang Reviewed-by: Yin Fengwei Cc: Rik van Riel Cc: Matthew Wilcox Cc: Christopher Lameter Cc: Huang, Ying Cc: Signed-off-by: Andrew Morton --- include/linux/mman.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mman.h b/include/linux/mman.h index 40d94411d492..dc7048824be8 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,6 +156,7 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } From 63fd327016fdfca6f2fa27eba3496bd079eb8ed3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 11 Jan 2024 08:29:02 -0500 Subject: [PATCH 411/768] mm: memcontrol: don't throttle dying tasks on memory.high While investigating hosts with high cgroup memory pressures, Tejun found culprit zombie tasks that had were holding on to a lot of memory, had SIGKILL pending, but were stuck in memory.high reclaim. In the past, we used to always force-charge allocations from tasks that were exiting in order to accelerate them dying and freeing up their rss. This changed for memory.max in a4ebf1b6ca1e ("memcg: prohibit unconditional exceeding the limit of dying tasks"); it noted that this can cause (userspace inducable) containment failures, so it added a mandatory reclaim and OOM kill cycle before forcing charges. At the time, memory.high enforcement was handled in the userspace return path, which isn't reached by dying tasks, and so memory.high was still never enforced by dying tasks. When c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") added synchronous reclaim for memory.high, it added unconditional memory.high enforcement for dying tasks as well. The callstack shows that this path is where the zombie is stuck in. We need to accelerate dying tasks getting past memory.high, but we cannot do it quite the same way as we do for memory.max: memory.max is enforced strictly, and tasks aren't allowed to move past it without FIRST reclaiming and OOM killing if necessary. This ensures very small levels of excess. With memory.high, though, enforcement happens lazily after the charge, and OOM killing is never triggered. A lot of concurrent threads could have pushed, or could actively be pushing, the cgroup into excess. The dying task will enter reclaim on every allocation attempt, with little hope of restoring balance. To fix this, skip synchronous memory.high enforcement on dying tasks altogether again. Update memory.high path documentation while at it. [hannes@cmpxchg.org: also handle tasks are being killed during the reclaim] Link: https://lkml.kernel.org/r/20240111192807.GA424308@cmpxchg.org Link: https://lkml.kernel.org/r/20240111132902.389862-1-hannes@cmpxchg.org Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") Signed-off-by: Johannes Weiner Reported-by: Tejun Heo Reviewed-by: Yosry Ahmed Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Dan Schatzberg Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/memcontrol.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e4c8735e7c85..46d8d02114cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2623,8 +2623,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, } /* - * Scheduled by try_charge() to be executed from the userland return path - * and reclaims memory over the high limit. + * Reclaims memory over the high limit. Called directly from + * try_charge() (context permitting), as well as from the userland + * return path where reclaim is always able to block. */ void mem_cgroup_handle_over_high(gfp_t gfp_mask) { @@ -2643,6 +2644,17 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask) current->memcg_nr_pages_over_high = 0; retry_reclaim: + /* + * Bail if the task is already exiting. Unlike memory.max, + * memory.high enforcement isn't as strict, and there is no + * OOM killer involved, which means the excess could already + * be much bigger (and still growing) than it could for + * memory.max; the dying task could get stuck in fruitless + * reclaim for a long time, which isn't desirable. + */ + if (task_is_dying()) + goto out; + /* * The allocating task should reclaim at least the batch size, but for * subsequent retries we only want to do what's necessary to prevent oom @@ -2693,6 +2705,9 @@ retry_reclaim: } /* + * Reclaim didn't manage to push usage below the limit, slow + * this allocating task down. + * * If we exit early, we're guaranteed to die (since * schedule_timeout_killable sets TASK_KILLABLE). This means we don't * need to account for any ill-begotten jiffies to pay them off later. @@ -2887,11 +2902,17 @@ done_restock: } } while ((memcg = parent_mem_cgroup(memcg))); + /* + * Reclaim is set up above to be called from the userland + * return path. But also attempt synchronous reclaim to avoid + * excessive overrun while the task is still inside the + * kernel. If this is successful, the return path will see it + * when it rechecks the overage and simply bail out. + */ if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && !(current->flags & PF_MEMALLOC) && - gfpflags_allow_blocking(gfp_mask)) { + gfpflags_allow_blocking(gfp_mask)) mem_cgroup_handle_over_high(gfp_mask); - } return 0; } From 0d7e68c0271853c30655b05934d66cec81dc6afc Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Wed, 17 Jan 2024 13:22:57 +0100 Subject: [PATCH 412/768] MAINTAINERS: add man-pages git trees The maintainer uses both. Link: https://lkml.kernel.org/r/20240117122257.2707637-1-pvorel@suse.cz Signed-off-by: Petr Vorel Reviewed-by: Alejandro Colomar Signed-off-by: Andrew Morton --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..8c756737491e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12902,6 +12902,8 @@ M: Alejandro Colomar L: linux-man@vger.kernel.org S: Maintained W: http://www.kernel.org/doc/man-pages +T: git git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git +T: git git://www.alejandro-colomar.es/src/alx/linux/man-pages/man-pages.git MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) M: Jeremy Kerr From bc29036e1da1cf66e5f8312649aeec2d51ea3d86 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Jan 2024 14:04:54 +0500 Subject: [PATCH 413/768] selftests/mm: switch to bash from sh Running charge_reserved_hugetlb.sh generates errors if sh is set to dash: ./charge_reserved_hugetlb.sh: 9: [[: not found ./charge_reserved_hugetlb.sh: 19: [[: not found ./charge_reserved_hugetlb.sh: 27: [[: not found ./charge_reserved_hugetlb.sh: 37: [[: not found ./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected Switch to using /bin/bash instead of /bin/sh. Make the switch for write_hugetlb_memory.sh as well which is called from charge_reserved_hugetlb.sh. Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Cc: Muhammad Usama Anjum Cc: Shuah Khan Cc: David Laight Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 2 +- tools/testing/selftests/mm/write_hugetlb_memory.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 0899019a7fcb..e14bdd4455f2 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh index 70a02301f4c2..3d2d2eb9d6ff 100755 --- a/tools/testing/selftests/mm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e From 9319b647902cbd5cc884ac08a8a6d54ce111fc78 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 18 Jan 2024 10:19:53 -0800 Subject: [PATCH 414/768] mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again (struct dirty_throttle_control *)->thresh is an unsigned long, but is passed as the u32 divisor argument to div_u64(). On architectures where unsigned long is 64 bytes, the argument will be implicitly truncated. Use div64_u64() instead of div_u64() so that the value used in the "is this a safe division" check is the same as the divisor. Also, remove redundant cast of the numerator to u64, as that should happen implicitly. This would be difficult to exploit in memcg domain, given the ratio-based arithmetic domain_drity_limits() uses, but is much easier in global writeback domain with a BDI_CAP_STRICTLIMIT-backing device, using e.g. vm.dirty_bytes=(1<<32)*PAGE_SIZE so that dtc->thresh == (1<<32) Link: https://lkml.kernel.org/r/20240118181954.1415197-1-zokeefe@google.com Fixes: f6789593d5ce ("mm/page-writeback.c: fix divide by zero in bdi_dirty_limits()") Signed-off-by: Zach O'Keefe Cc: Maxim Patlasov Cc: Signed-off-by: Andrew Morton --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cd4e4ae77c40..02147b61712b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need From f6564fce256a3944aa1bc76cb3c40e792d97c1eb Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 11:59:14 +0100 Subject: [PATCH 415/768] mm, kmsan: fix infinite recursion due to RCU critical section Alexander Potapenko writes in [1]: "For every memory access in the code instrumented by KMSAN we call kmsan_get_metadata() to obtain the metadata for the memory being accessed. For virtual memory the metadata pointers are stored in the corresponding `struct page`, therefore we need to call virt_to_page() to get them. According to the comment in arch/x86/include/asm/page.h, virt_to_page(kaddr) returns a valid pointer iff virt_addr_valid(kaddr) is true, so KMSAN needs to call virt_addr_valid() as well. To avoid recursion, kmsan_get_metadata() must not call instrumented code, therefore ./arch/x86/include/asm/kmsan.h forks parts of arch/x86/mm/physaddr.c to check whether a virtual address is valid or not. But the introduction of rcu_read_lock() to pfn_valid() added instrumented RCU API calls to virt_to_page_or_null(), which is called by kmsan_get_metadata(), so there is an infinite recursion now. I do not think it is correct to stop that recursion by doing kmsan_enter_runtime()/kmsan_exit_runtime() in kmsan_get_metadata(): that would prevent instrumented functions called from within the runtime from tracking the shadow values, which might introduce false positives." Fix the issue by switching pfn_valid() to the _sched() variant of rcu_read_lock/unlock(), which does not require calling into RCU. Given the critical section in pfn_valid() is very small, this is a reasonable trade-off (with preemptible RCU). KMSAN further needs to be careful to suppress calls into the scheduler, which would be another source of recursion. This can be done by wrapping the call to pfn_valid() into preempt_disable/enable_no_resched(). The downside is that this sacrifices breaking scheduling guarantees; however, a kernel compiled with KMSAN has already given up any performance guarantees due to being heavily instrumented. Note, KMSAN code already disables tracing via Makefile, and since mmzone.h is included, it is not necessary to use the notrace variant, which is generally preferred in all other cases. Link: https://lkml.kernel.org/r/20240115184430.2710652-1-glider@google.com [1] Link: https://lkml.kernel.org/r/20240118110022.2538350-1-elver@google.com Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") Signed-off-by: Marco Elver Reported-by: Alexander Potapenko Reported-by: syzbot+93a9e8a3dea8d6085e12@syzkaller.appspotmail.com Reviewed-by: Alexander Potapenko Tested-by: Alexander Potapenko Cc: Charan Teja Kalla Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/kmsan.h | 17 ++++++++++++++++- include/linux/mmzone.h | 6 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h index 8fa6ac0e2d76..d91b37f5b4bb 100644 --- a/arch/x86/include/asm/kmsan.h +++ b/arch/x86/include/asm/kmsan.h @@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr) { unsigned long x = (unsigned long)addr; unsigned long y = x - __START_KERNEL_map; + bool ret; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { @@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr) return false; } - return pfn_valid(x >> PAGE_SHIFT); + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; } #endif /* !MODULE */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4ed33b127821..a497f189d988 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2013,9 +2013,9 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - rcu_read_lock(); + rcu_read_lock_sched(); if (!valid_section(ms)) { - rcu_read_unlock(); + rcu_read_unlock_sched(); return 0; } /* @@ -2023,7 +2023,7 @@ static inline int pfn_valid(unsigned long pfn) * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); - rcu_read_unlock(); + rcu_read_unlock_sched(); return ret; } From c2a292545cd43dcab86d5290ed95b006f1cefe90 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 12:01:29 +0100 Subject: [PATCH 416/768] stackdepot: add stats counters exported via debugfs Add a few basic stats counters for stack depot that can be used to derive if stack depot is working as intended. This is a snapshot of the new stats after booting a system with a KASAN-enabled kernel: $ cat /sys/kernel/debug/stackdepot/stats pools: 838 allocations: 29861 frees: 6561 in_use: 23300 freelist_size: 1840 Generally, "pools" should be well below the max; once the system is booted, "in_use" should remain relatively steady. Link: https://lkml.kernel.org/r/20240118110216.2539519-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Andi Kleen Cc: Dmitry Vyukov Cc: Vlastimil Babka Cc: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index a0be5d05c7f0..80a8ca24ccc8 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "stackdepot: " fmt +#include #include #include #include @@ -115,6 +116,23 @@ static bool new_pool_required = true; /* Lock that protects the variables above. */ static DEFINE_RWLOCK(pool_rwlock); +/* Statistics counters for debugfs. */ +enum depot_counter_id { + DEPOT_COUNTER_ALLOCS, + DEPOT_COUNTER_FREES, + DEPOT_COUNTER_INUSE, + DEPOT_COUNTER_FREELIST_SIZE, + DEPOT_COUNTER_COUNT, +}; +static long counters[DEPOT_COUNTER_COUNT]; +static const char *const counter_names[] = { + [DEPOT_COUNTER_ALLOCS] = "allocations", + [DEPOT_COUNTER_FREES] = "frees", + [DEPOT_COUNTER_INUSE] = "in_use", + [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", +}; +static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); + static int __init disable_stack_depot(char *str) { return kstrtobool(str, &stack_depot_disabled); @@ -277,6 +295,7 @@ static void depot_init_pool(void *pool) stack->handle.extra = 0; list_add(&stack->list, &free_stacks); + counters[DEPOT_COUNTER_FREELIST_SIZE]++; } /* Save reference to the pool to be used by depot_fetch_stack(). */ @@ -376,6 +395,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* Get and unlink the first entry from the freelist. */ stack = list_first_entry(&free_stacks, struct stack_record, list); list_del(&stack->list); + counters[DEPOT_COUNTER_FREELIST_SIZE]--; /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) @@ -394,6 +414,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) */ kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); + counters[DEPOT_COUNTER_ALLOCS]++; + counters[DEPOT_COUNTER_INUSE]++; return stack; } @@ -426,6 +448,10 @@ static void depot_free_stack(struct stack_record *stack) lockdep_assert_held_write(&pool_rwlock); list_add(&stack->list, &free_stacks); + + counters[DEPOT_COUNTER_FREELIST_SIZE]++; + counters[DEPOT_COUNTER_FREES]++; + counters[DEPOT_COUNTER_INUSE]--; } /* Calculates the hash for a stack. */ @@ -690,3 +716,30 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) return parts.extra; } EXPORT_SYMBOL(stack_depot_get_extra_bits); + +static int stats_show(struct seq_file *seq, void *v) +{ + /* + * data race ok: These are just statistics counters, and approximate + * statistics are ok for debugging. + */ + seq_printf(seq, "pools: %d\n", data_race(pools_num)); + for (int i = 0; i < DEPOT_COUNTER_COUNT; i++) + seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i])); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(stats); + +static int depot_debugfs_init(void) +{ + struct dentry *dir; + + if (stack_depot_disabled) + return 0; + + dir = debugfs_create_dir("stackdepot", NULL); + debugfs_create_file("stats", 0444, dir, NULL, &stats_fops); + return 0; +} +late_initcall(depot_debugfs_init); From 4434a56ec20925333d6cf4d4093641d063abd35b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 12:01:30 +0100 Subject: [PATCH 417/768] stackdepot: make fast paths lock-less again With the introduction of the pool_rwlock (reader-writer lock), several fast paths end up taking the pool_rwlock as readers. Furthermore, stack_depot_put() unconditionally takes the pool_rwlock as a writer. Despite allowing readers to make forward-progress concurrently, reader-writer locks have inherent cache contention issues, which does not scale well on systems with large CPU counts. Rework the synchronization story of stack depot to again avoid taking any locks in the fast paths. This is done by relying on RCU-protected list traversal, and the NMI-safe subset of RCU to delay reuse of freed stack records. See code comments for more details. Along with the performance issues, this also fixes incorrect nesting of rwlock within a raw_spinlock, given that stack depot should still be usable from anywhere: | [ BUG: Invalid wait context ] | ----------------------------- | swapper/0/1 is trying to lock: | ffffffff89869be8 (pool_rwlock){..--}-{3:3}, at: stack_depot_save_flags | other info that might help us debug this: | context-{5:5} | 2 locks held by swapper/0/1: | #0: ffffffff89632440 (rcu_read_lock){....}-{1:3}, at: __queue_work | #1: ffff888100092018 (&pool->lock){-.-.}-{2:2}, at: __queue_work <-- raw_spin_lock Stack depot usage stats are similar to the previous version after a KASAN kernel boot: $ cat /sys/kernel/debug/stackdepot/stats pools: 838 allocations: 29865 frees: 6604 in_use: 23261 freelist_size: 1879 The number of pools is the same as previously. The freelist size is minimally larger, but this may also be due to variance across system boots. This shows that even though we do not eagerly wait for the next RCU grace period (such as with synchronize_rcu() or call_rcu()) after freeing a stack record - requiring depot_pop_free() to "poll" if an entry may be used - new allocations are very likely to happen in later RCU grace periods. Link: https://lkml.kernel.org/r/20240118110216.2539519-2-elver@google.com Fixes: 108be8def46e ("lib/stackdepot: allow users to evict stack traces") Reported-by: Andi Kleen Signed-off-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 322 +++++++++++++++++++++++++++++++---------------- 1 file changed, 211 insertions(+), 111 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 80a8ca24ccc8..5caa1f566553 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -22,8 +22,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -68,12 +69,28 @@ union handle_parts { }; struct stack_record { - struct list_head list; /* Links in hash table or freelist */ + struct list_head hash_list; /* Links in the hash table */ u32 hash; /* Hash in hash table */ u32 size; /* Number of stored frames */ - union handle_parts handle; + union handle_parts handle; /* Constant after initialization */ refcount_t count; - unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + union { + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + struct { + /* + * An important invariant of the implementation is to + * only place a stack record onto the freelist iff its + * refcount is zero. Because stack records with a zero + * refcount are never considered as valid, it is safe to + * union @entries and freelist management state below. + * Conversely, as soon as an entry is off the freelist + * and its refcount becomes non-zero, the below must not + * be accessed until being placed back on the freelist. + */ + struct list_head free_list; /* Links in the freelist */ + unsigned long rcu_state; /* RCU cookie */ + }; + }; }; #define DEPOT_STACK_RECORD_SIZE \ @@ -113,8 +130,8 @@ static LIST_HEAD(free_stacks); * yet allocated or if the limit on the number of pools is reached. */ static bool new_pool_required = true; -/* Lock that protects the variables above. */ -static DEFINE_RWLOCK(pool_rwlock); +/* The lock must be held when performing pool or freelist modifications. */ +static DEFINE_RAW_SPINLOCK(pool_lock); /* Statistics counters for debugfs. */ enum depot_counter_id { @@ -276,14 +293,15 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -/* Initializes a stack depol pool. */ +/* + * Initializes new stack depot @pool, release all its entries to the freelist, + * and update the list of pools. + */ static void depot_init_pool(void *pool) { int offset; - lockdep_assert_held_write(&pool_rwlock); - - WARN_ON(!list_empty(&free_stacks)); + lockdep_assert_held(&pool_lock); /* Initialize handles and link stack records into the freelist. */ for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; @@ -294,19 +312,36 @@ static void depot_init_pool(void *pool) stack->handle.offset = offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; - list_add(&stack->list, &free_stacks); + /* + * Stack traces of size 0 are never saved, and we can simply use + * the size field as an indicator if this is a new unused stack + * record in the freelist. + */ + stack->size = 0; + + INIT_LIST_HEAD(&stack->hash_list); + /* + * Add to the freelist front to prioritize never-used entries: + * required in case there are entries in the freelist, but their + * RCU cookie still belongs to the current RCU grace period + * (there can still be concurrent readers). + */ + list_add(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; } /* Save reference to the pool to be used by depot_fetch_stack(). */ stack_pools[pools_num] = pool; - pools_num++; + + /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ + WRITE_ONCE(pools_num, pools_num + 1); + ASSERT_EXCLUSIVE_WRITER(pools_num); } /* Keeps the preallocated memory to be used for a new stack depot pool. */ static void depot_keep_new_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); /* * If a new pool is already saved or the maximum number of @@ -329,17 +364,16 @@ static void depot_keep_new_pool(void **prealloc) * number of pools is reached. In either case, take note that * keeping another pool is not required. */ - new_pool_required = false; + WRITE_ONCE(new_pool_required, false); } -/* Updates references to the current and the next stack depot pools. */ -static bool depot_update_pools(void **prealloc) +/* + * Try to initialize a new stack depot pool from either a previous or the + * current pre-allocation, and release all its entries to the freelist. + */ +static bool depot_try_init_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); - - /* Check if we still have objects in the freelist. */ - if (!list_empty(&free_stacks)) - goto out_keep_prealloc; + lockdep_assert_held(&pool_lock); /* Check if we have a new pool saved and use it. */ if (new_pool) { @@ -348,10 +382,9 @@ static bool depot_update_pools(void **prealloc) /* Take note that we might need a new new_pool. */ if (pools_num < DEPOT_MAX_POOLS) - new_pool_required = true; + WRITE_ONCE(new_pool_required, true); - /* Try keeping the preallocated memory for new_pool. */ - goto out_keep_prealloc; + return true; } /* Bail out if we reached the pool limit. */ @@ -368,12 +401,32 @@ static bool depot_update_pools(void **prealloc) } return false; +} -out_keep_prealloc: - /* Keep the preallocated memory for a new pool if required. */ - if (*prealloc) - depot_keep_new_pool(prealloc); - return true; +/* Try to find next free usable entry. */ +static struct stack_record *depot_pop_free(void) +{ + struct stack_record *stack; + + lockdep_assert_held(&pool_lock); + + if (list_empty(&free_stacks)) + return NULL; + + /* + * We maintain the invariant that the elements in front are least + * recently used, and are therefore more likely to be associated with an + * RCU grace period in the past. Consequently it is sufficient to only + * check the first entry. + */ + stack = list_first_entry(&free_stacks, struct stack_record, free_list); + if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) + return NULL; + + list_del(&stack->free_list); + counters[DEPOT_COUNTER_FREELIST_SIZE]--; + + return stack; } /* Allocates a new stack in a stack depot pool. */ @@ -382,20 +435,22 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); - /* Update current and new pools if required and possible. */ - if (!depot_update_pools(prealloc)) + /* This should already be checked by public API entry points. */ + if (WARN_ON_ONCE(!size)) return NULL; /* Check if we have a stack record to save the stack trace. */ - if (list_empty(&free_stacks)) - return NULL; - - /* Get and unlink the first entry from the freelist. */ - stack = list_first_entry(&free_stacks, struct stack_record, list); - list_del(&stack->list); - counters[DEPOT_COUNTER_FREELIST_SIZE]--; + stack = depot_pop_free(); + if (!stack) { + /* No usable entries on the freelist - try to refill the freelist. */ + if (!depot_try_init_pool(prealloc)) + return NULL; + stack = depot_pop_free(); + if (WARN_ON(!stack)) + return NULL; + } /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) @@ -421,37 +476,73 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) { + const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; - lockdep_assert_held(&pool_rwlock); + lockdep_assert_not_held(&pool_lock); - if (parts.pool_index > pools_num) { + if (parts.pool_index > pools_num_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pools_num, handle); + parts.pool_index, pools_num_cached, handle); return NULL; } pool = stack_pools[parts.pool_index]; - if (!pool) + if (WARN_ON(!pool)) return NULL; stack = pool + offset; + if (WARN_ON(!refcount_read(&stack->count))) + return NULL; + return stack; } /* Links stack into the freelist. */ static void depot_free_stack(struct stack_record *stack) { - lockdep_assert_held_write(&pool_rwlock); + unsigned long flags; - list_add(&stack->list, &free_stacks); + lockdep_assert_not_held(&pool_lock); + + raw_spin_lock_irqsave(&pool_lock, flags); + printk_deferred_enter(); + + /* + * Remove the entry from the hash list. Concurrent list traversal may + * still observe the entry, but since the refcount is zero, this entry + * will no longer be considered as valid. + */ + list_del_rcu(&stack->hash_list); + + /* + * Due to being used from constrained contexts such as the allocators, + * NMI, or even RCU itself, stack depot cannot rely on primitives that + * would sleep (such as synchronize_rcu()) or recursively call into + * stack depot again (such as call_rcu()). + * + * Instead, get an RCU cookie, so that we can ensure this entry isn't + * moved onto another list until the next grace period, and concurrent + * RCU list traversal remains safe. + */ + stack->rcu_state = get_state_synchronize_rcu(); + + /* + * Add the entry to the freelist tail, so that older entries are + * considered first - their RCU cookie is more likely to no longer be + * associated with the current grace period. + */ + list_add_tail(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; counters[DEPOT_COUNTER_FREES]++; counters[DEPOT_COUNTER_INUSE]--; + + printk_deferred_exit(); + raw_spin_unlock_irqrestore(&pool_lock, flags); } /* Calculates the hash for a stack. */ @@ -479,22 +570,52 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, /* Finds a stack in a bucket of the hash table. */ static inline struct stack_record *find_stack(struct list_head *bucket, - unsigned long *entries, int size, - u32 hash) + unsigned long *entries, int size, + u32 hash, depot_flags_t flags) { - struct list_head *pos; - struct stack_record *found; + struct stack_record *stack, *ret = NULL; - lockdep_assert_held(&pool_rwlock); + /* + * Stack depot may be used from instrumentation that instruments RCU or + * tracing itself; use variant that does not call into RCU and cannot be + * traced. + * + * Note: Such use cases must take care when using refcounting to evict + * unused entries, because the stack record free-then-reuse code paths + * do call into RCU. + */ + rcu_read_lock_sched_notrace(); - list_for_each(pos, bucket) { - found = list_entry(pos, struct stack_record, list); - if (found->hash == hash && - found->size == size && - !stackdepot_memcmp(entries, found->entries, size)) - return found; + list_for_each_entry_rcu(stack, bucket, hash_list) { + if (stack->hash != hash || stack->size != size) + continue; + + /* + * This may race with depot_free_stack() accessing the freelist + * management state unioned with @entries. The refcount is zero + * in that case and the below refcount_inc_not_zero() will fail. + */ + if (data_race(stackdepot_memcmp(entries, stack->entries, size))) + continue; + + /* + * Try to increment refcount. If this succeeds, the stack record + * is valid and has not yet been freed. + * + * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior + * to then call stack_depot_put() later, and we can assume that + * a stack record is never placed back on the freelist. + */ + if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count)) + continue; + + ret = stack; + break; } - return NULL; + + rcu_read_unlock_sched_notrace(); + + return ret; } depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, @@ -508,7 +629,6 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, struct page *page = NULL; void *prealloc = NULL; bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; - bool need_alloc = false; unsigned long flags; u32 hash; @@ -531,31 +651,16 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & stack_hash_mask]; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - - /* Fast path: look the stack trace up without full locking. */ - found = find_stack(bucket, entries, nr_entries, hash); - if (found) { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* Fast path: look the stack trace up without locking. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); + if (found) goto exit; - } - - /* Take note if another stack pool needs to be allocated. */ - if (new_pool_required) - need_alloc = true; - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); /* * Allocate memory for a new pool if required now: * we won't be able to do that under the lock. */ - if (unlikely(can_alloc && need_alloc)) { + if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -569,31 +674,36 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, prealloc = page_address(page); } - write_lock_irqsave(&pool_rwlock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); printk_deferred_enter(); - found = find_stack(bucket, entries, nr_entries, hash); + /* Try to find again, to avoid concurrently inserting duplicates. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); if (!found) { struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); if (new) { - list_add(&new->list, bucket); + /* + * This releases the stack record into the bucket and + * makes it visible to readers in find_stack(). + */ + list_add_rcu(&new->hash_list, bucket); found = new; } - } else { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); + } + + if (prealloc) { /* - * Stack depot already contains this stack trace, but let's - * keep the preallocated memory for future. + * Either stack depot already contains this stack trace, or + * depot_alloc_stack() did not consume the preallocated memory. + * Try to keep the preallocated memory for future. */ - if (prealloc) - depot_keep_new_pool(&prealloc); + depot_keep_new_pool(&prealloc); } printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); exit: if (prealloc) { /* Stack depot didn't use this memory, free it. */ @@ -618,7 +728,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { struct stack_record *stack; - unsigned long flags; *entries = NULL; /* @@ -630,13 +739,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle || stack_depot_disabled) return 0; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* + * Should never be NULL, otherwise this is a use-after-put (or just a + * corrupt handle). + */ + if (WARN(!stack, "corrupt handle or use after stack_depot_put()")) + return 0; *entries = stack->entries; return stack->size; @@ -646,29 +755,20 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); void stack_depot_put(depot_stack_handle_t handle) { struct stack_record *stack; - unsigned long flags; if (!handle || stack_depot_disabled) return; - write_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - if (WARN_ON(!stack)) - goto out; + /* + * Should always be able to find the stack record, otherwise this is an + * unbalanced put attempt (or corrupt handle). + */ + if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()")) + return; - if (refcount_dec_and_test(&stack->count)) { - /* Unlink stack from the hash table. */ - list_del(&stack->list); - - /* Free stack. */ + if (refcount_dec_and_test(&stack->count)) depot_free_stack(stack); - } - -out: - printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); } EXPORT_SYMBOL_GPL(stack_depot_put); From 0fe8ff51efb3fe5cb25da13229f95aa709613cb3 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 17 Jan 2024 18:21:52 +0000 Subject: [PATCH 418/768] MAINTAINERS: supplement of zswap maintainers update As discussed on the mailing list [1], merge the zpool maintainers entry into the zswap one. Also, add CREDITS entries for previous zswap/zpool maintainers. [1] https://lore.kernel.org/linux-mm/CAJD7tkYx4YWhGoVwnSeGc8dY_1aRRxxg8PzWBV==A6iqG_OgFw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240117182152.1439822-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Nhat Pham Acked-by: Dan Streetman Acked-by: Seth Jennings Acked-by: Johannes Weiner Cc: Vitaly Wool Signed-off-by: Andrew Morton --- CREDITS | 13 +++++++++++++ MAINTAINERS | 9 ++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/CREDITS b/CREDITS index 5797e8f7e92b..df8d6946739f 100644 --- a/CREDITS +++ b/CREDITS @@ -2161,6 +2161,19 @@ N: Mike Kravetz E: mike.kravetz@oracle.com D: Maintenance and development of the hugetlb subsystem +N: Seth Jennings +E: sjenning@redhat.com +D: Creation and maintenance of zswap + +N: Dan Streetman +E: ddstreet@ieee.org +D: Maintenance and development of zswap +D: Creation and maintenance of the zpool API + +N: Vitaly Wool +E: vitaly.wool@konsulko.com +D: Maintenance and development of zswap + N: Andreas S. Krebs E: akrebs@altavista.net D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards diff --git a/MAINTAINERS b/MAINTAINERS index 8c756737491e..fecebfc4c0dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24341,13 +24341,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git F: Documentation/filesystems/zonefs.rst F: fs/zonefs/ -ZPOOL COMPRESSED PAGE STORAGE API -M: Dan Streetman -L: linux-mm@kvack.org -S: Maintained -F: include/linux/zpool.h -F: mm/zpool.c - ZR36067 VIDEO FOR LINUX DRIVER M: Corentin Labbe L: mjpeg-users@lists.sourceforge.net @@ -24399,7 +24392,9 @@ M: Nhat Pham L: linux-mm@kvack.org S: Maintained F: Documentation/admin-guide/mm/zswap.rst +F: include/linux/zpool.h F: include/linux/zswap.h +F: mm/zpool.c F: mm/zswap.c THE REST From 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 19 Jan 2024 06:14:29 -0700 Subject: [PATCH 419/768] selftests: mm: fix map_hugetlb failure on 64K page size systems On systems with 64k page size and 512M huge page sizes, the allocation and test succeeds but errors out at the munmap. As the comment states, munmap will failure if its not HUGEPAGE aligned. This is due to the length of the mapping being 1/2 the size of the hugepage causing the munmap to not be hugepage aligned. Fix this by making the mapping length the full hugepage if the hugepage is larger than the length of the mapping. Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com Signed-off-by: Nico Pache Cc: Donet Tom Cc: Shuah Khan Cc: Christophe Leroy Cc: Michael Ellerman Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/map_hugetlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index 193281560b61..86e8f2048a40 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -58,10 +59,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { From 52e63d67b5bb423b33d7a262ac7f8bd375a90145 Mon Sep 17 00:00:00 2001 From: Audra Mitchell Date: Fri, 19 Jan 2024 15:58:01 -0500 Subject: [PATCH 420/768] selftests/mm: Update va_high_addr_switch.sh to check CPU for la57 flag In order for the page table level 5 to be in use, the CPU must have the setting enabled in addition to the CONFIG option. Check for the flag to be set to avoid false test failures on systems that do not have this cpu flag set. The test does a series of mmap calls including three using the MAP_FIXED flag and specifying an address that is 1<<47 or 1<<48. These addresses are only available if you are using level 5 page tables, which requires both the CPU to have the capabiltiy (la57 flag) and the kernel to be configured. Currently the test only checks for the kernel configuration option, so this test can still report a false positive. Here are the three failing lines: $ ./va_high_addr_switch | grep FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(HIGH_ADDR, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED I thought (for about a second) refactoring the test so that these three mmap calls will only be run on systems with the level 5 page tables available, but the whole point of the test is to check the level 5 feature... Link: https://lkml.kernel.org/r/20240119205801.62769-1-audra@redhat.com Fixes: 4f2930c6718a ("selftests/vm: only run 128TBswitch with 5-level paging") Signed-off-by: Audra Mitchell Cc: Rafael Aquini Cc: Shuah Khan Cc: Adam Sindelar Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/va_high_addr_switch.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 45cae7cab27e..a0a75f302904 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } From db44c658f798ad907219f15e033229b8d1aadb93 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jan 2024 18:54:07 +0100 Subject: [PATCH 421/768] mm/huge_memory: fix folio_set_dirty() vs. folio_mark_dirty() The correct folio replacement for "set_page_dirty()" is "folio_mark_dirty()", not "folio_set_dirty()". Using the latter won't properly inform the FS using the dirty_folio() callback. This has been found by code inspection, but likely this can result in some real trouble. Link: https://lkml.kernel.org/r/20240122175407.307992-1-david@redhat.com Fixes: a8e61d584eda0 ("mm/huge_memory: page_remove_rmap() -> folio_remove_rmap_pmd()") Signed-off-by: David Hildenbrand Cc: Ryan Roberts Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/huge_memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 94ef5c02b459..5f31e51f2235 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2437,7 +2437,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, page = pmd_page(old_pmd); folio = page_folio(page); if (!folio_test_dirty(folio) && pmd_dirty(old_pmd)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (!folio_test_referenced(folio) && pmd_young(old_pmd)) folio_set_referenced(folio); folio_remove_rmap_pmd(folio, page, vma); @@ -3563,7 +3563,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, } if (pmd_dirty(pmdval)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (pmd_write(pmdval)) entry = make_writable_migration_entry(page_to_pfn(page)); else if (anon_exclusive) From e4e3df290f65da6cb27dac1309389c916f27db1a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jan 2024 18:17:51 +0100 Subject: [PATCH 422/768] mm/memory: fix folio_set_dirty() vs. folio_mark_dirty() in zap_pte_range() The correct folio replacement for "set_page_dirty()" is "folio_mark_dirty()", not "folio_set_dirty()". Using the latter won't properly inform the FS using the dirty_folio() callback. This has been found by code inspection, but likely this can result in some real trouble when zapping dirty PTEs that point at clean pagecache folios. Yuezhang Mo said: "Without this fix, testing the latest exfat with xfstests, test cases generic/029 and generic/030 will fail." Link: https://lkml.kernel.org/r/20240122171751.272074-1-david@redhat.com Fixes: c46265030b0f ("mm/memory: page_remove_rmap() -> folio_remove_rmap_pte()") Signed-off-by: David Hildenbrand Reported-by: Ryan Roberts Closes: https://lkml.kernel.org/r/2445cedb-61fb-422c-8bfb-caf0a2beed62@arm.com Reviewed-by: Ryan Roberts Cc: Matthew Wilcox (Oracle) Reviewed-by: Yuezhang Mo Signed-off-by: Andrew Morton --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 7e1f4849463a..89bcae0b224d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1464,7 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, delay_rmap = 0; if (!folio_test_anon(folio)) { if (pte_dirty(ptent)) { - folio_set_dirty(folio); + folio_mark_dirty(folio); if (tlb_delay_rmap(tlb)) { delay_rmap = 1; force_flush = 1; From 6f9dc684cae638dda0570154509884ee78d0f75c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 22 Jan 2024 09:52:01 -0800 Subject: [PATCH 423/768] scs: add CONFIG_MMU dependency for vfree_atomic() The shadow call stack implementation fails to build without CONFIG_MMU: ld.lld: error: undefined symbol: vfree_atomic >>> referenced by scs.c >>> kernel/scs.o:(scs_free) in archive vmlinux.a Link: https://lkml.kernel.org/r/20240122175204.2371009-1-samuel.holland@sifive.com Fixes: a2abe7cbd8fe ("scs: switch to vmapped shadow stacks") Signed-off-by: Samuel Holland Reviewed-by: Sami Tolvanen Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index c91917b50873..a5af0edd3eb8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -673,6 +673,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from From d021b442cf312664811783e92b3d5e4548e92a53 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 22 Jan 2024 12:05:54 +0000 Subject: [PATCH 424/768] selftests/mm: ksm_tests should only MADV_HUGEPAGE valid memory ksm_tests was previously mmapping a region of memory, aligning the returned pointer to a PMD boundary, then setting MADV_HUGEPAGE, but was setting it past the end of the mmapped area due to not taking the pointer alignment into consideration. Fix this behaviour. Up until commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries"), this buggy behavior was (usually) masked because the alignment difference was always less than PMD-size. But since the mentioned commit, `ksm_tests -H -s 100` started failing. Link: https://lkml.kernel.org/r/20240122120554.3108022-1-ryan.roberts@arm.com Fixes: 325254899684 ("selftests: vm: add KSM huge pages merging time test") Signed-off-by: Ryan Roberts Cc: Pedro Demarchi Gomes Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/ksm_tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index 380b691d3eb9..b748c48908d9 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); From 67695f18d55924b2013534ef3bdc363bc9e14605 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Wed, 17 Jan 2024 14:37:29 -0800 Subject: [PATCH 425/768] userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb In mfill_atomic_hugetlb(), mmap_changing isn't being checked again if we drop mmap_lock and reacquire it. When the lock is not held, mmap_changing could have been incremented. This is also inconsistent with the behavior in mfill_atomic(). Link: https://lkml.kernel.org/r/20240117223729.1444522-1-lokeshgidra@google.com Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") Signed-off-by: Lokesh Gidra Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Axel Rasmussen Cc: Brian Geffon Cc: David Hildenbrand Cc: Jann Horn Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Nicolas Geoffray Cc: Peter Xu Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 20e3b0d9cf7e..75fcf1f783bc 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb( unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags) { struct mm_struct *dst_mm = dst_vma->vm_mm; @@ -472,6 +473,15 @@ retry: goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags); #endif /* CONFIG_HUGETLB_PAGE */ @@ -622,8 +633,8 @@ retry: * If this is a HUGETLB vma, pass off to appropriate routine */ if (is_vm_hugetlb_page(dst_vma)) - return mfill_atomic_hugetlb(dst_vma, dst_start, - src_start, len, flags); + return mfill_atomic_hugetlb(dst_vma, dst_start, src_start, + len, mmap_changing, flags); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; From 4ef9ad19e17676b9ef071309bc62020e2373705d Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 18 Jan 2024 10:05:05 -0800 Subject: [PATCH 426/768] mm: huge_memory: don't force huge page alignment on 32 bit commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused two issues [1] [2] reported on 32 bit system or compat userspace. It doesn't make too much sense to force huge page alignment on 32 bit system due to the constrained virtual address space. [1] https://lore.kernel.org/linux-mm/d0a136a0-4a31-46bc-adf4-2db109a61672@kernel.org/ [2] https://lore.kernel.org/linux-mm/CAJuCfpHXLdQy1a2B6xN2d7quTYwg2OoZseYPZTRpU0eHHKD-sQ@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240118180505.2914778-1-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: Jiri Slaby Reported-by: Suren Baghdasaryan Tested-by: Jiri Slaby Tested-by: Suren Baghdasaryan Reviewed-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Christopher Lameter Signed-off-by: Andrew Morton --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5f31e51f2235..172b85208276 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -811,6 +812,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret; + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; + if (off_end <= off_align || (off_end - off_align) < size) return 0; From 5056c596c3d1848021a4eaa76ee42f4c05c50346 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 26 Jan 2024 16:22:07 +0800 Subject: [PATCH 427/768] LoongArch/smp: Call rcutree_report_cpu_starting() at tlb_init() Machines which have more than 8 nodes fail to boot SMP after commit a2ccf46333d7b2cf96 ("LoongArch/smp: Call rcutree_report_cpu_starting() earlier"). Because such machines use tlb-based per-cpu base address rather than dmw-based per-cpu base address, resulting per-cpu variables can only be accessed after tlb_init(). But rcutree_report_cpu_starting() is now called before tlb_init() and accesses per-cpu variables indeed. Since the original patch want to avoid the lockdep warning caused by page allocation in tlb_init(), we can move rcutree_report_cpu_starting() to tlb_init() where after tlb exception configuration but before page allocation. Fixes: a2ccf46333d7b2cf96 ("LoongArch/smp: Call rcutree_report_cpu_starting() earlier") Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 1 - arch/loongarch/mm/tlb.c | 16 ++++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index a16e3dbe9f09..2b49d30eb7c0 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -509,7 +509,6 @@ asmlinkage void start_secondary(void) sync_counter(); cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); - rcutree_report_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 2c0a411f23aa..0b95d32b30c9 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -284,12 +284,16 @@ static void setup_tlb_handler(int cpu) set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); - } + } else { + int vec_sz __maybe_unused; + void *addr __maybe_unused; + struct page *page __maybe_unused; + + /* Avoid lockdep warning */ + rcutree_report_cpu_starting(cpu); + #ifdef CONFIG_NUMA - else { - void *addr; - struct page *page; - const int vec_sz = sizeof(exception_handlers); + vec_sz = sizeof(exception_handlers); if (pcpu_handlers[cpu]) return; @@ -305,8 +309,8 @@ static void setup_tlb_handler(int cpu) csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); - } #endif + } } void tlb_init(int cpu) From 614f362918c782d1cfa4ee50f96072a95eac264e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 26 Jan 2024 16:22:07 +0800 Subject: [PATCH 428/768] LoongArch: KVM: Fix build due to API changes Commit 8569992d64b8f750e34b7858eac ("KVM: Use gfn instead of hva for mmu_notifier_retry") replaces mmu_invalidate_retry_hva() usage with mmu_invalidate_retry_gfn() for X86, LoongArch also need similar changes to fix build. Signed-off-by: Huacai Chen --- arch/loongarch/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 915f17527893..50a6acd7ffe4 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -675,7 +675,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, * * There are several ways to safely use this helper: * - * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before + * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before * consuming it. In this case, mmu_lock doesn't need to be held during the * lookup, but it does need to be held while checking the MMU notifier. * @@ -855,7 +855,7 @@ retry: /* Check if an invalidation has taken place since we got pfn */ spin_lock(&kvm->mmu_lock); - if (mmu_invalidate_retry_hva(kvm, mmu_seq, hva)) { + if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) { /* * This can happen when mappings are changed asynchronously, but * also synchronously if a COW is triggered by From 48ef9e87b407f89f230f804815af7ac2031ec17a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 26 Jan 2024 16:22:07 +0800 Subject: [PATCH 429/768] LoongArch: KVM: Add returns to SIMD stubs The stubs for kvm_own/lsx()/kvm_own_lasx() when CONFIG_CPU_HAS_LSX or CONFIG_CPU_HAS_LASX is not defined should have a return value since they return an int, so add "return -EINVAL;" to the stubs. Fixes the build error: In file included from ../arch/loongarch/include/asm/kvm_csr.h:12, from ../arch/loongarch/kvm/interrupt.c:8: ../arch/loongarch/include/asm/kvm_vcpu.h: In function 'kvm_own_lasx': ../arch/loongarch/include/asm/kvm_vcpu.h:73:39: error: no return statement in function returning non-void [-Werror=return-type] 73 | static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { } Fixes: db1ecca22edf ("LoongArch: KVM: Add LSX (128bit SIMD) support") Fixes: 118e10cd893d ("LoongArch: KVM: Add LASX (256bit SIMD) support") Signed-off-by: Randy Dunlap Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_vcpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index e71ceb88f29e..0cb4fdb8a9b5 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -60,7 +60,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu); void kvm_save_lsx(struct loongarch_fpu *fpu); void kvm_restore_lsx(struct loongarch_fpu *fpu); #else -static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { } +static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { return -EINVAL; } static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { } #endif @@ -70,7 +70,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu); void kvm_save_lasx(struct loongarch_fpu *fpu); void kvm_restore_lasx(struct loongarch_fpu *fpu); #else -static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { } +static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { return -EINVAL; } static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } #endif From e1d54d153fc3e697b841999df7cbad51492def8e Mon Sep 17 00:00:00 2001 From: Damian Muszynski Date: Fri, 19 Jan 2024 17:12:38 +0100 Subject: [PATCH 430/768] crypto: qat - fix arbiter mapping generation algorithm for QAT 402xx The commit "crypto: qat - generate dynamically arbiter mappings" introduced a regression on qat_402xx devices. This is reported when the driver probes the device, as indicated by the following error messages: 4xxx 0000:0b:00.0: enabling device (0140 -> 0142) 4xxx 0000:0b:00.0: Generate of the thread to arbiter map failed 4xxx 0000:0b:00.0: Direct firmware load for qat_402xx_mmp.bin failed with error -2 The root cause of this issue was the omission of a necessary function pointer required by the mapping algorithm during the implementation. Fix it by adding the missing function pointer. Fixes: 5da6a2d5353e ("crypto: qat - generate dynamically arbiter mappings") Signed-off-by: Damian Muszynski Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 479062aa5e6b..94a0ebb03d8c 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -463,6 +463,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->fw_name = ADF_402XX_FW; hw_data->fw_mmp_name = ADF_402XX_MMP; hw_data->uof_get_name = uof_get_name_402xx; + hw_data->get_ena_thd_mask = get_ena_thd_mask; break; case ADF_401XX_PCI_DEVICE_ID: hw_data->fw_name = ADF_4XXX_FW; From c5a2f74db71a849f3a60bc153d684d6d28a0c665 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Thu, 18 Jan 2024 14:55:57 +0530 Subject: [PATCH 431/768] crypto: caam - fix asynchronous hash ahash_alg->setkey is updated to ahash_nosetkey in ahash.c so checking setkey() function to determine hmac algorithm is not valid. to fix this added is_hmac variable in structure caam_hash_alg to determine whether the algorithm is hmac or not. Fixes: 2f1f34c1bf7b ("crypto: ahash - optimize performance when wrapping shash") Signed-off-by: Gaurav Jain Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg_qi2.c | 7 +++++-- drivers/crypto/caam/caamhash.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index a148ff1f0872..a4f6884416a0 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -4545,6 +4545,7 @@ struct caam_hash_alg { struct list_head entry; struct device *dev; int alg_type; + bool is_hmac; struct ahash_alg ahash_alg; }; @@ -4571,7 +4572,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->dev = caam_hash->dev; - if (alg->setkey) { + if (caam_hash->is_hmac) { ctx->adata.key_dma = dma_map_single_attrs(ctx->dev, ctx->key, ARRAY_SIZE(ctx->key), DMA_TO_DEVICE, @@ -4611,7 +4612,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -4646,12 +4647,14 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); t_alg->ahash_alg.setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 290c8500c247..fdd724228c2f 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1753,6 +1753,7 @@ static struct caam_hash_template driver_hash[] = { struct caam_hash_alg { struct list_head entry; int alg_type; + bool is_hmac; struct ahash_engine_alg ahash_alg; }; @@ -1804,7 +1805,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } else { if (priv->era >= 6) { ctx->dir = DMA_BIDIRECTIONAL; - ctx->key_dir = alg->setkey ? DMA_TO_DEVICE : DMA_NONE; + ctx->key_dir = caam_hash->is_hmac ? DMA_TO_DEVICE : DMA_NONE; } else { ctx->dir = DMA_TO_DEVICE; ctx->key_dir = DMA_NONE; @@ -1862,7 +1863,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -1915,12 +1916,14 @@ caam_hash_alloc(struct caam_hash_template *template, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); halg->setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; From 96204e15310c218fd9355bdcacd02fed1d18070e Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 23 Jan 2024 17:14:20 +0000 Subject: [PATCH 432/768] mm: thp_get_unmapped_area must honour topdown preference The addition of commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused the "virtual_address_range" mm selftest to start failing on arm64. Let's fix that regression. There were 2 visible problems when running the test; 1) it takes much longer to execute, and 2) the test fails. Both are related: The (first part of the) test allocates as many 1GB anonymous blocks as it can in the low 256TB of address space, passing NULL as the addr hint to mmap. Before the faulty patch, all allocations were abutted and contained in a single, merged VMA. However, after this patch, each allocation is in its own VMA, and there is a 2M gap between each VMA. This causes the 2 problems in the test: 1) mmap becomes MUCH slower because there are so many VMAs to check to find a new 1G gap. 2) mmap fails once it hits the VMA limit (/proc/sys/vm/max_map_count). Hitting this limit then causes a subsequent calloc() to fail, which causes the test to fail. The problem is that arm64 (unlike x86) selects ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT. But __thp_get_unmapped_area() allocates len+2M then always aligns to the bottom of the discovered gap. That causes the 2M hole. Fix this by detecting cases where we can still achive the alignment goal when moved to the top of the allocated area, if configured to prefer top-down allocation. While we are at it, fix thp_get_unmapped_area's use of pgoff, which should always be zero for anonymous mappings. Prior to the faulty change, while it was possible for user space to pass in pgoff!=0, the old mm->get_unmapped_area() handler would not use it. thp_get_unmapped_area() does use it, so let's explicitly zero it before calling the handler. This should also be the correct behavior for arches that define their own get_unmapped_area() handler. Link: https://lkml.kernel.org/r/20240123171420.3970220-1-ryan.roberts@arm.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Closes: https://lore.kernel.org/linux-mm/1e8f5ac7-54ce-433a-ae53-81522b2320e1@arm.com/ Signed-off-by: Ryan Roberts Reviewed-by: Yang Shi Cc: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 10 ++++++++-- mm/mmap.c | 6 ++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 172b85208276..94c958f7ebb5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -810,7 +810,7 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, { loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad, ret; + unsigned long len_pad, ret, off_sub; if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) return 0; @@ -839,7 +839,13 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, if (ret == addr) return addr; - ret += (off - ret) & (size - 1); + off_sub = (off - ret) & (size - 1); + + if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown && + !off_sub) + return ret + size; + + ret += off_sub; return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index b78e83d351d2..d89770eaab6b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, /* * mmap_region() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge. - * do_mmap() will clear pgoff, so match alignment. */ - pgoff = 0; get_area = shmem_get_unmapped_area; } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Ensures that larger anonymous mappings are THP aligned. */ get_area = thp_get_unmapped_area; } + /* Always treat pgoff as zero for anonymous memory. */ + if (!file) + pgoff = 0; + addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; From dd3c33ccbb8f0dc6a256dc55e7607569aea69721 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 Jan 2024 09:46:54 -0800 Subject: [PATCH 433/768] MIPS: BCM63XX: Fix missing prototypes Most of the symbols for which we do not have a prototype can actually be made static and for the few that cannot, there is already a declaration in a header for it. Signed-off-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/bcm63xx/boards/board_bcm963xx.c | 2 +- arch/mips/bcm63xx/dev-rng.c | 2 +- arch/mips/bcm63xx/dev-uart.c | 1 + arch/mips/bcm63xx/dev-wdt.c | 2 +- arch/mips/bcm63xx/irq.c | 2 +- arch/mips/bcm63xx/setup.c | 2 +- arch/mips/bcm63xx/timer.c | 2 +- 7 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index 01aff80a5967..99f321b6e417 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -702,7 +702,7 @@ static struct ssb_sprom bcm63xx_sprom = { .boardflags_hi = 0x0000, }; -int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out) +static int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out) { if (bus->bustype == SSB_BUSTYPE_PCI) { memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom)); diff --git a/arch/mips/bcm63xx/dev-rng.c b/arch/mips/bcm63xx/dev-rng.c index d277b4dc6c68..f94151f7c96f 100644 --- a/arch/mips/bcm63xx/dev-rng.c +++ b/arch/mips/bcm63xx/dev-rng.c @@ -26,7 +26,7 @@ static struct platform_device bcm63xx_rng_device = { .resource = rng_resources, }; -int __init bcm63xx_rng_register(void) +static int __init bcm63xx_rng_register(void) { if (!BCMCPU_IS_6368()) return -ENODEV; diff --git a/arch/mips/bcm63xx/dev-uart.c b/arch/mips/bcm63xx/dev-uart.c index 3bc7f3bfc9ad..5d6bf0445b29 100644 --- a/arch/mips/bcm63xx/dev-uart.c +++ b/arch/mips/bcm63xx/dev-uart.c @@ -10,6 +10,7 @@ #include #include #include +#include static struct resource uart0_resources[] = { { diff --git a/arch/mips/bcm63xx/dev-wdt.c b/arch/mips/bcm63xx/dev-wdt.c index 42130914a3c2..302bf7ed5ad5 100644 --- a/arch/mips/bcm63xx/dev-wdt.c +++ b/arch/mips/bcm63xx/dev-wdt.c @@ -34,7 +34,7 @@ static struct platform_device bcm63xx_wdt_device = { }, }; -int __init bcm63xx_wdt_register(void) +static int __init bcm63xx_wdt_register(void) { wdt_resources[0].start = bcm63xx_regset_address(RSET_WDT); wdt_resources[0].end = wdt_resources[0].start; diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c index 2548013442f6..6240a8f88ea3 100644 --- a/arch/mips/bcm63xx/irq.c +++ b/arch/mips/bcm63xx/irq.c @@ -72,7 +72,7 @@ static inline int enable_irq_for_cpu(int cpu, struct irq_data *d, */ #define BUILD_IPIC_INTERNAL(width) \ -void __dispatch_internal_##width(int cpu) \ +static void __dispatch_internal_##width(int cpu) \ { \ u32 pending[width / 32]; \ unsigned int src, tgt; \ diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c index d811e3e03f81..c13ddb544a23 100644 --- a/arch/mips/bcm63xx/setup.c +++ b/arch/mips/bcm63xx/setup.c @@ -159,7 +159,7 @@ void __init plat_mem_setup(void) board_setup(); } -int __init bcm63xx_register_devices(void) +static int __init bcm63xx_register_devices(void) { /* register gpiochip */ bcm63xx_gpio_init(); diff --git a/arch/mips/bcm63xx/timer.c b/arch/mips/bcm63xx/timer.c index a86065854c0c..74b83807df30 100644 --- a/arch/mips/bcm63xx/timer.c +++ b/arch/mips/bcm63xx/timer.c @@ -178,7 +178,7 @@ int bcm63xx_timer_set(int id, int monotonic, unsigned int countdown_us) EXPORT_SYMBOL(bcm63xx_timer_set); -int bcm63xx_timer_init(void) +static int bcm63xx_timer_init(void) { int ret, irq; u32 reg; From abcabb9e30a1f9a69c76776f8abffc31c377b542 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Jan 2024 09:47:57 +0800 Subject: [PATCH 434/768] MIPS: reserve exception vector space ONLY ONCE "cpu_probe" is called both by BP and APs, but reserving exception vector (like 0x0-0x1000) called by "cpu_probe" need once and calling on APs is too late since memblock is unavailable at that time. So, reserve exception vector ONLY by BP. Suggested-by: Thomas Bogendoerfer Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/traps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index dec6878b35f6..a1c1cb5de913 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64]; void reserve_exception_space(phys_addr_t addr, unsigned long size) { - memblock_reserve(addr, size); + /* + * reserve exception space on CPUs other than CPU0 + * is too late, since memblock is unavailable when APs + * up + */ + if (smp_processor_id() == 0) + memblock_reserve(addr, size); } void __init *set_except_vector(int n, void *addr) From ce7b1b97776ec0b068c4dd6b6dbb48ae09a23519 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Jan 2024 09:47:58 +0800 Subject: [PATCH 435/768] MIPS: loongson64: set nid for reserved memblock region Commit 61167ad5fecd("mm: pass nid to reserve_bootmem_region()") reveals that reserved memblock regions have no valid node id set, just set it right since loongson64 firmware makes it clear in memory layout info. This works around booting failure on 3A1000+ since commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") under CONFIG_DEFERRED_STRUCT_PAGE_INIT. Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 2 ++ arch/mips/loongson64/numa.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index f25caa6aa9d3..000ba91c0886 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -103,6 +103,8 @@ void __init szmem(unsigned int node) if (loongson_sysconf.vgabios_addr) memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), SZ_256K); + /* set nid for reserved memory */ + memblock_set_node((u64)node << 44, (u64)(node+1) << 44, &memblock.reserved, node); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 8f61e93c0c5b..6345e096c532 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -132,6 +132,8 @@ static void __init node_mem_init(unsigned int node) /* Reserve pfn range 0~node[0]->node_start_pfn */ memblock_reserve(0, PAGE_SIZE * start_pfn); + /* set nid for reserved memory on node 0 */ + memblock_set_node(0, (u64)1 << 44, &memblock.reserved, 1); } } From 4bf2a626dc4bb46f0754d8ac02ec8584ff114ad5 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 22 Jan 2024 19:47:09 +0100 Subject: [PATCH 436/768] MIPS: lantiq: register smp_ops on non-smp platforms Lantiq uses a common kernel config for devices with 24Kc and 34Kc cores. The changes made previously to add support for interrupts on all cores work on 24Kc platforms with SMP disabled and 34Kc platforms with SMP enabled. This patch fixes boot issues on Danube (single core 24Kc) with SMP enabled. Fixes: 730320fd770d ("MIPS: lantiq: enable all hardware interrupts on second VPE") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Thomas Bogendoerfer --- arch/mips/lantiq/prom.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index a3cf29365858..0c45767eacf6 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -108,10 +108,9 @@ void __init prom_init(void) prom_init_cmdline(); #if defined(CONFIG_MIPS_MT_SMP) - if (cpu_has_mipsmt) { - lantiq_smp_ops = vsmp_smp_ops; + lantiq_smp_ops = vsmp_smp_ops; + if (cpu_has_mipsmt) lantiq_smp_ops.init_secondary = lantiq_init_secondary; - register_smp_ops(&lantiq_smp_ops); - } + register_smp_ops(&lantiq_smp_ops); #endif } From cc4b2dd95f0d1eba8c691b36e8f4d1795582f1ff Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 25 Jan 2024 20:00:39 +0800 Subject: [PATCH 437/768] erofs: fix infinite loop due to a race of filling compressed_bvecs I encountered a race issue after lengthy (~594647 secs) stress tests on a 64k-page arm64 VM with several 4k-block EROFS images. The timing is like below: z_erofs_try_inplace_io z_erofs_fill_bio_vec cmpxchg(&compressed_bvecs[].page, NULL, ..) [access bufvec] compressed_bvecs[] = *bvec; Previously, z_erofs_submit_queue() just accessed bufvec->page only, so other fields in bufvec didn't matter. After the subpage block support is landed, .offset and .end can be used too, but filling bufvec isn't an atomic operation which can cause inconsistency. Let's use a spinlock to keep the atomicity of each bufvec. More specifically, just reuse the existing spinlock `pcl->obj.lockref.lock` since it's rarely used (also it takes a short time if even used) as long as the pcluster has a reference. Fixes: 192351616a9d ("erofs: support I/O submission for sub-page compressed blocks") Signed-off-by: Gao Xiang Reviewed-by: Yue Hu Reviewed-by: Sandeep Dhavale Link: https://lore.kernel.org/r/20240125120039.3228103-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 74 +++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 583c062cd0e4..c1c77166b30f 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -563,21 +563,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; unsigned int i; - if (i_blocksize(fe->inode) != PAGE_SIZE) - return; - if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) + if (i_blocksize(fe->inode) != PAGE_SIZE || + fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) return; for (i = 0; i < pclusterpages; ++i) { struct page *page, *newpage; void *t; /* mark pages just found for debugging */ - /* the compressed page was loaded before */ + /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; page = find_get_page(mc, pcl->obj.index + i); - if (page) { t = (void *)((unsigned long)page | 1); newpage = NULL; @@ -597,9 +595,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); t = (void *)((unsigned long)newpage | 1); } - - if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t)) + spin_lock(&pcl->obj.lockref.lock); + if (!pcl->compressed_bvecs[i].page) { + pcl->compressed_bvecs[i].page = t; + spin_unlock(&pcl->obj.lockref.lock); continue; + } + spin_unlock(&pcl->obj.lockref.lock); if (page) put_page(page); @@ -718,31 +720,25 @@ int erofs_init_managed_cache(struct super_block *sb) return 0; } -static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, - struct z_erofs_bvec *bvec) -{ - struct z_erofs_pcluster *const pcl = fe->pcl; - - while (fe->icur > 0) { - if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page, - NULL, bvec->page)) { - pcl->compressed_bvecs[fe->icur] = *bvec; - return true; - } - } - return false; -} - /* callers must be with pcluster lock held */ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, struct z_erofs_bvec *bvec, bool exclusive) { + struct z_erofs_pcluster *pcl = fe->pcl; int ret; if (exclusive) { /* give priority for inplaceio to use file pages first */ - if (z_erofs_try_inplace_io(fe, bvec)) + spin_lock(&pcl->obj.lockref.lock); + while (fe->icur > 0) { + if (pcl->compressed_bvecs[--fe->icur].page) + continue; + pcl->compressed_bvecs[fe->icur] = *bvec; + spin_unlock(&pcl->obj.lockref.lock); return 0; + } + spin_unlock(&pcl->obj.lockref.lock); + /* otherwise, check if it can be used as a bvpage */ if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED && !fe->candidate_bvpage) @@ -1423,23 +1419,26 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, { gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; - struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr; + struct z_erofs_bvec zbv; struct address_space *mapping; - struct page *page, *oldpage; + struct page *page; int justfound, bs = i_blocksize(f->inode); /* Except for inplace pages, the entire page can be used for I/Os */ bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; repeat: - oldpage = READ_ONCE(zbv->page); - if (!oldpage) + spin_lock(&pcl->obj.lockref.lock); + zbv = pcl->compressed_bvecs[nr]; + page = zbv.page; + justfound = (unsigned long)page & 1UL; + page = (struct page *)((unsigned long)page & ~1UL); + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); + if (!page) goto out_allocpage; - justfound = (unsigned long)oldpage & 1UL; - page = (struct page *)((unsigned long)oldpage & ~1UL); bvec->bv_page = page; - DBG_BUGON(z_erofs_is_shortlived_page(page)); /* * Handle preallocated cached pages. We tried to allocate such pages @@ -1448,7 +1447,6 @@ repeat: */ if (page->private == Z_EROFS_PREALLOCATED_PAGE) { set_page_private(page, 0); - WRITE_ONCE(zbv->page, page); tocache = true; goto out_tocache; } @@ -1459,9 +1457,9 @@ repeat: * therefore it is impossible for `mapping` to be NULL. */ if (mapping && mapping != mc) { - if (zbv->offset < 0) - bvec->bv_offset = round_up(-zbv->offset, bs); - bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset; + if (zbv.offset < 0) + bvec->bv_offset = round_up(-zbv.offset, bs); + bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset; return; } @@ -1471,7 +1469,6 @@ repeat: /* the cached page is still in managed cache */ if (page->mapping == mc) { - WRITE_ONCE(zbv->page, page); /* * The cached page is still available but without a valid * `->private` pcluster hint. Let's reconnect them. @@ -1503,11 +1500,15 @@ repeat: put_page(page); out_allocpage: page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); - if (oldpage != cmpxchg(&zbv->page, oldpage, page)) { + spin_lock(&pcl->obj.lockref.lock); + if (pcl->compressed_bvecs[nr].page) { erofs_pagepool_add(&f->pagepool, page); + spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || @@ -1685,6 +1686,7 @@ submit_bio_retry: if (cur + bvec.bv_len > end) bvec.bv_len = end - cur; + DBG_BUGON(bvec.bv_len < sb->s_blocksize); if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset)) goto submit_bio_retry; From 1f4a994be2c3d13852fd5c1054f292bd303352cc Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 26 Jan 2024 17:20:00 +0800 Subject: [PATCH 438/768] riscv: dts: sophgo: separate sg2042 mtime and mtimecmp to fit aclint format Change the timer layout in the dtb to fit the format that needed by the SBI. Fixes: 967a94a92aaa ("riscv: dts: add initial Sophgo SG2042 SoC device tree") Reviewed-by: Chen Wang Reviewed-by: Guo Ren Signed-off-by: Inochi Amaoto Signed-off-by: Chen Wang Signed-off-by: Arnd Bergmann --- arch/riscv/boot/dts/sophgo/sg2042.dtsi | 80 +++++++++++++++----------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi index 93256540d078..ead1cc35d88b 100644 --- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi +++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi @@ -93,144 +93,160 @@ <&cpu63_intc 3>; }; - clint_mtimer0: timer@70ac000000 { + clint_mtimer0: timer@70ac004000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac000000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac004000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu0_intc 7>, <&cpu1_intc 7>, <&cpu2_intc 7>, <&cpu3_intc 7>; }; - clint_mtimer1: timer@70ac010000 { + clint_mtimer1: timer@70ac014000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac010000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac014000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu4_intc 7>, <&cpu5_intc 7>, <&cpu6_intc 7>, <&cpu7_intc 7>; }; - clint_mtimer2: timer@70ac020000 { + clint_mtimer2: timer@70ac024000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac020000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac024000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu8_intc 7>, <&cpu9_intc 7>, <&cpu10_intc 7>, <&cpu11_intc 7>; }; - clint_mtimer3: timer@70ac030000 { + clint_mtimer3: timer@70ac034000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac030000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac034000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu12_intc 7>, <&cpu13_intc 7>, <&cpu14_intc 7>, <&cpu15_intc 7>; }; - clint_mtimer4: timer@70ac040000 { + clint_mtimer4: timer@70ac044000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac040000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac044000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu16_intc 7>, <&cpu17_intc 7>, <&cpu18_intc 7>, <&cpu19_intc 7>; }; - clint_mtimer5: timer@70ac050000 { + clint_mtimer5: timer@70ac054000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac050000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac054000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu20_intc 7>, <&cpu21_intc 7>, <&cpu22_intc 7>, <&cpu23_intc 7>; }; - clint_mtimer6: timer@70ac060000 { + clint_mtimer6: timer@70ac064000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac060000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac064000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu24_intc 7>, <&cpu25_intc 7>, <&cpu26_intc 7>, <&cpu27_intc 7>; }; - clint_mtimer7: timer@70ac070000 { + clint_mtimer7: timer@70ac074000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac070000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac074000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu28_intc 7>, <&cpu29_intc 7>, <&cpu30_intc 7>, <&cpu31_intc 7>; }; - clint_mtimer8: timer@70ac080000 { + clint_mtimer8: timer@70ac084000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac080000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac084000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu32_intc 7>, <&cpu33_intc 7>, <&cpu34_intc 7>, <&cpu35_intc 7>; }; - clint_mtimer9: timer@70ac090000 { + clint_mtimer9: timer@70ac094000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac090000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac094000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu36_intc 7>, <&cpu37_intc 7>, <&cpu38_intc 7>, <&cpu39_intc 7>; }; - clint_mtimer10: timer@70ac0a0000 { + clint_mtimer10: timer@70ac0a4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0a0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0a4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu40_intc 7>, <&cpu41_intc 7>, <&cpu42_intc 7>, <&cpu43_intc 7>; }; - clint_mtimer11: timer@70ac0b0000 { + clint_mtimer11: timer@70ac0b4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0b0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0b4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu44_intc 7>, <&cpu45_intc 7>, <&cpu46_intc 7>, <&cpu47_intc 7>; }; - clint_mtimer12: timer@70ac0c0000 { + clint_mtimer12: timer@70ac0c4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0c0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0c4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu48_intc 7>, <&cpu49_intc 7>, <&cpu50_intc 7>, <&cpu51_intc 7>; }; - clint_mtimer13: timer@70ac0d0000 { + clint_mtimer13: timer@70ac0d4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0d0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0d4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu52_intc 7>, <&cpu53_intc 7>, <&cpu54_intc 7>, <&cpu55_intc 7>; }; - clint_mtimer14: timer@70ac0e0000 { + clint_mtimer14: timer@70ac0e4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0e0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0e4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu56_intc 7>, <&cpu57_intc 7>, <&cpu58_intc 7>, <&cpu59_intc 7>; }; - clint_mtimer15: timer@70ac0f0000 { + clint_mtimer15: timer@70ac0f4000 { compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer"; - reg = <0x00000070 0xac0f0000 0x00000000 0x00007ff8>; + reg = <0x00000070 0xac0f4000 0x00000000 0x0000c000>; + reg-names = "mtimecmp"; interrupts-extended = <&cpu60_intc 7>, <&cpu61_intc 7>, <&cpu62_intc 7>, From ff3d5d04db07e5374758baa7e877fde8d683ebab Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 13 Nov 2023 17:43:44 +0100 Subject: [PATCH 439/768] drm: bridge: samsung-dsim: Don't use FORCE_STOP_STATE The FORCE_STOP_STATE bit is unsuitable to force the DSI link into LP-11 mode. It seems the bridge internally queues DSI packets and when the FORCE_STOP_STATE bit is cleared, they are sent in close succession without any useful timing (this also means that the DSI lanes won't go into LP-11 mode). The length of this gibberish varies between 1ms and 5ms. This sometimes breaks an attached bridge (TI SN65DSI84 in this case). In our case, the bridge will fail in about 1 per 500 reboots. The FORCE_STOP_STATE handling was introduced to have the DSI lanes in LP-11 state during the .pre_enable phase. But as it turns out, none of this is needed at all. Between samsung_dsim_init() and samsung_dsim_set_display_enable() the lanes are already in LP-11 mode. The code as it was before commit 20c827683de0 ("drm: bridge: samsung-dsim: Fix init during host transfer") and 0c14d3130654 ("drm: bridge: samsung-dsim: Fix i.MX8M enable flow to meet spec") was correct in this regard. This patch basically reverts both commits. It was tested on an i.MX8M SoC with an SN65DSI84 bridge. The signals were probed and the DSI packets were decoded during initialization and link start-up. After this patch the first DSI packet on the link is a VSYNC packet and the timing is correct. Command mode between .pre_enable and .enable was also briefly tested by a quick hack. There was no DSI link partner which would have responded, but it was made sure the DSI packet was send on the link. As a side note, the command mode seems to just work in HS mode. I couldn't find that the bridge will handle commands in LP mode. Fixes: 20c827683de0 ("drm: bridge: samsung-dsim: Fix init during host transfer") Fixes: 0c14d3130654 ("drm: bridge: samsung-dsim: Fix i.MX8M enable flow to meet spec") Signed-off-by: Michael Walle Signed-off-by: Inki Dae Link: https://patchwork.freedesktop.org/patch/msgid/20231113164344.1612602-1-mwalle@kernel.org --- drivers/gpu/drm/bridge/samsung-dsim.c | 32 ++------------------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index be5914caa17d..63a1a0c88be4 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -969,10 +969,6 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); reg &= ~DSIM_STOP_STATE_CNT_MASK; reg |= DSIM_STOP_STATE_CNT(driver_data->reg_values[STOP_STATE_CNT]); - - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - reg |= DSIM_FORCE_STOP_STATE; - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); reg = DSIM_BTA_TIMEOUT(0xff) | DSIM_LPDR_TIMEOUT(0xffff); @@ -1431,18 +1427,6 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi) disable_irq(dsi->irq); } -static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable) -{ - u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG); - - if (enable) - reg |= DSIM_FORCE_STOP_STATE; - else - reg &= ~DSIM_FORCE_STOP_STATE; - - samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg); -} - static int samsung_dsim_init(struct samsung_dsim *dsi) { const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; @@ -1492,9 +1476,6 @@ static void samsung_dsim_atomic_pre_enable(struct drm_bridge *bridge, ret = samsung_dsim_init(dsi); if (ret) return; - - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); } } @@ -1503,12 +1484,8 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge, { struct samsung_dsim *dsi = bridge_to_dsi(bridge); - if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) { - samsung_dsim_set_display_mode(dsi); - samsung_dsim_set_display_enable(dsi, true); - } else { - samsung_dsim_set_stop_state(dsi, false); - } + samsung_dsim_set_display_mode(dsi); + samsung_dsim_set_display_enable(dsi, true); dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1521,9 +1498,6 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge, if (!(dsi->state & DSIM_STATE_ENABLED)) return; - if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) - samsung_dsim_set_stop_state(dsi, true); - dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE; } @@ -1828,8 +1802,6 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host, if (ret) return ret; - samsung_dsim_set_stop_state(dsi, false); - ret = mipi_dsi_create_packet(&xfer.packet, msg); if (ret < 0) return ret; From 690811f0128eb6034e5fd011c3c54878839b4014 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jan 2024 11:00:14 -0300 Subject: [PATCH 440/768] tools headers uapi: Sync linux/stat.h with the kernel sources to pick STATX_MNT_ID_UNIQUE To pick the changes from: 98d2b43081972abe ("add unique mount ID") That add STATX_MNT_ID_UNIQUE that was manually added to tools/perf/trace/beauty/statx.c, at some point this should move to the shell based automated way. This silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Miklos Szeredi Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbJq08s19890WDo-@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/stat.h | 1 + tools/perf/trace/beauty/statx.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 7cab2c65d3d7..2f2ee82d5517 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -154,6 +154,7 @@ struct statx { #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c index 5f5320f7c6e2..dc5943a6352d 100644 --- a/tools/perf/trace/beauty/statx.c +++ b/tools/perf/trace/beauty/statx.c @@ -67,6 +67,7 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a P_FLAG(BTIME); P_FLAG(MNT_ID); P_FLAG(DIOALIGN); + P_FLAG(MNT_ID_UNIQUE); #undef P_FLAG From 174372668933ede569b9e56eab26971669a6a0a9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jan 2024 11:08:44 -0300 Subject: [PATCH 441/768] tools arch x86: Sync the msr-index.h copy with the kernel sources to pick IA32_MKTME_KEYID_PARTITIONING To pick up the changes in: 765a0542fdc7aad7 ("x86/virt/tdx: Detect TDX during kernel boot") Addressing this tools/perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h That makes the beautification scripts to pick some new entries: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2024-01-25 11:08:12.363223880 -0300 +++ after 2024-01-25 11:08:24.839307699 -0300 @@ -21,6 +21,7 @@ [0x0000004f] = "PPIN", [0x00000060] = "LBR_CORE_TO", [0x00000079] = "IA32_UCODE_WRITE", + [0x00000087] = "IA32_MKTME_KEYID_PARTITIONING", [0x0000008b] = "AMD64_PATCH_LEVEL", [0x0000008C] = "IA32_SGXLEPUBKEYHASH0", [0x0000008D] = "IA32_SGXLEPUBKEYHASH1", $ Now one can trace systemwide asking to see backtraces to where that MSR is being read/written, see this example with a previous update: # perf trace -e msr:*_msr/max-stack=32/ --filter="msr==IA32_MKTME_KEYID_PARTITIONING" ^C# If we use -v (verbose mode) we can see what it does behind the scenes: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_MKTME_KEYID_PARTITIONING" Using CPUID GenuineIntel-6-8E-A 0x87 New filter for msr:read_msr: (msr==0x87) && (common_pid != 58627 && common_pid != 3792) 0x87 New filter for msr:write_msr: (msr==0x87) && (common_pid != 58627 && common_pid != 3792) mmap size 528384B ^C# Example with a frequent msr: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_SPEC_CTRL" --max-events 2 Using CPUID AuthenticAMD-25-21-0 0x48 New filter for msr:read_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) 0x48 New filter for msr:write_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) mmap size 528384B Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols 0.000 Timer/2525383 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule ([kernel.kallsyms]) futex_wait_queue_me ([kernel.kallsyms]) futex_wait ([kernel.kallsyms]) do_futex ([kernel.kallsyms]) __x64_sys_futex ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __futex_abstimed_wait_common64 (/usr/lib64/libpthread-2.33.so) 0.030 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL, val: 2) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule_idle ([kernel.kallsyms]) do_idle ([kernel.kallsyms]) cpu_startup_entry ([kernel.kallsyms]) secondary_startup_64_no_verify ([kernel.kallsyms]) # Cc: Adrian Hunter Cc: Dave Hansen Cc: Ian Rogers Cc: Jiri Olsa Cc: Kai Huang Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbJt27rjkQVU1YoP@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 1d51e1850ed0..f1bd7b91b3c6 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -237,6 +237,11 @@ #define LBR_INFO_CYCLES 0xffff #define LBR_INFO_BR_TYPE_OFFSET 56 #define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) +#define LBR_INFO_BR_CNTR_OFFSET 32 +#define LBR_INFO_BR_CNTR_NUM 4 +#define LBR_INFO_BR_CNTR_BITS 2 +#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0) +#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0) #define MSR_ARCH_LBR_CTL 0x000014ce #define ARCH_LBR_CTL_LBREN BIT(0) @@ -536,6 +541,9 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* KeyID partitioning between MKTME and TDX */ +#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 + /* * AMD64 MSRs. Not complete. See the architecture manual for a more * complete list. From b0dc99215598d7fc7c6903437d49f0b33d1ef899 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jan 2024 11:23:56 -0300 Subject: [PATCH 442/768] tools headers UAPI: Sync linux/fcntl.h with the kernel sources To get the changes in: 8a924db2d7b5eb69 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface function") That don't add anything that is handled by existing hard coded tables or table generation scripts. This silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stefan Berger Link: https://lore.kernel.org/lkml/ZbJv9fGF_k2xXEdr@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fcntl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 6c80f96049bd..282e90aeb163 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -116,5 +116,8 @@ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#if defined(__KERNEL__) +#define AT_GETATTR_NOSEC 0x80000000 +#endif #endif /* _UAPI_LINUX_FCNTL_H */ From 2dac1f089add90a45d93fe8217938281532b86c7 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 25 Jan 2024 11:03:51 +0100 Subject: [PATCH 443/768] perf test: Fix 'perf script' tests on s390 In linux next repo, test case 'perf script tests' fails on s390. The root case is a command line invocation of 'perf record' with call-graph information. On s390 only DWARF formatted call-graphs are supported and only on software events. Change the command line parameters for s390. Output before: # perf test 89 89: perf script tests : FAILED! # Output after: # perf test 89 89: perf script tests : Ok # Fixes: 0dd5041c9a0eaf8c ("perf addr_location: Add init/exit/copy functions") Reviewed-by: Ian Rogers Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Namhyung Kim Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20240125100351.936262-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/script.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh index 5ae7bd0031a8..2973adab445d 100755 --- a/tools/perf/tests/shell/script.sh +++ b/tools/perf/tests/shell/script.sh @@ -54,7 +54,14 @@ def sample_table(*args): def call_path_table(*args): print(f'call_path_table({args}') _end_of_file_ - perf record -g -o "${perfdatafile}" true + case $(uname -m) + in s390x) + cmd_flags="--call-graph dwarf -e cpu-clock";; + *) + cmd_flags="-g";; + esac + + perf record $cmd_flags -o "${perfdatafile}" true perf script -i "${perfdatafile}" -s "${db_test}" echo "DB test [Success]" } From 9d95c6be48fc8d3d622658da8fbd6d6787d5c2e7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:11 -0800 Subject: [PATCH 444/768] perf list: Switch error message to pr_err() to respect debug settings (-v) Using printf() can interrupt 'perf list output', use pr_err() which can respect debug settings and the debug file. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index b0fc48be623f..9e47712507cc 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -66,7 +66,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus put_tracing_file(events_path); if (events_fd < 0) { - printf("Error: failed to open tracing events directory\n"); + pr_err("Error: failed to open tracing events directory\n"); return; } From 79bacb6ad73ce63faba3564671c2028e6c3fa1e2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:12 -0800 Subject: [PATCH 445/768] perf list: Add output file option Add an option to write the 'perf list' output to a specific file. This can avoid issues with debug output being written into the output stream. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 4 + tools/perf/builtin-list.c | 211 +++++++++++++++---------- 2 files changed, 133 insertions(+), 82 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 1b90575ee3c8..3b12595193c9 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -47,6 +47,10 @@ Print PMU events and metrics limited to the specific PMU name. --json:: Output in JSON format. +-o:: +--output=:: + Output file name. By default output is written to stdout. + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 61c2c96cc070..e27a1b1288c2 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -30,6 +30,8 @@ * functions. */ struct print_state { + /** @fp: File to write output to. */ + FILE *fp; /** * @pmu_glob: Optionally restrict PMU and metric matching to PMU or * debugfs subsystem name. @@ -66,13 +68,15 @@ static void default_print_start(void *ps) { struct print_state *print_state = ps; - if (!print_state->name_only && pager_in_use()) - printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); + if (!print_state->name_only && pager_in_use()) { + fprintf(print_state->fp, + "\nList of pre-defined events (to be used in -e or -M):\n\n"); + } } static void default_print_end(void *print_state __maybe_unused) {} -static void wordwrap(const char *s, int start, int max, int corr) +static void wordwrap(FILE *fp, const char *s, int start, int max, int corr) { int column = start; int n; @@ -82,10 +86,10 @@ static void wordwrap(const char *s, int start, int max, int corr) int wlen = strcspn(s, " \t\n"); if ((column + wlen >= max && column > start) || saw_newline) { - printf("\n%*s", start, ""); + fprintf(fp, "\n%*s", start, ""); column = start + corr; } - n = printf("%s%.*s", column > start ? " " : "", wlen, s); + n = fprintf(fp, "%s%.*s", column > start ? " " : "", wlen, s); if (n <= 0) break; saw_newline = s[wlen] == '\n'; @@ -104,6 +108,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi { struct print_state *print_state = ps; int pos; + FILE *fp = print_state->fp; if (deprecated && !print_state->deprecated) return; @@ -119,30 +124,30 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi if (print_state->name_only) { if (event_alias && strlen(event_alias)) - printf("%s ", event_alias); + fprintf(fp, "%s ", event_alias); else - printf("%s ", event_name); + fprintf(fp, "%s ", event_name); return; } if (strcmp(print_state->last_topic, topic ?: "")) { if (topic) - printf("\n%s:\n", topic); + fprintf(fp, "\n%s:\n", topic); zfree(&print_state->last_topic); print_state->last_topic = strdup(topic ?: ""); } if (event_alias && strlen(event_alias)) - pos = printf(" %s OR %s", event_name, event_alias); + pos = fprintf(fp, " %s OR %s", event_name, event_alias); else - pos = printf(" %s", event_name); + pos = fprintf(fp, " %s", event_name); if (!topic && event_type_desc) { for (; pos < 53; pos++) - putchar(' '); - printf("[%s]\n", event_type_desc); + fputc(' ', fp); + fprintf(fp, "[%s]\n", event_type_desc); } else - putchar('\n'); + fputc('\n', fp); if (desc && print_state->desc) { char *desc_with_unit = NULL; @@ -155,22 +160,22 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi ? "%s. Unit: %s" : "%s Unit: %s", desc, pmu_name); } - printf("%*s", 8, "["); - wordwrap(desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); free(desc_with_unit); } long_desc = long_desc ?: desc; if (long_desc && print_state->long_desc) { - printf("%*s", 8, "["); - wordwrap(long_desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, long_desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (print_state->detailed && encoding_desc) { - printf("%*s", 8, ""); - wordwrap(encoding_desc, 8, pager_get_columns(), 0); - putchar('\n'); + fprintf(fp, "%*s", 8, ""); + wordwrap(fp, encoding_desc, 8, pager_get_columns(), 0); + fputc('\n', fp); } } @@ -184,6 +189,7 @@ static void default_print_metric(void *ps, const char *unit __maybe_unused) { struct print_state *print_state = ps; + FILE *fp = print_state->fp; if (print_state->event_glob && (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) && @@ -192,27 +198,27 @@ static void default_print_metric(void *ps, if (!print_state->name_only && !print_state->last_metricgroups) { if (print_state->metricgroups) { - printf("\nMetric Groups:\n"); + fprintf(fp, "\nMetric Groups:\n"); if (!print_state->metrics) - putchar('\n'); + fputc('\n', fp); } else { - printf("\nMetrics:\n\n"); + fprintf(fp, "\nMetrics:\n\n"); } } if (!print_state->last_metricgroups || strcmp(print_state->last_metricgroups, group ?: "")) { if (group && print_state->metricgroups) { if (print_state->name_only) - printf("%s ", group); + fprintf(fp, "%s ", group); else if (print_state->metrics) { const char *gdesc = describe_metricgroup(group); if (gdesc) - printf("\n%s: [%s]\n", group, gdesc); + fprintf(fp, "\n%s: [%s]\n", group, gdesc); else - printf("\n%s:\n", group); + fprintf(fp, "\n%s:\n", group); } else - printf("%s\n", group); + fprintf(fp, "%s\n", group); } zfree(&print_state->last_metricgroups); print_state->last_metricgroups = strdup(group ?: ""); @@ -223,53 +229,59 @@ static void default_print_metric(void *ps, if (print_state->name_only) { if (print_state->metrics && !strlist__has_entry(print_state->visited_metrics, name)) { - printf("%s ", name); + fprintf(fp, "%s ", name); strlist__add(print_state->visited_metrics, name); } return; } - printf(" %s\n", name); + fprintf(fp, " %s\n", name); if (desc && print_state->desc) { - printf("%*s", 8, "["); - wordwrap(desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (long_desc && print_state->long_desc) { - printf("%*s", 8, "["); - wordwrap(long_desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, long_desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (expr && print_state->detailed) { - printf("%*s", 8, "["); - wordwrap(expr, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, expr, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (threshold && print_state->detailed) { - printf("%*s", 8, "["); - wordwrap(threshold, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, threshold, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } } struct json_print_state { + /** @fp: File to write output to. */ + FILE *fp; /** Should a separator be printed prior to the next item? */ bool need_sep; }; -static void json_print_start(void *print_state __maybe_unused) +static void json_print_start(void *ps) { - printf("[\n"); + struct json_print_state *print_state = ps; + FILE *fp = print_state->fp; + + fprintf(fp, "[\n"); } static void json_print_end(void *ps) { struct json_print_state *print_state = ps; + FILE *fp = print_state->fp; - printf("%s]\n", print_state->need_sep ? "\n" : ""); + fprintf(fp, "%s]\n", print_state->need_sep ? "\n" : ""); } -static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...) +static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, ...) { va_list args; @@ -318,7 +330,7 @@ static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...) } } va_end(args); - fputs(buf->buf, stdout); + fputs(buf->buf, fp); } static void json_print_event(void *ps, const char *pmu_name, const char *topic, @@ -330,60 +342,71 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic, { struct json_print_state *print_state = ps; bool need_sep = false; + FILE *fp = print_state->fp; struct strbuf buf; strbuf_init(&buf, 0); - printf("%s{\n", print_state->need_sep ? ",\n" : ""); + fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (pmu_name) { - fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name); + fix_escape_fprintf(fp, &buf, "\t\"Unit\": \"%S\"", pmu_name); need_sep = true; } if (topic) { - fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic); + fix_escape_fprintf(fp, &buf, "%s\t\"Topic\": \"%S\"", + need_sep ? ",\n" : "", + topic); need_sep = true; } if (event_name) { - fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "", - event_name); + fix_escape_fprintf(fp, &buf, "%s\t\"EventName\": \"%S\"", + need_sep ? ",\n" : "", + event_name); need_sep = true; } if (event_alias && strlen(event_alias)) { - fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "", - event_alias); + fix_escape_fprintf(fp, &buf, "%s\t\"EventAlias\": \"%S\"", + need_sep ? ",\n" : "", + event_alias); need_sep = true; } if (scale_unit && strlen(scale_unit)) { - fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", - scale_unit); + fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"", + need_sep ? ",\n" : "", + scale_unit); need_sep = true; } if (event_type_desc) { - fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "", - event_type_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"EventType\": \"%S\"", + need_sep ? ",\n" : "", + event_type_desc); need_sep = true; } if (deprecated) { - fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "", - deprecated ? "1" : "0"); + fix_escape_fprintf(fp, &buf, "%s\t\"Deprecated\": \"%S\"", + need_sep ? ",\n" : "", + deprecated ? "1" : "0"); need_sep = true; } if (desc) { - fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", - desc); + fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"", + need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", - long_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"", + need_sep ? ",\n" : "", + long_desc); need_sep = true; } if (encoding_desc) { - fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "", - encoding_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"Encoding\": \"%S\"", + need_sep ? ",\n" : "", + encoding_desc); need_sep = true; } - printf("%s}", need_sep ? "\n" : ""); + fprintf(fp, "%s}", need_sep ? "\n" : ""); strbuf_release(&buf); } @@ -394,43 +417,53 @@ static void json_print_metric(void *ps __maybe_unused, const char *group, { struct json_print_state *print_state = ps; bool need_sep = false; + FILE *fp = print_state->fp; struct strbuf buf; strbuf_init(&buf, 0); - printf("%s{\n", print_state->need_sep ? ",\n" : ""); + fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (group) { - fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group); + fix_escape_fprintf(fp, &buf, "\t\"MetricGroup\": \"%S\"", group); need_sep = true; } if (name) { - fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name); + fix_escape_fprintf(fp, &buf, "%s\t\"MetricName\": \"%S\"", + need_sep ? ",\n" : "", + name); need_sep = true; } if (expr) { - fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr); + fix_escape_fprintf(fp, &buf, "%s\t\"MetricExpr\": \"%S\"", + need_sep ? ",\n" : "", + expr); need_sep = true; } if (threshold) { - fix_escape_printf(&buf, "%s\t\"MetricThreshold\": \"%S\"", need_sep ? ",\n" : "", - threshold); + fix_escape_fprintf(fp, &buf, "%s\t\"MetricThreshold\": \"%S\"", + need_sep ? ",\n" : "", + threshold); need_sep = true; } if (unit) { - fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit); + fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"", + need_sep ? ",\n" : "", + unit); need_sep = true; } if (desc) { - fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", - desc); + fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"", + need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", - long_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"", + need_sep ? ",\n" : "", + long_desc); need_sep = true; } - printf("%s}", need_sep ? "\n" : ""); + fprintf(fp, "%s}", need_sep ? "\n" : ""); strbuf_release(&buf); } @@ -449,8 +482,12 @@ static bool default_skip_duplicate_pmus(void *ps) int cmd_list(int argc, const char **argv) { int i, ret = 0; - struct print_state default_ps = {}; - struct print_state json_ps = {}; + struct print_state default_ps = { + .fp = stdout, + }; + struct print_state json_ps = { + .fp = stdout, + }; void *ps = &default_ps; struct print_callbacks print_cb = { .print_start = default_print_start, @@ -461,6 +498,7 @@ int cmd_list(int argc, const char **argv) }; const char *cputype = NULL; const char *unit_name = NULL; + const char *output_path = NULL; bool json = false; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"), @@ -471,6 +509,7 @@ int cmd_list(int argc, const char **argv) "Print longer event descriptions."), OPT_BOOLEAN(0, "details", &default_ps.detailed, "Print information on the perf event names and expressions used internally by events."), + OPT_STRING('o', "output", &output_path, "file", "output file name"), OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated, "Print deprecated events."), OPT_STRING(0, "cputype", &cputype, "cpu type", @@ -497,6 +536,11 @@ int cmd_list(int argc, const char **argv) argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (output_path) { + default_ps.fp = fopen(output_path, "w"); + json_ps.fp = default_ps.fp; + } + setup_pager(); if (!default_ps.name_only) @@ -618,5 +662,8 @@ out: free(default_ps.last_topic); free(default_ps.last_metricgroups); strlist__delete(default_ps.visited_metrics); + if (output_path) + fclose(default_ps.fp); + return ret; } From a734c7f969750302b1150ef20468704498f74e64 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:13 -0800 Subject: [PATCH 446/768] perf test: Workaround debug output in list test Write the JSON output to a specific file to avoid debug output breaking it. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/list.sh | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh index 22b004f2b23e..8a868ae64560 100755 --- a/tools/perf/tests/shell/list.sh +++ b/tools/perf/tests/shell/list.sh @@ -3,17 +3,32 @@ # SPDX-License-Identifier: GPL-2.0 set -e -err=0 shelldir=$(dirname "$0") # shellcheck source=lib/setup_python.sh . "${shelldir}"/lib/setup_python.sh +list_output=$(mktemp /tmp/__perf_test.list_output.json.XXXXX) + +cleanup() { + rm -f "${list_output}" + + trap - EXIT TERM INT +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + test_list_json() { echo "Json output test" - perf list -j | $PYTHON -m json.tool + perf list -j -o "${list_output}" + $PYTHON -m json.tool "${list_output}" echo "Json output test [Success]" } test_list_json -exit $err +cleanup +exit 0 From 1c2124ec8431f88f5c122be73cb4b784b558d600 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:14 -0800 Subject: [PATCH 447/768] perf test shell script: Fix test for python being disabled "grep -cv" can exit with an error code that causes the "set -e" to abort the script. Switch to using the grep exit code in the if condition to avoid this. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/script.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh index 2973adab445d..fa4d71e2e72a 100755 --- a/tools/perf/tests/shell/script.sh +++ b/tools/perf/tests/shell/script.sh @@ -36,8 +36,7 @@ test_db() echo "DB test" # Check if python script is supported - libpython=$(perf version --build-options | grep python | grep -cv OFF) - if [ "${libpython}" != "1" ] ; then + if perf version --build-options | grep python | grep -q OFF ; then echo "SKIP: python scripting is not supported" err=2 return From 9a8dd2f24d1cb8e8df2c5d18b4973298db1006e1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:15 -0800 Subject: [PATCH 448/768] perf test shell daemon: Make signal test less racy The daemon signal test sends signals and then expects files to be written. It was observed on an Intel Alderlake that the signals were sent too quickly leading to the 3 expected files not appearing. To avoid this send the next signal only after the expected previous file has appeared. To avoid an infinite loop the number of retries is limited. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 34 ++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 4c598cfc5afa..e5fa8d6f9eb1 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -414,16 +414,30 @@ EOF # start daemon daemon_start ${config} test - # send 2 signals - perf daemon signal --config ${config} --session test - perf daemon signal --config ${config} - - # stop daemon - daemon_exit ${config} - - # count is 2 perf.data for signals and 1 for perf record finished - count=`ls ${base}/session-test/*perf.data* | wc -l` - if [ ${count} -ne 3 ]; then + # send 2 signals then exit. Do this in a loop watching the number of + # files to avoid races. If the loop retries more than 600 times then + # give up. + local retries=0 + local signals=0 + local success=0 + while [ ${retries} -lt 600 ] && [ ${success} -eq 0 ]; do + local files + files=`ls ${base}/session-test/*perf.data* 2> /dev/null | wc -l` + if [ ${signals} -eq 0 ]; then + perf daemon signal --config ${config} --session test + signals=1 + elif [ ${signals} -eq 1 ] && [ $files -ge 1 ]; then + perf daemon signal --config ${config} + signals=2 + elif [ ${signals} -eq 2 ] && [ $files -ge 2 ]; then + daemon_exit ${config} + signals=3 + elif [ ${signals} -eq 3 ] && [ $files -ge 3 ]; then + success=1 + fi + retries=$((${retries} +1)) + done + if [ ${success} -eq 0 ]; then error=1 echo "FAILED: perf data no generated" fi From 61f61c89fa5d9925e0b874854fb62e51948a6de1 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Mon, 15 Jan 2024 16:02:00 +0100 Subject: [PATCH 449/768] MAINTAINERS: Add Andreas Larsson as co-maintainer for arch/sparc Dave has not been very active on arch/sparc for the past two years. I have been contributing to the SPARC32 port as well as maintaining out-of-tree SPARC32 patches for LEON3/4/5 (SPARCv8 with CAS support) since 2012. I am willing to step up as an arch/sparc (co-)maintainer. For recent discussions on the matter, see [1] and [2]. [1] https://lore.kernel.org/r/20230713075235.2164609-1-u.kleine-koenig@pengutronix.de [2] https://lore.kernel.org/r/20231209105816.GA1085691@ravnborg.org/ Signed-off-by: Andreas Larsson Suggested-by: Arnd Bergmann Acked-by: Sam Ravnborg Acked-by: John Paul Adrian Glaubitz Acked-by: Jose E. Marchesi Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..542ab762be7d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20549,6 +20549,7 @@ F: Documentation/translations/sp_SP/ SPARC + UltraSPARC (sparc/sparc64) M: "David S. Miller" +M: Andreas Larsson L: sparclinux@vger.kernel.org S: Maintained Q: http://patchwork.ozlabs.org/project/sparclinux/list/ From 1233d1d54b7f66813cfa748aaaeca8c4f9c36c6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jan 2024 11:00:01 -0300 Subject: [PATCH 450/768] tools headers UAPI: Update tools's copy of drm.h headers to pick DRM_IOCTL_MODE_CLOSEFB Picking the changes from: 8570c27932e132d2 ("drm/syncobj: Add deadline support for syncobj waits") 9724ed6c1b1212d1 ("drm: Introduce DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT") e4d983acffff270c ("drm: introduce DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP") d208d875667e2a29 ("drm: introduce CLOSEFB IOCTL") afa5cf3175a22b71 ("drm/i915/uapi: fix typos/spellos and punctuation") Addressing these perf build warnings: Warning: Kernel ABI header differences: Now 'perf trace' and other code that might use the tools/perf/trace/beauty autogenerated tables will be able to translate this new ioctl command into a string: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after --- before 2024-01-26 10:54:23.486381862 -0300 +++ after 2024-01-26 10:54:35.767902442 -0300 @@ -109,6 +109,7 @@ [0xCD] = "SYNCOBJ_TIMELINE_SIGNAL", [0xCE] = "MODE_GETFB2", [0xCF] = "SYNCOBJ_EVENTFD", + [0xD0] = "MODE_CLOSEFB", [DRM_COMMAND_BASE + 0x00] = "I915_INIT", [DRM_COMMAND_BASE + 0x01] = "I915_FLUSH", [DRM_COMMAND_BASE + 0x02] = "I915_FLIP", $ Cc: Adrian Hunter Cc: Dmitry Baryshkov Cc: Ian Rogers Cc: Javier Martinez Canillas Cc: Jiri Olsa Cc: Namhyung Kim Cc: Randy Dunlap Cc: Rob Clark Cc: Simon Ser Cc: Tvrtko Ursulin Cc: Zack Rusin Link: https://lore.kernel.org/lkml/ZbPIN9Dcc5AM0uxo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 72 ++++++++++++++++++++++++++++++- tools/include/uapi/drm/i915_drm.h | 12 +++--- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index de723566c5ae..16122819edfe 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -713,7 +713,8 @@ struct drm_gem_open { /** * DRM_CAP_ASYNC_PAGE_FLIP * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy + * page-flips. */ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 /** @@ -773,6 +774,13 @@ struct drm_gem_open { * :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 +/** + * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic + * commits. + */ +#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 /* DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { @@ -842,6 +850,31 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 +/** + * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT + * + * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and + * virtualbox) have additional restrictions for cursor planes (thus + * making cursor planes on those drivers not truly universal,) e.g. + * they need cursor planes to act like one would expect from a mouse + * cursor and have correctly set hotspot properties. + * If this client cap is not set the DRM core will hide cursor plane on + * those virtualized drivers because not setting it implies that the + * client is not capable of dealing with those extra restictions. + * Clients which do set cursor hotspot and treat the cursor plane + * like a mouse cursor should set this property. + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. + * + * Setting this property on drivers which do not special case + * cursor planes (i.e. non-virtualized drivers) will return + * EOPNOTSUPP, which can be used by userspace to gauge + * requirements of the hardware/drivers they're running on. + * + * This capability is always supported for atomic-capable virtualized + * drivers starting from kernel version 6.6. + */ +#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 + /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; @@ -893,6 +926,7 @@ struct drm_syncobj_transfer { #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ @@ -901,6 +935,14 @@ struct drm_syncobj_wait { __u32 flags; __u32 first_signaled; /* only valid when not waiting all */ __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; }; struct drm_syncobj_timeline_wait { @@ -913,6 +955,14 @@ struct drm_syncobj_timeline_wait { __u32 flags; __u32 first_signaled; /* only valid when not waiting all */ __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; }; /** @@ -1218,6 +1268,26 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) +/** + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. + * + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable + * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept + * alive. When the plane no longer uses the framebuffer (because the + * framebuffer is replaced with another one, or the plane is disabled), the + * framebuffer is cleaned up. + * + * This is useful to implement flicker-free transitions between two processes. + * + * Depending on the threat model, user-space may want to ensure that the + * framebuffer doesn't expose any sensitive user information: closed + * framebuffers attached to a plane can be read back by the next DRM master. + */ +#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 218edb0a96f8..fd4f9574d177 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_FENCE 44 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture - * user specified bufffers for post-mortem debugging of GPU hangs. See + * user-specified buffers for post-mortem debugging of GPU hangs. See * EXEC_OBJECT_CAPTURE. */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 @@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy { * is accurate. * * The returned dword is split into two fields to indicate both - * the engine classess on which the object is being read, and the + * the engine classes on which the object is being read, and the * engine class on which it is currently being written (if any). * * The low word (bits 0:15) indicate if the object is being written @@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise { __u32 handle; /* Advice: either the buffer will be needed again in the near future, - * or wont be and could be discarded under memory pressure. + * or won't be and could be discarded under memory pressure. */ __u32 madv; @@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info { * // enough to hold our array of engines. The kernel will fill out the * // item.length for us, which is the number of bytes we need. * // - * // Alternatively a large buffer can be allocated straight away enabling + * // Alternatively a large buffer can be allocated straightaway enabling * // querying in one pass, in which case item.length should contain the * // length of the provided buffer. * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); @@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info { * // Now that we allocated the required number of bytes, we call the ioctl * // again, this time with the data_ptr pointing to our newly allocated * // blob, which the kernel can then populate with info on all engines. - * item.data_ptr = (uintptr_t)&info, + * item.data_ptr = (uintptr_t)&info; * * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); * if (err) ... @@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info { /** * struct drm_i915_engine_info * - * Describes one engine and it's capabilities as known to the driver. + * Describes one engine and its capabilities as known to the driver. */ struct drm_i915_engine_info { /** @engine: Engine class and instance. */ From 39af67413997a350fa02a88e15eaacf202c94884 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 22 Jan 2024 16:04:06 +0800 Subject: [PATCH 451/768] perf build: Make minimal shellcheck version to v0.6.0 The perf build failed due to the shellcheck on my machine (v0.4.6 on Ubuntu 18.04.1 LTS) doesn't support -a/--check-sourced and -S/--severity option. These two options are introduced in shellcheck v0.4.7 and v0.6.0 respectively. So restrict the minimal version of shellcheck to v0.6.0. Fixes: b809fc656e763296 ("perf build: Shellcheck support for OUTPUT directory") Reviewed-by: Ian Rogers Signed-off-by: Yicong Yang Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Junhao He Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/20240122080406.28678-1-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 27e7c478880f..f8774a9b1377 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -236,6 +236,16 @@ else SHELLCHECK := $(shell which shellcheck 2> /dev/null) endif +# shellcheck is using in tools/perf/tests/Build with option -a/--check-sourced ( +# introduced in v0.4.7) and -S/--severity (introduced in v0.6.0). So make the +# minimal shellcheck version as v0.6.0. +ifneq ($(SHELLCHECK),) + ifeq ($(shell expr $(shell $(SHELLCHECK) --version | grep version: | \ + sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 060), 1) + SHELLCHECK := + endif +endif + export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK From 79baac8acfc60a7a5114f6d60731e28c242ef8ce Mon Sep 17 00:00:00 2001 From: Sun Haiyong Date: Mon, 4 Dec 2023 16:20:55 +0800 Subject: [PATCH 452/768] perf top: Remove needless malloc(0) call that triggers -Walloc-size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 14 introduces a new -Walloc-size included in -Wextra which errors out like: builtin-top.c: In function ‘prompt_integer’: builtin-top.c:360:21: error: allocation of insufficient size ‘0’ for type ‘char’ with size ‘1’ [-Werror=alloc-size] 360 | char *buf = malloc(0), *p; | ^~~~~~ Just set it to NULL, getline() will do the allocation. Signed-off-by: Sun Haiyong Signed-off-by: Yanteng Si Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231204082055.91877-1-siyanteng@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index baf1ab083436..5301d1badd43 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -357,7 +357,7 @@ static void perf_top__print_sym_table(struct perf_top *top) static void prompt_integer(int *target, const char *msg) { - char *buf = malloc(0), *p; + char *buf = NULL, *p; size_t dummy = 0; int tmp; From eeca59a6e8e610e593d17e12f67324c615ec5ef2 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 25 Jan 2024 23:54:57 +0300 Subject: [PATCH 453/768] ALSA: usb-audio: Support read-only clock selector control Clock selector control might be read-only. Add corresponding checks to prevent sending control requests that would fail. Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240125205457.28258-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 94e4aaeafe58..7b259641adb5 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -261,6 +261,8 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int ret, i, cur, err, pins, clock_id; const u8 *sources; int proto = fmt->protocol; + bool readable, writeable; + u32 bmControls; entity_id &= 0xff; @@ -292,11 +294,27 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, sources = GET_VAL(selector, proto, baCSourceID); cur = 0; + if (proto == UAC_VERSION_3) + bmControls = le32_to_cpu(*(__le32 *)(&selector->v3.baCSourceID[0] + pins)); + else + bmControls = *(__u8 *)(&selector->v2.baCSourceID[0] + pins); + + readable = uac_v2v3_control_is_readable(bmControls, + UAC2_CX_CLOCK_SELECTOR); + writeable = uac_v2v3_control_is_writeable(bmControls, + UAC2_CX_CLOCK_SELECTOR); + if (pins == 1) { ret = 1; goto find_source; } + /* for now just warn about buggy device */ + if (!readable) + usb_audio_warn(chip, + "%s(): clock selector control is not readable, id %d\n", + __func__, clock_id); + /* the entity ID we are looking at is a selector. * find out what it currently selects */ ret = uac_clock_selector_get_val(chip, clock_id); @@ -326,7 +344,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (ret > 0) { /* Skip setting clock selector again for some devices */ if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR || - pins == 1) + pins == 1 || !writeable) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) @@ -337,6 +355,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, return ret; find_others: + if (!writeable) + return -ENXIO; + /* The current clock source is invalid, try others. */ for (i = 1; i <= pins; i++) { if (i == cur) From 7bbe8f0071dfa23fcc3b2864ec9f3b1aeb7ab2df Mon Sep 17 00:00:00 2001 From: Sun Haiyong Date: Sat, 6 Jan 2024 17:41:29 +0800 Subject: [PATCH 454/768] perf tools: Fix calloc() arguments to address error introduced in gcc-14 the definition of calloc is as follows: void *calloc(size_t nmemb, size_t size); number of members is in the first parameter and the size is in the second parameter. Fix error messages on gcc 14 20240102: error: 'calloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] Committer notes: I noticed this on fedora 40 and rawhide. Signed-off-by: Sun Haiyong Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240106094129.3337057-1-siyanteng@loongson.cn Signed-off-by: Yanteng Si Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/util/hist.c | 4 ++-- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/synthetic-events.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 91e6828c38cc..86c910125172 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -4080,8 +4080,8 @@ int cmd_record(int argc, const char **argv) } if (rec->switch_output.num_files) { - rec->switch_output.filenames = calloc(sizeof(char *), - rec->switch_output.num_files); + rec->switch_output.filenames = calloc(rec->switch_output.num_files, + sizeof(char *)); if (!rec->switch_output.filenames) { err = -EINVAL; goto out_opts; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0888b7163b7c..fa359180ebf8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -491,8 +491,8 @@ static int hist_entry__init(struct hist_entry *he, } if (symbol_conf.res_sample) { - he->res_samples = calloc(sizeof(struct res_sample), - symbol_conf.res_sample); + he->res_samples = calloc(symbol_conf.res_sample, + sizeof(struct res_sample)); if (!he->res_samples) goto err_srcline; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ca3e0404f187..966cca5a3e88 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -286,7 +286,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, *out_metric_events = NULL; ids_size = hashmap__size(ids); - metric_events = calloc(sizeof(void *), ids_size + 1); + metric_events = calloc(ids_size + 1, sizeof(void *)); if (!metric_events) return -ENOMEM; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3712186353fb..2a0289c14959 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1055,11 +1055,11 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr > n) thread_nr = n; - synthesize_threads = calloc(sizeof(pthread_t), thread_nr); + synthesize_threads = calloc(thread_nr, sizeof(pthread_t)); if (synthesize_threads == NULL) goto free_dirent; - args = calloc(sizeof(*args), thread_nr); + args = calloc(thread_nr, sizeof(*args)); if (args == NULL) goto free_threads; From 47c5dd66c1840524572dcdd956f4af2bdb6fbdff Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 26 Jan 2024 16:26:43 +0800 Subject: [PATCH 455/768] nvmet-tcp: fix nvme tcp ida memory leak The nvmet_tcp_queue_ida should be destroy when the nvmet-tcp module exit. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index da0469978d6f..c8655fc5aa5b 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -2216,6 +2216,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); From 00aab7dcb2267f2aef59447602f34501efe1a07f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jan 2024 18:09:01 +0100 Subject: [PATCH 456/768] HID: i2c-hid-of: fix NULL-deref on failed power up A while back the I2C HID implementation was split in an ACPI and OF part, but the new OF driver never initialises the client pointer which is dereferenced on power-up failures. Fixes: b33752c30023 ("HID: i2c-hid: Reorganize so ACPI and OF are separate modules") Cc: stable@vger.kernel.org # 5.12 Cc: Douglas Anderson Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index c4e1fa0273c8..8be4d576da77 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -87,6 +87,7 @@ static int i2c_hid_of_probe(struct i2c_client *client) if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; From 4d7acc8f48bcf27d0dc068f02e55c77e840b9110 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 27 Jan 2024 04:04:34 +1000 Subject: [PATCH 457/768] Revert "nouveau: push event block/allowing out of the fence context" This reverts commit eacabb5462717a52fccbbbba458365a4f5e61f35. This commit causes some regressions in desktop usage, this will reintroduce the original deadlock in DRI_PRIME situations, I've got an idea to fix it by offloading to a workqueue in a different spot, however this code has a race condition where we sometimes miss interrupts so I'd like to fix that as well. Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_fence.c | 28 +++++-------------------- drivers/gpu/drm/nouveau/nouveau_fence.h | 5 +---- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 5057d976fa57..ca762ea55413 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence) if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - if (atomic_dec_and_test(&fctx->notify_ref)) + if (!--fctx->notify_ref) drop = 1; } @@ -103,7 +103,6 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { - cancel_work_sync(&fctx->allow_block_work); nouveau_fence_context_kill(fctx, 0); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -168,18 +167,6 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc return ret; } -static void -nouveau_fence_work_allow_block(struct work_struct *work) -{ - struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, - allow_block_work); - - if (atomic_read(&fctx->notify_ref) == 0) - nvif_event_block(&fctx->event); - else - nvif_event_allow(&fctx->event); -} - void nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) { @@ -191,7 +178,6 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; - INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -535,19 +521,15 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f) struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); bool ret; - bool do_work; - if (atomic_inc_return(&fctx->notify_ref) == 0) - do_work = true; + if (!fctx->notify_ref++) + nvif_event_allow(&fctx->event); ret = nouveau_fence_no_signaling(f); if (ret) set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); - else if (atomic_dec_and_test(&fctx->notify_ref)) - do_work = true; - - if (do_work) - schedule_work(&fctx->allow_block_work); + else if (!--fctx->notify_ref) + nvif_event_block(&fctx->event); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 28f5cf013b89..64d33ae7f356 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -3,7 +3,6 @@ #define __NOUVEAU_FENCE_H__ #include -#include #include struct nouveau_drm; @@ -46,9 +45,7 @@ struct nouveau_fence_chan { char name[32]; struct nvif_event event; - struct work_struct allow_block_work; - atomic_t notify_ref; - int dead, killed; + int notify_ref, dead, killed; }; struct nouveau_fence_priv { From 97aab852c4b9e1b378de48a55f8c9b8d76c36ccc Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 22 Jan 2024 07:49:52 -0800 Subject: [PATCH 458/768] hwmon: gigabyte_waterforce: Fix locking bug in waterforce_get_status() Goto 'unlock_and_return' for unlocking before returning on the error path. Fixes: d5939a793693 ("hwmon: Add driver for Gigabyte AORUS Waterforce AIO coolers") Signed-off-by: Harshit Mogalapalli Reviewed-by: Aleksa Savic Link: https://lore.kernel.org/r/20240122154952.2851934-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Guenter Roeck --- drivers/hwmon/gigabyte_waterforce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/gigabyte_waterforce.c b/drivers/hwmon/gigabyte_waterforce.c index 85e523775714..8129d7b3ceaf 100644 --- a/drivers/hwmon/gigabyte_waterforce.c +++ b/drivers/hwmon/gigabyte_waterforce.c @@ -146,7 +146,7 @@ static int waterforce_get_status(struct waterforce_data *priv) /* Send command for getting status */ ret = waterforce_write_expanded(priv, get_status_cmd, GET_STATUS_CMD_LENGTH); if (ret < 0) - return ret; + goto unlock_and_return; ret = wait_for_completion_interruptible_timeout(&priv->status_report_received, msecs_to_jiffies(STATUS_VALIDITY)); From 118063f3803324411be0e601d560ed7d1c9824b0 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 23 Jan 2024 19:44:57 +0530 Subject: [PATCH 459/768] platform/x86/amd/pmf: Get Human presence information from AMD SFH driver AMD SFH driver has APIs defined to export the human presence information; use this within the PMF driver to send inputs to the PMF TA, so that PMF driver can enact to the actions coming from the TA. Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240123141458.3715211-1-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/Kconfig | 1 + drivers/platform/x86/amd/pmf/spc.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/Kconfig b/drivers/platform/x86/amd/pmf/Kconfig index f246252bddd8..f4fa8bd8bda8 100644 --- a/drivers/platform/x86/amd/pmf/Kconfig +++ b/drivers/platform/x86/amd/pmf/Kconfig @@ -10,6 +10,7 @@ config AMD_PMF depends on AMD_NB select ACPI_PLATFORM_PROFILE depends on TEE && AMDTEE + depends on AMD_SFH_HID help This driver provides support for the AMD Platform Management Framework. The goal is to enhance end user experience by making AMD PCs smarter, diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index a0423942f771..87ae7c41c9f8 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include "pmf.h" @@ -44,6 +45,7 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table * dev_dbg(dev->dev, "Max C0 Residency: %u\n", in->ev_info.max_c0residency); dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy); dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open"); + dev_dbg(dev->dev, "User Presence: %s\n", in->ev_info.user_present ? "Present" : "Away"); dev_dbg(dev->dev, "==== TA inputs END ====\n"); } #else @@ -147,6 +149,31 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ return 0; } +static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) +{ + struct amd_sfh_info sfh_info; + int ret; + + /* get HPD data */ + ret = amd_get_sfh_info(&sfh_info, MT_HPD); + if (ret) + return ret; + + switch (sfh_info.user_present) { + case SFH_NOT_DETECTED: + in->ev_info.user_present = 0xff; /* assume no sensors connected */ + break; + case SFH_USER_PRESENT: + in->ev_info.user_present = 1; + break; + case SFH_USER_AWAY: + in->ev_info.user_present = 0; + break; + } + + return 0; +} + void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) { /* TA side lid open is 1 and close is 0, hence the ! here */ @@ -155,4 +182,5 @@ void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_tab amd_pmf_get_smu_info(dev, in); amd_pmf_get_battery_info(dev, in); amd_pmf_get_slider_info(dev, in); + amd_pmf_get_sensor_info(dev, in); } From cedecdba60f4a42a8562574119f317ed0c674b5a Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 23 Jan 2024 19:44:58 +0530 Subject: [PATCH 460/768] platform/x86/amd/pmf: Get ambient light information from AMD SFH driver AMD SFH driver has APIs defined to export the ambient light information; use this within the PMF driver to send inputs to the PMF TA, so that PMF driver can enact to the actions coming from the TA. Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240123141458.3715211-2-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/spc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index 87ae7c41c9f8..a3dec14c3004 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -46,6 +46,7 @@ void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table * dev_dbg(dev->dev, "GFX Busy: %u\n", in->ev_info.gfx_busy); dev_dbg(dev->dev, "LID State: %s\n", in->ev_info.lid_state ? "close" : "open"); dev_dbg(dev->dev, "User Presence: %s\n", in->ev_info.user_present ? "Present" : "Away"); + dev_dbg(dev->dev, "Ambient Light: %d\n", in->ev_info.ambient_light); dev_dbg(dev->dev, "==== TA inputs END ====\n"); } #else @@ -154,6 +155,13 @@ static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ struct amd_sfh_info sfh_info; int ret; + /* Get ALS data */ + ret = amd_get_sfh_info(&sfh_info, MT_ALS); + if (!ret) + in->ev_info.ambient_light = sfh_info.ambient_light; + else + return ret; + /* get HPD data */ ret = amd_get_sfh_info(&sfh_info, MT_HPD); if (ret) From a692a86efe97fe0aba7cb15f38cbce866c080689 Mon Sep 17 00:00:00 2001 From: Cong Liu Date: Wed, 24 Jan 2024 09:29:38 +0800 Subject: [PATCH 461/768] platform/x86/amd/pmf: Fix memory leak in amd_pmf_get_pb_data() amd_pmf_get_pb_data() will allocate memory for the policy buffer, but does not free it if copy_from_user() fails. This leads to a memory leak. Fixes: 10817f28e533 ("platform/x86/amd/pmf: Add capability to sideload of policy binary") Reviewed-by: Shyam Sundar S K Signed-off-by: Cong Liu Link: https://lore.kernel.org/r/20240124012939.6550-1-liucong2@kylinos.cn Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/tee-if.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 502ce93d5cdd..f8c0177afb0d 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -298,8 +298,10 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, if (!new_policy_buf) return -ENOMEM; - if (copy_from_user(new_policy_buf, buf, length)) + if (copy_from_user(new_policy_buf, buf, length)) { + kfree(new_policy_buf); return -EFAULT; + } kfree(dev->policy_buf); dev->policy_buf = new_policy_buf; From 8c898ec07a2fc1d4694e81097a48e94a3816308d Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Thu, 25 Jan 2024 00:22:50 -0800 Subject: [PATCH 462/768] platform/x86/intel/ifs: Call release_firmware() when handling errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing release_firmware() due to error handling blocked any future image loading. Fix the return code and release_fiwmare() to release the bad image. Fixes: 25a76dbb36dd ("platform/x86/intel/ifs: Validate image size") Reported-by: Pengfei Xu Signed-off-by: Jithu Joseph Signed-off-by: Ashok Raj Tested-by: Pengfei Xu Reviewed-by: Tony Luck Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240125082254.424859-2-ashok.raj@intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/ifs/load.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c index a1ee1a74fc3c..2cf3b4a8813f 100644 --- a/drivers/platform/x86/intel/ifs/load.c +++ b/drivers/platform/x86/intel/ifs/load.c @@ -399,7 +399,8 @@ int ifs_load_firmware(struct device *dev) if (fw->size != expected_size) { dev_err(dev, "File size mismatch (expected %u, actual %zu). Corrupted IFS image.\n", expected_size, fw->size); - return -EINVAL; + ret = -EINVAL; + goto release; } ret = image_sanity_check(dev, (struct microcode_header_intel *)fw->data); From 1abdf288b0ef5606f76b6e191fa6df05330e3d7e Mon Sep 17 00:00:00 2001 From: Phoenix Chen Date: Fri, 26 Jan 2024 17:53:08 +0800 Subject: [PATCH 463/768] platform/x86: touchscreen_dmi: Add info for the TECLAST X16 Plus tablet Add touch screen info for TECLAST X16 Plus tablet. Signed-off-by: Phoenix Chen Link: https://lore.kernel.org/r/20240126095308.5042-1-asbeltogf@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 0c6733772698..7aee5e9ff2b8 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -944,6 +944,32 @@ static const struct ts_dmi_data teclast_tbook11_data = { .properties = teclast_tbook11_props, }; +static const struct property_entry teclast_x16_plus_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 14), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1916), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1264), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data teclast_x16_plus_data = { + .embedded_fw = { + .name = "silead/gsl3692-teclast-x16-plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 43560, + .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25, + 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75, + 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f, + 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 }, + }, + .acpi_name = "MSSL1680:00", + .properties = teclast_x16_plus_props, +}; + static const struct property_entry teclast_x3_plus_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1612,6 +1638,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_SKU, "E5A6_A1"), }, }, + { + /* Teclast X16 Plus */ + .driver_data = (void *)&teclast_x16_plus_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"), + }, + }, { /* Teclast X3 Plus */ .driver_data = (void *)&teclast_x3_plus_data, From fcf67d82b8b878bdd95145382be43927bce07ec6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Jan 2024 16:32:36 +0100 Subject: [PATCH 464/768] selftests: net: add missing config for big tcp tests The big_tcp test-case requires a few kernel knobs currently not specified in the net selftests config, causing the following failure: # selftests: net: big_tcp.sh # Error: Failed to load TC action module. # We have an error talking to the kernel ... # Testing for BIG TCP: # CLI GSO | GW GRO | GW GSO | SER GRO # ./big_tcp.sh: line 107: test: !=: unary operator expected ... # on on on on : [FAIL_on_link1] Add the missing configs Fixes: 6bb382bcf742 ("selftests: add a selftest for big tcp") Signed-off-by: Paolo Abeni Acked-by: Aaron Conole Acked-by: Xin Long Link: https://lore.kernel.org/all/21630ecea872fea13f071342ac64ef52a991a9b5.1706282943.git.pabeni@redhat.com/ Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 19ff75051660..413ab9abcf1b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -29,7 +29,9 @@ CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_RAW=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_L2TP_ETH=m @@ -45,6 +47,8 @@ CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_U32=m @@ -55,6 +59,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m From 0958b33ef5a04ed91f61cef4760ac412080c4e08 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 26 Jan 2024 09:42:58 +0900 Subject: [PATCH 465/768] tracing/trigger: Fix to return error if failed to alloc snapshot Fix register_snapshot_trigger() to return error code if it failed to allocate a snapshot instead of 0 (success). Unless that, it will register snapshot trigger without an error. Link: https://lore.kernel.org/linux-trace-kernel/170622977792.270660.2789298642759362200.stgit@devnote2 Fixes: 0bbe7f719985 ("tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation") Cc: stable@vger.kernel.org Cc: Vincent Donnefort Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 46439e3bcec4..b33c3861fbbb 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } From c60fe56c169eb552113a4711db3a5f99e3acd4c1 Mon Sep 17 00:00:00 2001 From: Konstantin Aladyshev Date: Fri, 26 Jan 2024 23:57:14 +0300 Subject: [PATCH 466/768] hwmon: (pmbus/mp2975) Fix driver initialization for MP2975 device The commit 1feb31e810b0 ("hwmon: (pmbus/mp2975) Simplify VOUT code") has introduced a bug that makes it impossible to initialize MP2975 device: """ mp2975 5-0020: Failed to identify chip capabilities i2c i2c-5: new_device: Instantiated device mp2975 at 0x20 i2c i2c-5: delete_device: Deleting device mp2975 at 0x20 """ Since the 'read_byte_data' function was removed from the 'pmbus_driver_info ' structure the driver no longer reports correctly that VOUT mode is direct. Therefore 'pmbus_identify_common' fails with error, making it impossible to initialize the device. Restore 'read_byte_data' function to fix the issue. Tested: - before: it is not possible to initialize MP2975 device with the 'mp2975' driver, - after: 'mp2975' correctly initializes MP2975 device and all sensor data is correct. Fixes: 1feb31e810b0 ("hwmon: (pmbus/mp2975) Simplify VOUT code") Signed-off-by: Konstantin Aladyshev Link: https://lore.kernel.org/r/20240126205714.2363-1-aladyshev22@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/mp2975.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c index b9bb469e2d8f..5bbfdacb61a7 100644 --- a/drivers/hwmon/pmbus/mp2975.c +++ b/drivers/hwmon/pmbus/mp2975.c @@ -126,6 +126,22 @@ static const struct regulator_desc __maybe_unused mp2975_reg_desc[] = { #define to_mp2975_data(x) container_of(x, struct mp2975_data, info) +static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg) +{ + switch (reg) { + case PMBUS_VOUT_MODE: + /* + * Enforce VOUT direct format, since device allows to set the + * different formats for the different rails. Conversion from + * VID to direct provided by driver internally, in case it is + * necessary. + */ + return PB_VOUT_MODE_DIRECT; + default: + return -ENODATA; + } +} + static int mp2975_read_word_helper(struct i2c_client *client, int page, int phase, u8 reg, u16 mask) @@ -869,6 +885,7 @@ static struct pmbus_driver_info mp2975_info = { PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | PMBUS_PHASE_VIRTUAL, + .read_byte_data = mp2975_read_byte_data, .read_word_data = mp2975_read_word_data, #if IS_ENABLED(CONFIG_SENSORS_MP2975_REGULATOR) .num_regulators = 1, From d0266d7ab1618482d58015d67a5220e590333298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 23 Jan 2024 17:00:53 +0100 Subject: [PATCH 467/768] Revert "power: supply: qcom_battmgr: Register the power supplies after PDR is up" This reverts commit b43f7ddc2b7a5a90447d96cb4d3c6d142dd4a810. The offending commit deferred power-supply class device registration until the service-started notification is received. This triggers a NULL pointer dereference during boot of the Lenovo ThinkPad X13s and SC8280XP CRD as battery status notifications can be received before the service-start notification: Unable to handle kernel NULL pointer dereference at virtual address 00000000000005c0 ... Call trace: _acquire+0x338/0x2064 acquire+0x1e8/0x318 spin_lock_irqsave+0x60/0x88 _supply_changed+0x2c/0xa4 battmgr_callback+0x1d4/0x60c [qcom_battmgr] pmic_glink_rpmsg_callback+0x5c/0xa4 [pmic_glink] qcom_glink_native_rx+0x58c/0x7e8 qcom_glink_smem_intr+0x14/0x24 [qcom_glink_smem] __handle_irq_event_percpu+0xb0/0x2d4 handle_irq_event+0x4c/0xb8 As trying to serialise this is non-trivial and risks missing notifications, let's revert to registration during probe so that the driver data is all set up once the service goes live. The warning message during resume in case the aDSP firmware is not running that motivated the change can be considered a feature and should not be suppressed. Fixes: b43f7ddc2b7a ("power: supply: qcom_battmgr: Register the power supplies after PDR is up") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240123160053.18331-1-johan+linaro@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 109 +++++++++++++--------------- 1 file changed, 49 insertions(+), 60 deletions(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index a12e2a66d516..ec163d1bcd18 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -282,7 +282,6 @@ struct qcom_battmgr_wireless { struct qcom_battmgr { struct device *dev; - struct auxiliary_device *adev; struct pmic_glink_client *client; enum qcom_battmgr_variant variant; @@ -1294,69 +1293,11 @@ static void qcom_battmgr_enable_worker(struct work_struct *work) dev_err(battmgr->dev, "failed to request power notifications\n"); } -static char *qcom_battmgr_battery[] = { "battery" }; - -static void qcom_battmgr_register_psy(struct qcom_battmgr *battmgr) -{ - struct power_supply_config psy_cfg_supply = {}; - struct auxiliary_device *adev = battmgr->adev; - struct power_supply_config psy_cfg = {}; - struct device *dev = &adev->dev; - - psy_cfg.drv_data = battmgr; - psy_cfg.of_node = adev->dev.of_node; - - psy_cfg_supply.drv_data = battmgr; - psy_cfg_supply.of_node = adev->dev.of_node; - psy_cfg_supply.supplied_to = qcom_battmgr_battery; - psy_cfg_supply.num_supplicants = 1; - - if (battmgr->variant == QCOM_BATTMGR_SC8280XP) { - battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg); - if (IS_ERR(battmgr->bat_psy)) - dev_err(dev, "failed to register battery power supply (%ld)\n", - PTR_ERR(battmgr->bat_psy)); - - battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->ac_psy)) - dev_err(dev, "failed to register AC power supply (%ld)\n", - PTR_ERR(battmgr->ac_psy)); - - battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->usb_psy)) - dev_err(dev, "failed to register USB power supply (%ld)\n", - PTR_ERR(battmgr->usb_psy)); - - battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->wls_psy)) - dev_err(dev, "failed to register wireless charing power supply (%ld)\n", - PTR_ERR(battmgr->wls_psy)); - } else { - battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg); - if (IS_ERR(battmgr->bat_psy)) - dev_err(dev, "failed to register battery power supply (%ld)\n", - PTR_ERR(battmgr->bat_psy)); - - battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->usb_psy)) - dev_err(dev, "failed to register USB power supply (%ld)\n", - PTR_ERR(battmgr->usb_psy)); - - battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->wls_psy)) - dev_err(dev, "failed to register wireless charing power supply (%ld)\n", - PTR_ERR(battmgr->wls_psy)); - } -} - static void qcom_battmgr_pdr_notify(void *priv, int state) { struct qcom_battmgr *battmgr = priv; if (state == SERVREG_SERVICE_STATE_UP) { - if (!battmgr->bat_psy) - qcom_battmgr_register_psy(battmgr); - battmgr->service_up = true; schedule_work(&battmgr->enable_work); } else { @@ -1371,9 +1312,13 @@ static const struct of_device_id qcom_battmgr_of_variants[] = { {} }; +static char *qcom_battmgr_battery[] = { "battery" }; + static int qcom_battmgr_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { + struct power_supply_config psy_cfg_supply = {}; + struct power_supply_config psy_cfg = {}; const struct of_device_id *match; struct qcom_battmgr *battmgr; struct device *dev = &adev->dev; @@ -1383,7 +1328,14 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev, return -ENOMEM; battmgr->dev = dev; - battmgr->adev = adev; + + psy_cfg.drv_data = battmgr; + psy_cfg.of_node = adev->dev.of_node; + + psy_cfg_supply.drv_data = battmgr; + psy_cfg_supply.of_node = adev->dev.of_node; + psy_cfg_supply.supplied_to = qcom_battmgr_battery; + psy_cfg_supply.num_supplicants = 1; INIT_WORK(&battmgr->enable_work, qcom_battmgr_enable_worker); mutex_init(&battmgr->lock); @@ -1395,6 +1347,43 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev, else battmgr->variant = QCOM_BATTMGR_SM8350; + if (battmgr->variant == QCOM_BATTMGR_SC8280XP) { + battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg); + if (IS_ERR(battmgr->bat_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy), + "failed to register battery power supply\n"); + + battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->ac_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->ac_psy), + "failed to register AC power supply\n"); + + battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->usb_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy), + "failed to register USB power supply\n"); + + battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->wls_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy), + "failed to register wireless charing power supply\n"); + } else { + battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg); + if (IS_ERR(battmgr->bat_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy), + "failed to register battery power supply\n"); + + battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->usb_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy), + "failed to register USB power supply\n"); + + battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->wls_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy), + "failed to register wireless charing power supply\n"); + } + battmgr->client = devm_pmic_glink_register_client(dev, PMIC_GLINK_OWNER_BATTMGR, qcom_battmgr_callback, From c3dfcdb65ec1a4813ec1e0871c52c671ba9c71ac Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 25 Jan 2024 20:39:16 +0800 Subject: [PATCH 468/768] net/smc: fix incorrect SMC-D link group matching logic The logic to determine if SMC-D link group matches is incorrect. The correct logic should be that it only returns true when the GID is the same, and the SMC-D device is the same and the extended GID is the same (in the case of virtual ISM). It can be fixed by adding brackets around the conditional (or ternary) operator expression. But for better readability and maintainability, it has been changed to an if-else statement. Reported-by: Matthew Rosato Closes: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Fixes: b40584d14570 ("net/smc: compatible with 128-bits extended GID of virtual ISM device") Link: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Signed-off-by: Wen Gu Reviewed-by: Alexandra Winter Link: https://lore.kernel.org/r/20240125123916.77928-1-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 95cc95458e2d..e4c858411207 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, struct smcd_dev *smcismdev, struct smcd_gid *peer_gid) { - return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev && - smc_ism_is_virtual(smcismdev) ? - (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1; + if (lgr->peer_gid.gid != peer_gid->gid || + lgr->smcd != smcismdev) + return false; + + if (smc_ism_is_virtual(smcismdev) && + lgr->peer_gid.gid_ext != peer_gid->gid_ext) + return false; + + return true; } /* create a new SMC connection (and a new link group if necessary) */ From 281cb9d65a95c00bb844f332cd187491d2d55496 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 25 Jan 2024 05:41:03 -0800 Subject: [PATCH 469/768] bnxt_en: Make PTP timestamp HWRM more silent commit 056bce63c469 ("bnxt_en: Make PTP TX timestamp HWRM query silent") changed a netdev_err() to netdev_WARN_ONCE(). netdev_WARN_ONCE() is it generates a kernel WARNING, which is bad, for the following reasons: * You do not a kernel warning if the firmware queries are late * In busy networks, timestamp query failures fairly regularly * A WARNING message doesn't bring much value, since the code path is clear. (This was discussed in-depth in [1]) Transform the netdev_WARN_ONCE() into a netdev_warn_once(), and print a more well-behaved message, instead of a full WARN(). bnxt_en 0000:67:00.0 eth0: TS query for TX timer failed rc = fffffff5 [1] Link: https://lore.kernel.org/all/ZbDj%2FFI4EJezcfd1@gmail.com/ Signed-off-by: Breno Leitao Reviewed-by: Pavan Chebbi Reviewed-by: Michael Chan Fixes: 056bce63c469 ("bnxt_en: Make PTP TX timestamp HWRM query silent") Link: https://lore.kernel.org/r/20240125134104.2045573-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index adad188e38b8..cc07660330f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -684,7 +684,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) timestamp.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(ptp->tx_skb, ×tamp); } else { - netdev_WARN_ONCE(bp->dev, + netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } From d3cb3b0088ca92082e2bebc40cc6894a632173e2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Jan 2024 09:22:50 +0100 Subject: [PATCH 470/768] selftests: net: add missing required classifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the udpgro_fraglist self-test uses the BPF classifiers, but the current net self-test configuration does not include it, causing CI failures: # selftests: net: udpgro_frglist.sh # ipv6 # tcp - over veth touching data # -l 4 -6 -D 2001:db8::1 -t rx -4 -t # Error: TC classifier not found. # We have an error talking to the kernel # Error: TC classifier not found. # We have an error talking to the kernel Add the missing knob. Fixes: edae34a3ed92 ("selftests net: add UDP GRO fraglist + bpf self-tests") Signed-off-by: Paolo Abeni Reviewed-by: Maciej Żenczykowski Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/7c3643763b331e9a400e1874fe089193c99a1c3f.1706170897.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 413ab9abcf1b..56da5d52674c 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -51,6 +51,7 @@ CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m From 89abe628375301fedb68770644df845d49018d8b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Jan 2024 19:09:06 +0100 Subject: [PATCH 471/768] selftests: net: give more time for GRO aggregation The gro.sh test-case relay on the gro_flush_timeout to ensure that all the segments belonging to any given batch are properly aggregated. The other end, the sender is a user-space program transmitting each packet with a separate write syscall. A busy host and/or stracing the sender program can make the relevant segments reach the GRO engine after the flush timeout triggers. Give the GRO flush timeout more slack, to avoid sporadic self-tests failures. Fixes: 9af771d2ec04 ("selftests/net: allow GRO coalesce test on veth") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Tested-by: Eric Dumazet Link: https://lore.kernel.org/r/bffec2beab3a5672dd13ecabe4fad81d2155b367.1706206101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/setup_veth.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index a9a1759e035c..1f78a87f6f37 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up From 8d975c15c0cd744000ca386247432d57b21f9df0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 17:05:57 +0000 Subject: [PATCH 472/768] ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv() syzbot found __ip6_tnl_rcv() could access unitiliazed data [1]. Call pskb_inet_may_pull() to fix this, and initialize ipv6h variable after this call as it can change skb->head. [1] BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] BUG: KMSAN: uninit-value in IP6_ECN_decapsulate+0x7df/0x1e50 include/net/inet_ecn.h:321 __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] IP6_ECN_decapsulate+0x7df/0x1e50 include/net/inet_ecn.h:321 ip6ip6_dscp_ecn_decapsulate+0x178/0x1b0 net/ipv6/ip6_tunnel.c:727 __ip6_tnl_rcv+0xd4e/0x1590 net/ipv6/ip6_tunnel.c:845 ip6_tnl_rcv+0xce/0x100 net/ipv6/ip6_tunnel.c:888 gre_rcv+0x143f/0x1870 ip6_protocol_deliver_rcu+0xda6/0x2a60 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:461 [inline] ip6_rcv_finish+0x5db/0x870 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:314 [inline] ipv6_rcv+0xda/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5532 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5646 netif_receive_skb_internal net/core/dev.c:5732 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5791 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x53af/0x66d0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2084 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0x786/0x1200 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5e9/0xb10 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x318/0x740 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6334 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2787 tun_alloc_skb drivers/net/tun.c:1531 [inline] tun_get_user+0x1e8a/0x66d0 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2084 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0x786/0x1200 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 0 PID: 5034 Comm: syz-executor331 Not tainted 6.7.0-syzkaller-00562-g9f8413c4a66f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Fixes: 0d3c703a9d17 ("ipv6: Cleanup IPv6 tunnel receive path") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240125170557.2663942-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 46c19bd48990..9bbabf750a21 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); From dfa988b4c7c3a48bde7c2713308920c7741fff29 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 05:17:25 +0000 Subject: [PATCH 473/768] net: dsa: mt7530: fix 10M/100M speed on MT7988 switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setup PMCR port register for actual speed and duplex on internally connected PHYs of the MT7988 built-in switch. This fixes links with speeds other than 1000M. Fixes: 110c18bfed41 ("net: dsa: mt7530: introduce driver for MT7988 built-in switch") Signed-off-by: Daniel Golle Reviewed-by: Vladimir Oltean Acked-by: Arınç ÜNAL Link: https://lore.kernel.org/r/a5b04dfa8256d8302f402545a51ac4c626fdba25.1706071272.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 391c4dbdff42..3c1f657593a8 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2838,8 +2838,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, /* MT753x MAC works in 1G full duplex mode for all up-clocked * variants. */ - if (interface == PHY_INTERFACE_MODE_INTERNAL || - interface == PHY_INTERFACE_MODE_TRGMII || + if (interface == PHY_INTERFACE_MODE_TRGMII || (phy_interface_mode_is_8023z(interface))) { speed = SPEED_1000; duplex = DUPLEX_FULL; From 99b817c173cd213671daecd25ca27f56b0c7c4ec Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 11:44:03 +0100 Subject: [PATCH 474/768] lsm: fix the logic in security_inode_getsecctx() The inode_getsecctx LSM hook has previously been corrected to have -EOPNOTSUPP instead of 0 as the default return value to fix BPF LSM behavior. However, the call_int_hook()-generated loop in security_inode_getsecctx() was left treating 0 as the neutral value, so after an LSM returns 0, the loop continues to try other LSMs, and if one of them returns a non-zero value, the function immediately returns with said value. So in a situation where SELinux and the BPF LSMs registered this hook, -EOPNOTSUPP would be incorrectly returned whenever SELinux returned 0. Fix this by open-coding the call_int_hook() loop and making it use the correct LSM_RET_DEFAULT() value as the neutral one, similar to what other hooks do. Cc: stable@vger.kernel.org Reported-by: Stephen Smalley Link: https://lore.kernel.org/selinux/CAEjxPJ4ev-pasUwGx48fDhnmjBnq_Wh90jYPwRQRAqXxmOKD4Q@mail.gmail.com/ Link: https://bugzilla.redhat.com/show_bug.cgi?id=2257983 Fixes: b36995b8609a ("lsm: fix default return value for inode_getsecctx") Signed-off-by: Ondrej Mosnacek Reviewed-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- security/security.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/security/security.c b/security/security.c index 0144a98d3712..6196ccaba433 100644 --- a/security/security.c +++ b/security/security.c @@ -4255,7 +4255,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); */ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); From d9281660ff3ffb4a05302b485cc59a87e709aefc Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Fri, 26 Jan 2024 22:01:42 +0800 Subject: [PATCH 475/768] erofs: relaxed temporary buffers allocation on readahead Even with inplace decompression, sometimes very few temporary buffers may be still needed for a single decompression shot (e.g. 16 pages for 64k sliding window or 4 pages for 16k sliding window). In low-memory scenarios, it would be better to try to allocate with GFP_NOWAIT on readahead first. That can help reduce the time spent on page allocation under durative memory pressure. Here are detailed performance numbers under multi-app launch benchmark workload [1] on ARM64 Android devices (8-core CPU and 8GB of memory) running a 5.15 LTS kernel with EROFS of 4k pclusters: +----------------------------------------------+ | LZ4 | vanilla | patched | diff | |----------------+---------+---------+---------| | Average (ms) | 3364 | 2684 | -20.21% | [64k sliding window] |----------------+---------+---------+---------| | Average (ms) | 2079 | 1610 | -22.56% | [16k sliding window] +----------------------------------------------+ The total size of system images for 4k pclusters is almost unchanged: (64k sliding window) 9,117,044 KB (16k sliding window) 9,113,096 KB Therefore, in addition to switch the sliding window from 64k to 16k, after applying this patch, it can eventually save 52.14% (3364 -> 1610) on average with no memory reservation. That is particularly useful for embedded devices with limited resources. [1] https://lore.kernel.org/r/20240109074143.4138783-1-guochunhai@vivo.com Suggested-by: Gao Xiang Signed-off-by: Chunhai Guo Signed-off-by: Gao Xiang Reviewed-by: Yue Hu Link: https://lore.kernel.org/r/20240126140142.201718-1-hsiangkao@linux.alibaba.com --- fs/erofs/compress.h | 5 ++--- fs/erofs/decompressor.c | 5 +++-- fs/erofs/decompressor_deflate.c | 19 +++++++++++++------ fs/erofs/decompressor_lzma.c | 17 ++++++++++++----- fs/erofs/zdata.c | 16 ++++++++++++---- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 279933e007d2..7cc5841577b2 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -11,13 +11,12 @@ struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; - unsigned short pageofs_in, pageofs_out; unsigned int inputsize, outputsize; - /* indicate the algorithm will be used for decompression */ - unsigned int alg; + unsigned int alg; /* the algorithm for decompression */ bool inplace_io, partial_decoding, fillgaps; + gfp_t gfp; /* allocation flags for extra temporary buffers */ }; struct z_erofs_decompressor { diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 072ef6a66823..d4cee95af14c 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, victim = availables[--top]; get_page(victim); } else { - victim = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + victim = erofs_allocpage(pagepool, rq->gfp); + if (!victim) + return -ENOMEM; set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); } rq->out[i] = victim; diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index 4a64a9c91dd3..b98872058abe 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb, } int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -158,8 +158,12 @@ again: strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs); outsz -= strm->z.avail_out; if (!rq->out[no]) { - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + kout = NULL; + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -211,8 +215,11 @@ again: DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -230,7 +237,7 @@ again: break; } } - +failed: if (zlib_inflateEnd(&strm->z) != Z_OK && !err) err = -EIO; if (kout) diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 2dd14f99c1dc..6ca357d83cfa 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -148,7 +148,7 @@ again: } int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -215,8 +215,11 @@ again: PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; if (!rq->out[no] && rq->fillgaps) { /* deduped */ - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -258,8 +261,11 @@ again: DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -277,6 +283,7 @@ again: break; } } +failed: if (no < nrpages_out && strm->buf.out) kunmap(rq->out[no]); if (ni < nrpages_in) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index c1c77166b30f..ff0aa72b0db3 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -82,6 +82,9 @@ struct z_erofs_pcluster { /* L: indicate several pageofs_outs or not */ bool multibases; + /* L: whether extra buffer allocations are best-effort */ + bool besteffort; + /* A: compressed bvecs (can be cached or inplaced pages) */ struct z_erofs_bvec compressed_bvecs[]; }; @@ -960,7 +963,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, } static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page) + struct page *page, bool ra) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; @@ -1010,6 +1013,7 @@ repeat: err = z_erofs_pcluster_begin(fe); if (err) goto out; + fe->pcl->besteffort |= !ra; } /* @@ -1276,6 +1280,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, .inplace_io = overlapped, .partial_decoding = pcl->partial, .fillgaps = pcl->multibases, + .gfp = pcl->besteffort ? + GFP_KERNEL | __GFP_NOFAIL : + GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); /* must handle all compressed pages before actual file pages */ @@ -1318,6 +1325,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, pcl->length = 0; pcl->partial = true; pcl->multibases = false; + pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; @@ -1787,7 +1795,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) unlock_page(page); else - (void)z_erofs_do_read_page(f, page); + (void)z_erofs_do_read_page(f, page, !!rac); put_page(page); } @@ -1808,7 +1816,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, false); z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_end(&f); @@ -1849,7 +1857,7 @@ static void z_erofs_readahead(struct readahead_control *rac) folio = head; head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, true); if (err && err != -EINTR) erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", folio->index, EROFS_I(inode)->nid); From e622502c310f1069fd9f41cd38210553115f610a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Jan 2024 15:18:47 +0100 Subject: [PATCH 476/768] ipmr: fix kernel panic when forwarding mcast packets The stacktrace was: [ 86.305548] BUG: kernel NULL pointer dereference, address: 0000000000000092 [ 86.306815] #PF: supervisor read access in kernel mode [ 86.307717] #PF: error_code(0x0000) - not-present page [ 86.308624] PGD 0 P4D 0 [ 86.309091] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 86.309883] CPU: 2 PID: 3139 Comm: pimd Tainted: G U 6.8.0-6wind-knet #1 [ 86.311027] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014 [ 86.312728] RIP: 0010:ip_mr_forward (/build/work/knet/net/ipv4/ipmr.c:1985) [ 86.313399] Code: f9 1f 0f 87 85 03 00 00 48 8d 04 5b 48 8d 04 83 49 8d 44 c5 00 48 8b 40 70 48 39 c2 0f 84 d9 00 00 00 49 8b 46 58 48 83 e0 fe <80> b8 92 00 00 00 00 0f 84 55 ff ff ff 49 83 47 38 01 45 85 e4 0f [ 86.316565] RSP: 0018:ffffad21c0583ae0 EFLAGS: 00010246 [ 86.317497] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 86.318596] RDX: ffff9559cb46c000 RSI: 0000000000000000 RDI: 0000000000000000 [ 86.319627] RBP: ffffad21c0583b30 R08: 0000000000000000 R09: 0000000000000000 [ 86.320650] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 [ 86.321672] R13: ffff9559c093a000 R14: ffff9559cc00b800 R15: ffff9559c09c1d80 [ 86.322873] FS: 00007f85db661980(0000) GS:ffff955a79d00000(0000) knlGS:0000000000000000 [ 86.324291] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 86.325314] CR2: 0000000000000092 CR3: 000000002f13a000 CR4: 0000000000350ef0 [ 86.326589] Call Trace: [ 86.327036] [ 86.327434] ? show_regs (/build/work/knet/arch/x86/kernel/dumpstack.c:479) [ 86.328049] ? __die (/build/work/knet/arch/x86/kernel/dumpstack.c:421 /build/work/knet/arch/x86/kernel/dumpstack.c:434) [ 86.328508] ? page_fault_oops (/build/work/knet/arch/x86/mm/fault.c:707) [ 86.329107] ? do_user_addr_fault (/build/work/knet/arch/x86/mm/fault.c:1264) [ 86.329756] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.330350] ? __irq_work_queue_local (/build/work/knet/kernel/irq_work.c:111 (discriminator 1)) [ 86.331013] ? exc_page_fault (/build/work/knet/./arch/x86/include/asm/paravirt.h:693 /build/work/knet/arch/x86/mm/fault.c:1515 /build/work/knet/arch/x86/mm/fault.c:1563) [ 86.331702] ? asm_exc_page_fault (/build/work/knet/./arch/x86/include/asm/idtentry.h:570) [ 86.332468] ? ip_mr_forward (/build/work/knet/net/ipv4/ipmr.c:1985) [ 86.333183] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.333920] ipmr_mfc_add (/build/work/knet/./include/linux/rcupdate.h:782 /build/work/knet/net/ipv4/ipmr.c:1009 /build/work/knet/net/ipv4/ipmr.c:1273) [ 86.334583] ? __pfx_ipmr_hash_cmp (/build/work/knet/net/ipv4/ipmr.c:363) [ 86.335357] ip_mroute_setsockopt (/build/work/knet/net/ipv4/ipmr.c:1470) [ 86.336135] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.336854] ? ip_mroute_setsockopt (/build/work/knet/net/ipv4/ipmr.c:1470) [ 86.337679] do_ip_setsockopt (/build/work/knet/net/ipv4/ip_sockglue.c:944) [ 86.338408] ? __pfx_unix_stream_read_actor (/build/work/knet/net/unix/af_unix.c:2862) [ 86.339232] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.339809] ? aa_sk_perm (/build/work/knet/security/apparmor/include/cred.h:153 /build/work/knet/security/apparmor/net.c:181) [ 86.340342] ip_setsockopt (/build/work/knet/net/ipv4/ip_sockglue.c:1415) [ 86.340859] raw_setsockopt (/build/work/knet/net/ipv4/raw.c:836) [ 86.341408] ? security_socket_setsockopt (/build/work/knet/security/security.c:4561 (discriminator 13)) [ 86.342116] sock_common_setsockopt (/build/work/knet/net/core/sock.c:3716) [ 86.342747] do_sock_setsockopt (/build/work/knet/net/socket.c:2313) [ 86.343363] __sys_setsockopt (/build/work/knet/./include/linux/file.h:32 /build/work/knet/net/socket.c:2336) [ 86.344020] __x64_sys_setsockopt (/build/work/knet/net/socket.c:2340) [ 86.344766] do_syscall_64 (/build/work/knet/arch/x86/entry/common.c:52 /build/work/knet/arch/x86/entry/common.c:83) [ 86.345433] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.346161] ? syscall_exit_work (/build/work/knet/./include/linux/audit.h:357 /build/work/knet/kernel/entry/common.c:160) [ 86.346938] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.347657] ? syscall_exit_to_user_mode (/build/work/knet/kernel/entry/common.c:215) [ 86.348538] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.349262] ? do_syscall_64 (/build/work/knet/./arch/x86/include/asm/cpufeature.h:171 /build/work/knet/arch/x86/entry/common.c:98) [ 86.349971] entry_SYSCALL_64_after_hwframe (/build/work/knet/arch/x86/entry/entry_64.S:129) The original packet in ipmr_cache_report() may be queued and then forwarded with ip_mr_forward(). This last function has the assumption that the skb dst is set. After the below commit, the skb dst is dropped by ipv4_pktinfo_prepare(), which causes the oops. Fixes: bb7403655b3c ("ipmr: support IP_PKTINFO on cache report IGMP msg") Signed-off-by: Nicolas Dichtel Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240125141847.1931933-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 +- net/ipv4/ip_sockglue.c | 6 ++++-- net/ipv4/ipmr.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index de0c69c57e3c..25cb688bdc62 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -767,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7aa9dc0e6760..21d2ffa919e9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1363,12 +1363,13 @@ e_inval: * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = inet_test_bit(PKTINFO, sk) || @@ -1397,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d6f59531b3a..362229836510 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - ipv4_pktinfo_prepare(mroute_sk, pkt); + ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 27da9d7294c0..aea89326c697 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { kfree_skb_reason(skb, reason); return NET_RX_DROP; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 148ffb007969..f631b0a21af4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); return __udp_queue_rcv_skb(sk, skb); csum_error: From 59f7ea703c38abc3f239068d49cc8897740e4c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 9 Jan 2024 22:58:59 +0100 Subject: [PATCH 477/768] batman-adv: mcast: fix mcast packet type counter on timeouted nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a node which does not have the new batman-adv multicast packet type capability vanishes then the according, global counter erroneously would not be reduced in response on other nodes. Which in turn leads to the mesh never switching back to sending with the new multicast packet type. Fix this by reducing the according counter when such a node times out. Fixes: 90039133221e ("batman-adv: mcast: implement multicast packet generation") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d982daea8329..b4f8b4af1722 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2198,6 +2198,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) BATADV_MCAST_WANT_NO_RTR4); batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR6); + batadv_mcast_have_mc_ptype_update(bat_priv, orig, + BATADV_MCAST_HAVE_MC_PTYPE_CAPA); spin_unlock_bh(&orig->mcast_handler_lock); } From 0a186b49bba596b81de5a686ce5bfc9cd48ab3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 17 Jan 2024 06:22:20 +0100 Subject: [PATCH 478/768] batman-adv: mcast: fix memory leak on deleting a batman-adv interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The batman-adv multicast tracker TVLV handler is registered for the new batman-adv multicast packet type upon creating a batman-adv interface, but not unregistered again upon the interface's deletion, leading to a memory leak. Fix this memory leak by calling the according TVLV handler unregister routine for the multicast tracker TVLV upon batman-adv interface deletion. Fixes: 07afe1ba288c ("batman-adv: mcast: implement multicast packet reception and forwarding") Reported-and-tested-by: syzbot+ebe64cc5950868e77358@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000beadc4060f0cbc23@google.com/ Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index b4f8b4af1722..14088c4ff2f6 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2175,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv) cancel_delayed_work_sync(&bat_priv->mcast.work); batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); /* safely calling outside of worker, as worker was canceled above */ From 586e40aa883cab255ab8ddfe332c4092a5349cb7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 24 Jan 2024 09:16:22 +0000 Subject: [PATCH 479/768] MAINTAINERS: Add connector headers to NETWORKING DRIVERS Commit 46cf789b68b2 ("connector: Move maintainence under networking drivers umbrella.") moved the connector maintenance but did not include the connector header files. It seems that it has always been implied that these headers were maintained along with the rest of the connector code, both before and after the cited commit. Make this explicit. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 92152ac346c8..9435ea0f8cda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15179,6 +15179,7 @@ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: include/dt-bindings/net/ +F: include/linux/cn_proc.h F: include/linux/etherdevice.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h @@ -15186,6 +15187,7 @@ F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h F: include/linux/netdevice.h +F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h X: drivers/net/wireless/ From 62b4248105353e7d1debd30ca5c57ec5e5f28e35 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 24 Jan 2024 11:17:58 +0100 Subject: [PATCH 480/768] net: lan966x: Fix port configuration when using SGMII interface In case the interface between the MAC and the PHY is SGMII, then the bit GIGA_MODE on the MAC side needs to be set regardless of the speed at which it is running. Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Horatiu Vultur Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 92108d354051..2e83bbb9477e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port) lan966x_taprio_speed_set(port, config->speed); /* Also the GIGA_MODE_ENA(1) needs to be set regardless of the - * port speed for QSGMII ports. + * port speed for QSGMII or SGMII ports. */ - if (phy_interface_num_ports(config->portmode) == 4) + if (phy_interface_num_ports(config->portmode) == 4 || + config->portmode == PHY_INTERFACE_MODE_SGMII) mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1); lan_wr(config->duplex | mode, From c91c6b2f08afb7be111678ceade563158c9a31ba Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sat, 27 Jan 2024 11:07:49 +0100 Subject: [PATCH 481/768] Revert "MIPS: loongson64: set nid for reserved memblock region" This reverts commit ce7b1b97776ec0b068c4dd6b6dbb48ae09a23519. Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 2 -- arch/mips/loongson64/numa.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index 000ba91c0886..f25caa6aa9d3 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -103,8 +103,6 @@ void __init szmem(unsigned int node) if (loongson_sysconf.vgabios_addr) memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), SZ_256K); - /* set nid for reserved memory */ - memblock_set_node((u64)node << 44, (u64)(node+1) << 44, &memblock.reserved, node); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 6345e096c532..8f61e93c0c5b 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -132,8 +132,6 @@ static void __init node_mem_init(unsigned int node) /* Reserve pfn range 0~node[0]->node_start_pfn */ memblock_reserve(0, PAGE_SIZE * start_pfn); - /* set nid for reserved memory on node 0 */ - memblock_set_node(0, (u64)1 << 44, &memblock.reserved, 1); } } From 822df315cc7c85c3c10afcc6408b254a6fa0f166 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Sat, 27 Jan 2024 17:12:21 +0800 Subject: [PATCH 482/768] MIPS: loongson64: set nid for reserved memblock region Commit 61167ad5fecd("mm: pass nid to reserve_bootmem_region()") reveals that reserved memblock regions have no valid node id set, just set it right since loongson64 firmware makes it clear in memory layout info. This works around booting failure on 3A1000+ since commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") under CONFIG_DEFERRED_STRUCT_PAGE_INIT. Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 3 +++ arch/mips/loongson64/numa.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index f25caa6aa9d3..553142c1f14f 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -103,6 +103,9 @@ void __init szmem(unsigned int node) if (loongson_sysconf.vgabios_addr) memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), SZ_256K); + /* set nid for reserved memory */ + memblock_set_node((u64)node << 44, (u64)(node + 1) << 44, + &memblock.reserved, node); } #ifndef CONFIG_NUMA diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 8f61e93c0c5b..68dafd6d3e25 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -132,6 +132,8 @@ static void __init node_mem_init(unsigned int node) /* Reserve pfn range 0~node[0]->node_start_pfn */ memblock_reserve(0, PAGE_SIZE * start_pfn); + /* set nid for reserved memory on node 0 */ + memblock_set_node(0, 1ULL << 44, &memblock.reserved, 0); } } From 59be5c35850171e307ca5d3d703ee9ff4096b948 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 27 Jan 2024 05:05:57 +0800 Subject: [PATCH 483/768] mips: Call lose_fpu(0) before initializing fcr31 in mips_set_personality_nan If we still own the FPU after initializing fcr31, when we are preempted the dirty value in the FPU will be read out and stored into fcr31, clobbering our setting. This can cause an improper floating-point environment after execve(). For example: zsh% cat measure.c #include int main() { return fetestexcept(FE_INEXACT); } zsh% cc measure.c -o measure -lm zsh% echo $((1.0/3)) # raising FE_INEXACT 0.33333333333333331 zsh% while ./measure; do ; done (stopped in seconds) Call lose_fpu(0) before setting fcr31 to prevent this. Closes: https://lore.kernel.org/linux-mips/7a6aa1bbdbbe2e63ae96ff163fab0349f58f1b9e.camel@xry111.site/ Fixes: 9b26616c8d9d ("MIPS: Respect the ISA level in FCSR handling") Cc: stable@vger.kernel.org Signed-off-by: Xi Ruoyao Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/elf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index 5582a4ca1e9e..7aa2c2360ff6 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_MIPS_FP_SUPPORT @@ -309,6 +310,11 @@ void mips_set_personality_nan(struct arch_elf_state *state) struct cpuinfo_mips *c = &boot_cpu_data; struct task_struct *t = current; + /* Do this early so t->thread.fpu.fcr31 won't be clobbered in case + * we are preempted before the lose_fpu(0) in start_thread. + */ + lose_fpu(0); + t->thread.fpu.fcr31 = c->fpu_csr31; switch (state->nan_2008) { case 0: From 915644189c22d9c93e9fee7c7c993b58e745bef7 Mon Sep 17 00:00:00 2001 From: Konstantin Aladyshev Date: Sat, 27 Jan 2024 18:48:44 +0300 Subject: [PATCH 484/768] hwmon: (pmbus/mp2975) Correct comment inside 'mp2975_read_byte_data' The current driver code no longer perfrom internal conversion from VID to direct. Instead it configures READ_VOUT using MFR_DC_LOOP_CTRL. Correct the comment message inside the 'mp2975_read_byte_data' function to match the driver logic. Signed-off-by: Konstantin Aladyshev Fixes: c60fe56c169e ("hwmon: (pmbus/mp2975) Fix driver initialization for MP2975 device") Link: https://lore.kernel.org/r/20240127154844.989-1-aladyshev22@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/mp2975.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c index 5bbfdacb61a7..e5fa10b3b8bc 100644 --- a/drivers/hwmon/pmbus/mp2975.c +++ b/drivers/hwmon/pmbus/mp2975.c @@ -131,10 +131,9 @@ static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg) switch (reg) { case PMBUS_VOUT_MODE: /* - * Enforce VOUT direct format, since device allows to set the - * different formats for the different rails. Conversion from - * VID to direct provided by driver internally, in case it is - * necessary. + * Report direct format as configured by MFR_DC_LOOP_CTRL. + * Unlike on MP2971/MP2973 the reported VOUT_MODE isn't automatically + * internally updated, but always reads as PB_VOUT_MODE_VID. */ return PB_VOUT_MODE_DIRECT; default: From 6c20faec8ffc97af21acb43382adda011eb39359 Mon Sep 17 00:00:00 2001 From: Zhang Bingwu Date: Sun, 14 Jan 2024 16:13:59 +0800 Subject: [PATCH 485/768] kbuild: defconf: use SRCARCH to find merged configs For some ARCH values, SRCARCH, which should be used for finding arch/ subdirectory, is different from ARCH. Signed-off-by: Zhang Bingwu Signed-off-by: Masahiro Yamada --- scripts/Makefile.defconf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf index ab271b2051a2..226ea3df3b4b 100644 --- a/scripts/Makefile.defconf +++ b/scripts/Makefile.defconf @@ -9,8 +9,8 @@ # Input config fragments without '.config' suffix define merge_into_defconfig $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef @@ -23,7 +23,7 @@ endef # Input config fragments without '.config' suffix define merge_into_defconfig_override $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -Q -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef From 846cfbeed09b45d985079a9173cf390cc053715b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:54 -0700 Subject: [PATCH 486/768] um: Fix adding '-no-pie' for clang The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link vmlinux with -no-pie") added the compiler linker flag '-no-pie' via cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer. While this works for GCC, this does not work for clang because cc-option uses '-c', which stops the pipeline right before linking, so '-no-pie' is unconsumed and clang warns, causing cc-option to fail just as it would if the option was entirely unsupported: $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument] A recent version of clang exposes this because it generates a relocation under '-mcmodel=large' that is not supported in PIE mode: /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE /usr/sbin/ld: failed to set dynamic section sizes: bad value clang: error: linker command failed with exit code 1 (use -v to see invocation) Remove the cc-option check altogether. It is wasteful to invoke the compiler to check for '-no-pie' because only one supported compiler version does not support it, GCC 5.x (as it is supported with the minimum version of clang and GCC 6.1.0+). Use a combination of the gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add '-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all compilers that support this. Furthermore, using gcc-min-version can help turn this back into LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie when the minimum version of GCC is bumped past 6.1.0. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1982 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- arch/um/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 82f05f250634..34957dcb88b9 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -115,7 +115,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ From 397586506c3da005b9333ce5947ad01e8018a3be Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:55 -0700 Subject: [PATCH 487/768] modpost: Add '.ltext' and '.ltext.*' to TEXT_SECTIONS After the linked LLVM change, building ARCH=um defconfig results in a segmentation fault in modpost. Prior to commit a23e7584ecf3 ("modpost: unify 'sym' and 'to' in default_mismatch_handler()"), there was a warning: WARNING: modpost: vmlinux.o(__ex_table+0x88): Section mismatch in reference to the .ltext:(unknown) WARNING: modpost: The relocation at __ex_table+0x88 references section ".ltext" which is not in the list of authorized sections. If you're adding a new section and/or if this reference is valid, add ".ltext" to the list of authorized sections to jump to on fault. This can be achieved by adding ".ltext" to OTHER_TEXT_SECTIONS in scripts/mod/modpost.c. The linked LLVM change moves global objects to the '.ltext' (and '.ltext.*' with '-ffunction-sections') sections with '-mcmodel=large', which ARCH=um uses. These sections should be handled just as '.text' and '.text.*' are, so add them to TEXT_SECTIONS. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1981 Link: https://github.com/llvm/llvm-project/commit/4bf8a688956a759b7b6b8d94f42d25c13c7af130 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 795b21154446..acf72112acd8 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -806,7 +806,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" From e30dca91e5667568a6be54886020c43f1f6f95d3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Jan 2024 14:52:23 -0300 Subject: [PATCH 488/768] tools headers UAPI: Sync kvm headers with the kernel sources To pick the changes in: a5d3df8ae13fada7 ("KVM: remove deprecated UAPIs") 6d72283526090850 ("KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT") 89ea60c2c7b5838b ("KVM: x86: Add support for "protected VMs" that can utilize private memory") 8dd2eee9d526c30f ("KVM: x86/mmu: Handle page fault for private memory") a7800aa80ea4d535 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory") 5a475554db1e476a ("KVM: Introduce per-page memory attributes") 16f95f3b95caded2 ("KVM: Add KVM_EXIT_MEMORY_FAULT exit to report faults to userspace") bb58b90b1a8f753b ("KVM: Introduce KVM_SET_USER_MEMORY_REGION2") 3f9cd0ca848413fd ("KVM: arm64: Allow userspace to get the writable masks for feature ID registers") That automatically adds support for some new ioctls and remove a bunch of deprecated ones. This ends up making the new binary to forget about the deprecated one, so when used in an older system it will not be able to resolve those codes to strings. $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2024-01-27 14:48:16.523014020 -0300 +++ after 2024-01-27 14:48:24.183932866 -0300 @@ -14,6 +14,7 @@ [0x46] = "SET_USER_MEMORY_REGION", [0x47] = "SET_TSS_ADDR", [0x48] = "SET_IDENTITY_MAP_ADDR", + [0x49] = "SET_USER_MEMORY_REGION2", [0x60] = "CREATE_IRQCHIP", [0x61] = "IRQ_LINE", [0x62] = "GET_IRQCHIP", @@ -22,14 +23,8 @@ [0x65] = "GET_PIT", [0x66] = "SET_PIT", [0x67] = "IRQ_LINE_STATUS", - [0x69] = "ASSIGN_PCI_DEVICE", [0x6a] = "SET_GSI_ROUTING", - [0x70] = "ASSIGN_DEV_IRQ", [0x71] = "REINJECT_CONTROL", - [0x72] = "DEASSIGN_PCI_DEVICE", - [0x73] = "ASSIGN_SET_MSIX_NR", - [0x74] = "ASSIGN_SET_MSIX_ENTRY", - [0x75] = "DEASSIGN_DEV_IRQ", [0x76] = "IRQFD", [0x77] = "CREATE_PIT2", [0x78] = "SET_BOOT_CPU_ID", @@ -66,7 +61,6 @@ [0x9f] = "GET_VCPU_EVENTS", [0xa0] = "SET_VCPU_EVENTS", [0xa3] = "ENABLE_CAP", - [0xa4] = "ASSIGN_SET_INTX_MASK", [0xa5] = "SIGNAL_MSI", [0xa6] = "GET_XCRS", [0xa7] = "SET_XCRS", @@ -97,6 +91,8 @@ [0xcd] = "SET_SREGS2", [0xce] = "GET_STATS_FD", [0xd0] = "XEN_HVM_EVTCHN_SEND", + [0xd2] = "SET_MEMORY_ATTRIBUTES", + [0xd4] = "CREATE_GUEST_MEMFD", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ This silences these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Chao Peng Cc: Ian Rogers Cc: Jing Zhang Cc: Jiri Olsa Cc: Namhyung Kim Cc: Oliver Upton Cc: Paolo Bonzini Cc: Paul Durrant Cc: Sean Christopherson Link: https://lore.kernel.org/lkml/ZbVLbkngp4oq13qN@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 3 + tools/include/uapi/linux/kvm.h | 140 +++++++++----------------- 2 files changed, 53 insertions(+), 90 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 1a6a1f987949..a448d0964fc0 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -562,4 +562,7 @@ struct kvm_pmu_event_filter { /* x86-specific KVM_EXIT_HYPERCALL flags. */ #define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) +#define KVM_X86_DEFAULT_VM 0 +#define KVM_X86_SW_PROTECTED_VM 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 211b86de35ac..c3308536482b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -16,76 +16,6 @@ #define KVM_API_VERSION 12 -/* *** Deprecated interfaces *** */ - -#define KVM_TRC_SHIFT 16 - -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) - -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - -struct kvm_user_trace_setup { - __u32 buf_size; - __u32 buf_nr; -}; - -#define __KVM_DEPRECATED_MAIN_W_0x06 \ - _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) -#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) - -#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) - -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) - -/* *** End of deprecated interfaces *** */ - - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -95,6 +25,19 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; +/* for KVM_SET_USER_MEMORY_REGION2 */ +struct kvm_userspace_memory_region2 { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 userspace_addr; + __u64 guest_memfd_offset; + __u32 guest_memfd; + __u32 pad1; + __u64 pad2[14]; +}; + /* * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for * userspace, other bits are reserved for kvm internal use which are defined @@ -102,6 +45,7 @@ struct kvm_userspace_memory_region { */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) +#define KVM_MEM_GUEST_MEMFD (1UL << 2) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -265,6 +209,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 +#define KVM_EXIT_MEMORY_FAULT 39 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -518,6 +463,13 @@ struct kvm_run { #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) __u32 flags; } notify; + /* KVM_EXIT_MEMORY_FAULT */ + struct { +#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault; /* Fix the size of the union. */ char padding[256]; }; @@ -945,9 +897,6 @@ struct kvm_ppc_resize_hpt { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 -#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 -#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) @@ -1201,6 +1150,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 +#define KVM_CAP_USER_MEMORY2 231 +#define KVM_CAP_MEMORY_FAULT_INFO 232 +#define KVM_CAP_MEMORY_ATTRIBUTES 233 +#define KVM_CAP_GUEST_MEMFD 234 +#define KVM_CAP_VM_TYPES 235 #ifdef KVM_CAP_IRQ_ROUTING @@ -1291,6 +1245,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) struct kvm_xen_hvm_config { __u32 flags; @@ -1483,6 +1438,8 @@ struct kvm_vfio_spapr_tce { struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ + struct kvm_userspace_memory_region2) /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { @@ -1507,20 +1464,8 @@ struct kvm_s390_ucas_mapping { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) -/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ - struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ - struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) @@ -1537,9 +1482,6 @@ struct kvm_s390_ucas_mapping { * KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) -/* Available with KVM_CAP_PCI_2_3 */ -#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ - struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ @@ -1592,8 +1534,6 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -2267,4 +2207,24 @@ struct kvm_s390_zpci_op { /* flags for kvm_s390_zpci_op->u.reg_aen.flags */ #define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +/* Available with KVM_CAP_MEMORY_ATTRIBUTES */ +#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +struct kvm_memory_attributes { + __u64 address; + __u64 size; + __u64 attributes; + __u64 flags; +}; + +#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + +#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) + +struct kvm_create_guest_memfd { + __u64 size; + __u64 flags; + __u64 reserved[6]; +}; + #endif /* __LINUX_KVM_H */ From becc24e96ad4b9bc5c5bba800dc184661b6702bc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 4 Jan 2024 15:19:03 -0800 Subject: [PATCH 489/768] perf vendor events intel: Alderlake/sapphirerapids metric fixes As events are deduplicated by name, ensure PMU prefixes are always used in metrics. Previously they may be missed on the first event in a formula. Update metric constraints for architectures with topdown l2 events. Conversion script updated in: https://github.com/intel/perfmon/pull/128 Reported-by: Arnaldo Carvalho de Melo Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Edward Baker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Closes: https://lore.kernel.org/lkml/ZZam-EG-UepcXtWw@kernel.org/ Link: https://lore.kernel.org/r/20240104231903.775717-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/alderlake/adl-metrics.json | 254 ++++++++---------- .../arch/x86/alderlaken/adln-metrics.json | 4 - .../arch/x86/sapphirerapids/spr-metrics.json | 25 +- 3 files changed, 123 insertions(+), 160 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index 35124a4ddcb2..bbfa3883e533 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -114,7 +114,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", - "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_alloc_restriction", "MetricThreshold": "tma_alloc_restriction > 0.1", @@ -124,7 +124,7 @@ { "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.1", @@ -169,7 +169,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_branch_detect", "MetricThreshold": "tma_branch_detect > 0.05", @@ -179,7 +179,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.05", @@ -189,7 +189,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_branch_resteer", "MetricThreshold": "tma_branch_resteer > 0.05", @@ -198,7 +198,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", - "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_cisc", "MetricThreshold": "tma_cisc > 0.05", @@ -217,7 +217,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", - "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_decode", "MetricThreshold": "tma_decode > 0.05", @@ -235,7 +235,6 @@ }, { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -245,7 +244,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", "MetricName": "tma_fast_nuke", "MetricThreshold": "tma_fast_nuke > 0.05", @@ -254,7 +253,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1", @@ -264,7 +263,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.15", @@ -283,7 +282,7 @@ }, { "BriefDescription": "Counts the number of floating point divide operations per uop.", - "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots", + "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", "MetricName": "tma_fpdiv_uops", "MetricThreshold": "tma_fpdiv_uops > 0.2", @@ -293,7 +292,7 @@ { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.2", @@ -303,7 +302,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", - "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_icache_misses", "MetricThreshold": "tma_icache_misses > 0.05", @@ -330,7 +329,7 @@ }, { "BriefDescription": "Instructions Per Cycle", - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks", "MetricName": "tma_info_core_ipc", "Unit": "cpu_atom" }, @@ -342,7 +341,7 @@ }, { "BriefDescription": "Uops Per Instruction", - "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", + "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY", "MetricName": "tma_info_core_upi", "Unit": "cpu_atom" }, @@ -366,13 +365,13 @@ }, { "BriefDescription": "Ratio of all branches which mispredict", - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricName": "tma_info_inst_mix_branch_mispredict_ratio", "Unit": "cpu_atom" }, { "BriefDescription": "Ratio between Mispredicted branches and unknown branches", - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY", "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio", "Unit": "cpu_atom" }, @@ -390,61 +389,61 @@ }, { "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricName": "tma_info_inst_mix_ipbranch", "Unit": "cpu_atom" }, { "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL", "MetricName": "tma_info_inst_mix_ipcall", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per Far Branch", - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", "MetricName": "tma_info_inst_mix_ipfarbranch", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per Load", - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS", "MetricName": "tma_info_inst_mix_ipload", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", "MetricName": "tma_info_inst_mix_ipmisp_cond_taken", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT", "MetricName": "tma_info_inst_mix_ipmisp_indirect", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired return Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN", "MetricName": "tma_info_inst_mix_ipmisp_ret", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", "MetricName": "tma_info_inst_mix_ipmispredict", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per Store", - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES", "MetricName": "tma_info_inst_mix_ipstore", "Unit": "cpu_atom" }, @@ -480,19 +479,19 @@ }, { "BriefDescription": "Cycle cost per DRAM hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit", "Unit": "cpu_atom" }, { "BriefDescription": "Cycle cost per L2 hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT", "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit", "Unit": "cpu_atom" }, { "BriefDescription": "Cycle cost per LLC hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT", "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit", "Unit": "cpu_atom" }, @@ -504,7 +503,7 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC", "MetricName": "tma_info_system_cpu_utilization", "Unit": "cpu_atom" }, @@ -524,7 +523,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", - "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_itlb_misses", "MetricThreshold": "tma_itlb_misses > 0.05", @@ -533,7 +532,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", - "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l1_bound", "MetricThreshold": "tma_l1_bound > 0.1", @@ -542,7 +541,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -552,7 +550,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -571,7 +568,7 @@ }, { "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.05", @@ -581,7 +578,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", - "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_mem_scheduler", "MetricThreshold": "tma_mem_scheduler > 0.1", @@ -590,7 +587,7 @@ }, { "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.", - "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", + "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2", @@ -609,7 +606,7 @@ }, { "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", - "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots", + "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_ms_uops", "MetricThreshold": "tma_ms_uops > 0.05", @@ -620,7 +617,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", - "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_non_mem_scheduler", "MetricThreshold": "tma_non_mem_scheduler > 0.1", @@ -629,7 +626,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", "MetricName": "tma_nuke", "MetricThreshold": "tma_nuke > 0.05", @@ -638,7 +635,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", - "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_other_fb", "MetricThreshold": "tma_other_fb > 0.05", @@ -647,7 +644,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", - "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_other_l1", "MetricThreshold": "tma_other_l1 > 0.05", @@ -683,7 +680,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", - "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_predecode", "MetricThreshold": "tma_predecode > 0.05", @@ -692,7 +689,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", - "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_register", "MetricThreshold": "tma_register > 0.1", @@ -701,7 +698,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", - "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_reorder_buffer", "MetricThreshold": "tma_reorder_buffer > 0.1", @@ -722,7 +719,7 @@ { "BriefDescription": "Counts the number of issue slots that result in retirement slots.", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.75", @@ -741,7 +738,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", - "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_serialization", "MetricThreshold": "tma_serialization > 0.1", @@ -768,7 +765,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", - "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_stlb_hit", "MetricThreshold": "tma_stlb_hit > 0.05", @@ -777,7 +774,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", - "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_stlb_miss", "MetricThreshold": "tma_stlb_miss > 0.05", @@ -795,8 +792,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", - "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", "MetricThreshold": "tma_store_fwd_blk > 0.05", @@ -875,7 +871,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers", - "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches", + "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches", "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_branch_resteers", "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -905,7 +901,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_contested_accesses", @@ -927,7 +922,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_data_sharing", @@ -948,7 +942,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active", - "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks", + "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks", "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", "MetricName": "tma_divider", "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)", @@ -958,7 +952,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -979,7 +972,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks", + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks", "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", "MetricName": "tma_dsb_switches", "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -1019,7 +1012,7 @@ }, { "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed", - "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks", + "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks", "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group", "MetricName": "tma_fb_full", "MetricThreshold": "tma_fb_full > 0.3", @@ -1154,7 +1147,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses", - "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks", + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks", "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_icache_misses", "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -1164,7 +1157,6 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bad_spec_branch_misprediction_cost", @@ -1173,7 +1165,7 @@ }, { "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_NTAKEN", "MetricGroup": "Bad;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken", "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200", @@ -1181,7 +1173,7 @@ }, { "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", "MetricGroup": "Bad;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmisp_cond_taken", "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200", @@ -1197,7 +1189,7 @@ }, { "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RET", "MetricGroup": "Bad;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmisp_ret", "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500", @@ -1205,7 +1197,7 @@ }, { "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BadSpec;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmispredict", "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200", @@ -1213,7 +1205,6 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_botlnk_l0_core_bound_likely", @@ -1222,7 +1213,6 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_botlnk_l2_dsb_misses", @@ -1232,7 +1222,6 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -1242,7 +1231,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", "MetricName": "tma_info_bottleneck_big_code", @@ -1261,7 +1249,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_bottleneck_instruction_fetch_bw", @@ -1270,7 +1257,6 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -1280,7 +1266,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_bottleneck_memory_data_tlbs", @@ -1290,7 +1275,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_bottleneck_memory_latency", @@ -1300,7 +1284,6 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bottleneck_mispredictions", @@ -1317,14 +1300,14 @@ }, { "BriefDescription": "Fraction of branches that are non-taken conditionals", - "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;Branches;CodeGen;PGO", "MetricName": "tma_info_branches_cond_nt", "Unit": "cpu_core" }, { "BriefDescription": "Fraction of branches that are taken conditionals", - "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;Branches;CodeGen;PGO", "MetricName": "tma_info_branches_cond_tk", "Unit": "cpu_core" @@ -1352,7 +1335,7 @@ }, { "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks", "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group", "MetricName": "tma_info_core_coreipc", "Unit": "cpu_core" @@ -1374,14 +1357,14 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core", - "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", "MetricName": "tma_info_core_ilp", "Unit": "cpu_core" }, { "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@", + "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@", "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB", "MetricName": "tma_info_frontend_dsb_coverage", "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35", @@ -1390,28 +1373,28 @@ }, { "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", "MetricGroup": "DSBmiss", "MetricName": "tma_info_frontend_dsb_switch_cost", "Unit": "cpu_core" }, { "BriefDescription": "Average number of Uops issued by front-end when it issued something", - "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", + "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", "MetricGroup": "Fed;FetchBW", "MetricName": "tma_info_frontend_fetch_upc", "Unit": "cpu_core" }, { "BriefDescription": "Average Latency for L1 instruction cache misses", - "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", "MetricGroup": "Fed;FetchLat;IcMiss", "MetricName": "tma_info_frontend_icache_miss_latency", "Unit": "cpu_core" }, { "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FRONTEND_RETIRED.ANY_DSB_MISS", "MetricGroup": "DSBmiss;Fed", "MetricName": "tma_info_frontend_ipdsb_miss_ret", "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50", @@ -1440,14 +1423,14 @@ }, { "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", - "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@", + "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@", "MetricGroup": "Fed;LSD", "MetricName": "tma_info_frontend_lsd_coverage", "Unit": "cpu_core" }, { "BriefDescription": "Branch instructions per taken branch.", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN", "MetricGroup": "Branches;Fed;PGO", "MetricName": "tma_info_inst_mix_bptkbranch", "Unit": "cpu_core" @@ -1462,7 +1445,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", "MetricGroup": "Flops;InsType", "MetricName": "tma_info_inst_mix_iparith", "MetricThreshold": "tma_info_inst_mix_iparith < 10", @@ -1471,7 +1454,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", "MetricGroup": "Flops;FpVector;InsType", "MetricName": "tma_info_inst_mix_iparith_avx128", "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10", @@ -1480,7 +1463,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", "MetricGroup": "Flops;FpVector;InsType", "MetricName": "tma_info_inst_mix_iparith_avx256", "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10", @@ -1489,7 +1472,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "MetricGroup": "Flops;FpScalar;InsType", "MetricName": "tma_info_inst_mix_iparith_scalar_dp", "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10", @@ -1498,7 +1481,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "MetricGroup": "Flops;FpScalar;InsType", "MetricName": "tma_info_inst_mix_iparith_scalar_sp", "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10", @@ -1507,7 +1490,7 @@ }, { "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Branches;Fed;InsType", "MetricName": "tma_info_inst_mix_ipbranch", "MetricThreshold": "tma_info_inst_mix_ipbranch < 8", @@ -1515,7 +1498,7 @@ }, { "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_CALL", "MetricGroup": "Branches;Fed;PGO", "MetricName": "tma_info_inst_mix_ipcall", "MetricThreshold": "tma_info_inst_mix_ipcall < 200", @@ -1523,7 +1506,7 @@ }, { "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", "MetricGroup": "Flops;InsType", "MetricName": "tma_info_inst_mix_ipflop", "MetricThreshold": "tma_info_inst_mix_ipflop < 10", @@ -1531,7 +1514,7 @@ }, { "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_LOADS", "MetricGroup": "InsType", "MetricName": "tma_info_inst_mix_ipload", "MetricThreshold": "tma_info_inst_mix_ipload < 3", @@ -1539,7 +1522,7 @@ }, { "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_STORES", "MetricGroup": "InsType", "MetricName": "tma_info_inst_mix_ipstore", "MetricThreshold": "tma_info_inst_mix_ipstore < 8", @@ -1547,7 +1530,7 @@ }, { "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", "MetricGroup": "Prefetches", "MetricName": "tma_info_inst_mix_ipswpf", "MetricThreshold": "tma_info_inst_mix_ipswpf < 100", @@ -1555,7 +1538,7 @@ }, { "BriefDescription": "Instruction per taken branch", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_TAKEN", "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB", "MetricName": "tma_info_inst_mix_iptb", "MetricThreshold": "tma_info_inst_mix_iptb < 13", @@ -1655,14 +1638,14 @@ }, { "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)", - "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY", + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY", "MetricGroup": "Mem;MemoryBound;MemoryLat", "MetricName": "tma_info_memory_load_miss_real_latency", "Unit": "cpu_core" }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss", - "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES", "MetricGroup": "Mem;MemoryBW;MemoryBound", "MetricName": "tma_info_memory_mlp", "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", @@ -1670,28 +1653,28 @@ }, { "BriefDescription": "Average Parallel L2 cache miss data reads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", "MetricGroup": "Memory_BW;Offcore", "MetricName": "tma_info_memory_oro_data_l2_mlp", "Unit": "cpu_core" }, { "BriefDescription": "Average Latency for L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD", "MetricGroup": "Memory_Lat;Offcore", "MetricName": "tma_info_memory_oro_load_l2_miss_latency", "Unit": "cpu_core" }, { "BriefDescription": "Average Parallel L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", "MetricGroup": "Memory_BW;Offcore", "MetricName": "tma_info_memory_oro_load_l2_mlp", "Unit": "cpu_core" }, { "BriefDescription": "Average Latency for L3 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", "MetricGroup": "Memory_Lat;Offcore", "MetricName": "tma_info_memory_oro_load_l3_miss_latency", "Unit": "cpu_core" @@ -1755,14 +1738,14 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread", - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", "MetricGroup": "Cor;Pipeline;PortsUtil;SMT", "MetricName": "tma_info_pipeline_execute", "Unit": "cpu_core" }, { "BriefDescription": "Instructions per a microcode Assist invocation", - "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", "MetricGroup": "Pipeline;Ret;Retire", "MetricName": "tma_info_pipeline_ipassist", "MetricThreshold": "tma_info_pipeline_ipassist < 100e3", @@ -1778,7 +1761,7 @@ }, { "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", + "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_strings_cycles", "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1", @@ -1793,7 +1776,7 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC", "MetricGroup": "HPC;Summary", "MetricName": "tma_info_system_cpu_utilization", "Unit": "cpu_core" @@ -1816,7 +1799,7 @@ }, { "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", - "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", "MetricGroup": "Branches;OS", "MetricName": "tma_info_system_ipfarbranch", "MetricThreshold": "tma_info_system_ipfarbranch < 1e6", @@ -1847,6 +1830,7 @@ }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD", "MetricGroup": "Mem;MemoryLat;SoC", "MetricName": "tma_info_system_mem_read_latency", @@ -1855,6 +1839,7 @@ }, { "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL", "MetricGroup": "Mem;SoC", "MetricName": "tma_info_system_mem_request_latency", @@ -1897,7 +1882,7 @@ }, { "BriefDescription": "The ratio of Executed- by Issued-Uops", - "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / UOPS_ISSUED.ANY", "MetricGroup": "Cor;Pipeline", "MetricName": "tma_info_thread_execute_per_issue", "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.", @@ -1905,7 +1890,7 @@ }, { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", - "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks", "MetricGroup": "Ret;Summary", "MetricName": "tma_info_thread_ipc", "Unit": "cpu_core" @@ -1972,7 +1957,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses", - "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks", + "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks", "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_itlb_misses", "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -1992,7 +1977,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -2003,7 +1987,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -2024,7 +2007,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)", - "MetricExpr": "DECODE.LCP / tma_info_thread_clks", + "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks", "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", "MetricName": "tma_lcp", "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -2045,7 +2028,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations", - "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)", "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", "MetricName": "tma_load_op_utilization", "MetricThreshold": "tma_load_op_utilization > 0.6", @@ -2064,7 +2047,7 @@ }, { "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk", - "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks", + "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks", "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group", "MetricName": "tma_load_stlb_miss", "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", @@ -2073,7 +2056,6 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks", "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", "MetricName": "tma_lock_latency", @@ -2136,6 +2118,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_memory_fence", @@ -2145,7 +2128,6 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -2155,7 +2137,7 @@ }, { "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit", - "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots", + "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots", "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS", "MetricName": "tma_microcode_sequencer", "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1", @@ -2225,7 +2207,6 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", @@ -2246,7 +2227,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)", - "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks", "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", "MetricName": "tma_port_0", "MetricThreshold": "tma_port_0 > 0.6", @@ -2256,7 +2237,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)", - "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", "MetricName": "tma_port_1", "MetricThreshold": "tma_port_1 > 0.6", @@ -2266,7 +2247,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)", - "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", "MetricName": "tma_port_6", "MetricThreshold": "tma_port_6 > 0.6", @@ -2296,7 +2277,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", - "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks", + "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_1", "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", @@ -2306,7 +2287,8 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", - "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", + "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_2", "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", @@ -2316,7 +2298,8 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", - "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", + "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_3m", "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", @@ -2338,7 +2321,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations", - "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks", + "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group", "MetricName": "tma_serializing_operation", "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))", @@ -2348,7 +2331,7 @@ }, { "BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.", - "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)", + "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)", "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group", "MetricName": "tma_shuffles", "MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)", @@ -2357,7 +2340,8 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", - "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_slow_pause", "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))", @@ -2377,8 +2361,7 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", - "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", + "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2398,7 +2381,7 @@ }, { "BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write", - "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks", + "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_store_bound", "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", @@ -2408,7 +2391,6 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", @@ -2448,7 +2430,7 @@ }, { "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk", - "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks", + "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks", "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group", "MetricName": "tma_store_stlb_miss", "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", @@ -2467,7 +2449,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears", - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks", + "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks", "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group", "MetricName": "tma_unknown_branches", "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))", diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json index c150c14ac6ed..a35edf7d86a9 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -195,7 +195,6 @@ }, { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -457,7 +456,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -466,7 +464,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -683,7 +680,6 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index e31a4aac9f20..56e54babcc26 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -400,7 +400,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_contested_accesses", @@ -421,7 +420,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_data_sharing", @@ -449,7 +447,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -656,7 +653,6 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bad_spec_branch_misprediction_cost", @@ -699,7 +695,6 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_botlnk_l0_core_bound_likely", @@ -707,7 +702,6 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_botlnk_l2_dsb_misses", @@ -716,7 +710,6 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -725,7 +718,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", "MetricName": "tma_info_bottleneck_big_code", @@ -742,7 +734,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_bottleneck_instruction_fetch_bw", @@ -750,7 +741,6 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -759,7 +749,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_bottleneck_memory_data_tlbs", @@ -768,7 +757,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_bottleneck_memory_latency", @@ -777,7 +765,6 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bottleneck_mispredictions", @@ -1301,6 +1288,7 @@ }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)", "MetricGroup": "Mem;MemoryLat;SoC", "MetricName": "tma_info_system_mem_read_latency", @@ -1455,7 +1443,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -1465,7 +1452,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1538,7 +1524,6 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks", "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", "MetricName": "tma_lock_latency", @@ -1596,6 +1581,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_memory_fence", @@ -1604,7 +1590,6 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -1676,7 +1661,6 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", @@ -1758,6 +1742,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_2", @@ -1767,6 +1752,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_3m", @@ -1822,6 +1808,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_slow_pause", @@ -1840,7 +1827,6 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1868,7 +1854,6 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", From 20d03ae36ec010aa97a97495d9dd9202cb93cb87 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Mon, 8 Jan 2024 18:57:20 +0530 Subject: [PATCH 490/768] usb: gadget: ncm: Fix indentations in documentation of NCM section Currently, the section of NCM which describes attributes are having wrong indentation. Fix this by following the correct format recommended. Fixes: 1900daeefd3e ("usb: gadget: ncm: Add support to update wMaxSegmentSize via configfs") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20240108160221.743649b5@canb.auug.org.au/ Signed-off-by: Udipto Goswami Link: https://lore.kernel.org/r/20240108132720.7786-1-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget-testing.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/usb/gadget-testing.rst b/Documentation/usb/gadget-testing.rst index 8cd62c466d20..077dfac7ed98 100644 --- a/Documentation/usb/gadget-testing.rst +++ b/Documentation/usb/gadget-testing.rst @@ -448,17 +448,17 @@ Function-specific configfs interface The function name to use when creating the function directory is "ncm". The NCM function provides these attributes in its function directory: - =============== ================================================== - ifname network device interface name associated with this - function instance - qmult queue length multiplier for high and super speed - host_addr MAC address of host's end of this - Ethernet over USB link - dev_addr MAC address of device's end of this - Ethernet over USB link - max_segment_size Segment size required for P2P connections. This - will set MTU to (max_segment_size - 14 bytes) - =============== ================================================== + ======================= ================================================== + ifname network device interface name associated with this + function instance + qmult queue length multiplier for high and super speed + host_addr MAC address of host's end of this + Ethernet over USB link + dev_addr MAC address of device's end of this + Ethernet over USB link + max_segment_size Segment size required for P2P connections. This + will set MTU to 14 bytes + ======================= ================================================== and after creating the functions/ncm. they contain default values: qmult is 5, dev_addr and host_addr are randomly selected. From 817349b6d26aadd8b38283a05ce0bab106b4c765 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 16 Jan 2024 11:28:15 +0530 Subject: [PATCH 491/768] usb: dwc3: host: Set XHCI_SG_TRB_CACHE_SIZE_QUIRK Upstream commit bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") introduced a new quirk in XHCI which fixes XHC timeout, which was seen on synopsys XHCs while using SG buffers. But the support for this quirk isn't present in the DWC3 layer. We will encounter this XHCI timeout/hung issue if we run iperf loopback tests using RTL8156 ethernet adaptor on DWC3 targets with scatter-gather enabled. This gets resolved after enabling the XHCI_SG_TRB_CACHE_SIZE_QUIRK. This patch enables it using the xhci device property since its needed for DWC3 controller. In Synopsys DWC3 databook, Table 9-3: xHCI Debug Capability Limitations Chained TRBs greater than TRB cache size: The debug capability driver must not create a multi-TRB TD that describes smaller than a 1K packet that spreads across 8 or more TRBs on either the IN TR or the OUT TR. Cc: stable@vger.kernel.org #5.11 Signed-off-by: Prashanth K Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240116055816.1169821-2-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/host.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 61f57fe5bb78..43230915323c 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -61,7 +61,7 @@ out: int dwc3_host_init(struct dwc3 *dwc) { - struct property_entry props[4]; + struct property_entry props[5]; struct platform_device *xhci; int ret, irq; int prop_idx = 0; @@ -89,6 +89,8 @@ int dwc3_host_init(struct dwc3 *dwc) memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); + props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-sg-trb-cache-size-quirk"); + if (dwc->usb3_lpm_capable) props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable"); From 520b391e3e813c1dd142d1eebb3ccfa6d08c3995 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 16 Jan 2024 11:28:16 +0530 Subject: [PATCH 492/768] usb: host: xhci-plat: Add support for XHCI_SG_TRB_CACHE_SIZE_QUIRK Upstream commit bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") introduced a new quirk in XHCI which fixes XHC timeout, which was seen on synopsys XHCs while using SG buffers. Currently this quirk can only be set using xhci private data. But there are some drivers like dwc3/host.c which adds adds quirks using software node for xhci device. Hence set this xhci quirk by iterating over device properties. Cc: stable@vger.kernel.org # 5.11 Fixes: bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20240116055816.1169821-3-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index f04fde19f551..3d071b875308 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -253,6 +253,9 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s if (device_property_read_bool(tmpdev, "quirk-broken-port-ped")) xhci->quirks |= XHCI_BROKEN_PORT_PED; + if (device_property_read_bool(tmpdev, "xhci-sg-trb-cache-size-quirk")) + xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK; + device_property_read_u32(tmpdev, "imod-interval-ns", &xhci->imod_interval); } From 9dc292413c56a2d01e34787d3fc4a76635e4a498 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Thu, 18 Jan 2024 21:18:53 +0530 Subject: [PATCH 493/768] usb: gadget: ncm: Fix endianness of wMaxSegmentSize variable in ecm_desc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent commit [1] added support for changing max segment size of the NCM interface via configfs. But the value of segment size value stored in ncm_opts need to be converted to little endian before saving it in ecm_desc. Also while initialising the value of segment size in opts during instance allocation, the value ETH_FRAME_LEN needs to be assigned directly without any conversion as ETH_FRAME_LEN and the variable max_segment_size are native endian. The current implementaion modifies it into little endian thus breaking things for big endian targets. Fix endianness while assigning these variables. While at it, fix up some stray spaces in comments added in code. [1]: https://lore.kernel.org/all/20231221153216.18657-1-quic_kriskura@quicinc.com/ Fixes: 1900daeefd3e ("usb: gadget: ncm: Add support to update wMaxSegmentSize via configfs") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240118154910.8765-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index a1575a0ca568..ca5d5f564998 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -105,8 +105,8 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f) /* * Although max mtu as dictated by u_ether is 15412 bytes, setting - * max_segment_sizeto 15426 would not be efficient. If user chooses segment - * size to be (>= 8192), then we can't aggregate more than one buffer in each + * max_segment_size to 15426 would not be efficient. If user chooses segment + * size to be (>= 8192), then we can't aggregate more than one buffer in each * NTB (assuming each packet coming from network layer is >= 8192 bytes) as ep * maxpacket limit is 16384. So let max_segment_size be limited to 8000 to allow * at least 2 packets to be aggregated reducing wastage of NTB buffer space @@ -1489,7 +1489,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) ncm_data_intf.bInterfaceNumber = status; ncm_union_desc.bSlaveInterface0 = status; - ecm_desc.wMaxSegmentSize = ncm_opts->max_segment_size; + ecm_desc.wMaxSegmentSize = cpu_to_le16(ncm_opts->max_segment_size); status = -ENODEV; @@ -1685,7 +1685,7 @@ static struct usb_function_instance *ncm_alloc_inst(void) kfree(opts); return ERR_CAST(net); } - opts->max_segment_size = cpu_to_le16(ETH_FRAME_LEN); + opts->max_segment_size = ETH_FRAME_LEN; INIT_LIST_HEAD(&opts->ncm_os_desc.ext_prop); descs[0] = &opts->ncm_os_desc; From a54a594d72f25b08f39d743880a76721fba9ae77 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:34 +0200 Subject: [PATCH 494/768] xhci: fix possible null pointer dereference at secondary interrupter removal Don't try to remove a secondary interrupter that is known to be invalid. Also check if the interrupter is valid inside the spinlock that protects the array of interrupters. Found by smatch static checker Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-usb/ffaa0a1b-5984-4a1f-bfd3-9184630a97b9@moroto.mountain/ Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 4460fa7e9fab..d00d4d937236 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1861,15 +1861,15 @@ void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrup struct xhci_hcd *xhci = hcd_to_xhci(hcd); unsigned int intr_num; - /* interrupter 0 is primary interrupter, don't touch it */ - if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) - xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n"); - - /* fixme, should we check xhci->interrupter[intr_num] == ir */ - /* fixme locking */ - spin_lock_irq(&xhci->lock); + /* interrupter 0 is primary interrupter, don't touch it */ + if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) { + xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n"); + spin_unlock_irq(&xhci->lock); + return; + } + intr_num = ir->intr_num; xhci_remove_interrupter(xhci, ir); From 09f197225cbc35db8ac135659cdd21bc1e29bda0 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:35 +0200 Subject: [PATCH 495/768] xhci: fix off by one check when adding a secondary interrupter. The sanity check of interrupter index when adding a new interrupter is off by one. intr_num needs to be smaller than xhci->max_interrupter to fit the array of interrupters. Luckily this doesn't cause any real word harm as xhci_add_interrupter() is always called with a intr_num value smaller than xhci->max_interrupters in any current kernel. Should not be needed for stable as 6.7 kernel and older only supports one interrupter, with intr_num always being zero. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-usb/e9771296-586d-456a-ac24-a82de79bb2e6@moroto.mountain/ Fixes: 4bf398e15aa4 ("xhci: split allocate interrupter into separate alloacte and add parts") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d00d4d937236..a7716202a8dd 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2322,7 +2322,7 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, u64 erst_base; u32 erst_size; - if (intr_num > xhci->max_interrupters) { + if (intr_num >= xhci->max_interrupters) { xhci_warn(xhci, "Can't add interrupter %d, max interrupters %d\n", intr_num, xhci->max_interrupters); return -EINVAL; From 5372c65e1311a16351ef03dd096ff576e6477674 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:36 +0200 Subject: [PATCH 496/768] xhci: process isoc TD properly when there was a transaction error mid TD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last TRB of a isoc TD might not trigger an event if there was an error event for a TRB mid TD. This is seen on a NEC Corporation uPD720200 USB 3.0 Host After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1 generate events for passed TRBs with IOC flag set if it proceeds to the next TD. This event is either a copy of the original error, or a "success" transfer event. If that event is missing then the driver and xHC host get out of sync as the driver is still expecting a transfer event for that first TD, while xHC host is already sending events for the next TD in the list. This leads to "Transfer event TRB DMA ptr not part of current TD" messages. As a solution we tag the isoc TDs that get error events mid TD. If an event doesn't match the first TD, then check if the tag is set, and event points to the next TD. In that case give back the fist TD and process the next TD normally Make sure TD status and transferred length stay valid in both cases with and without final TD completion event. Reported-by: Michał Pecio Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ Tested-by: Michał Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- drivers/usb/host/xhci.h | 1 + 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 33806ae966f9..41be7d31a36e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2376,6 +2376,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2401,8 +2404,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2422,6 +2426,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2430,6 +2437,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2808,17 +2823,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2830,9 +2879,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index a5c72a634e6a..6f82d404883f 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1549,6 +1549,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; From 7c4650ded49e5b88929ecbbb631efb8b0838e811 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 25 Jan 2024 17:27:37 +0200 Subject: [PATCH 497/768] xhci: handle isoc Babble and Buffer Overrun events properly xHCI 4.9 explicitly forbids assuming that the xHC has released its ownership of a multi-TRB TD when it reports an error on one of the early TRBs. Yet the driver makes such assumption and releases the TD, allowing the remaining TRBs to be freed or overwritten by new TDs. The xHC should also report completion of the final TRB due to its IOC flag being set by us, regardless of prior errors. This event cannot be recognized if the TD has already been freed earlier, resulting in "Transfer event TRB DMA ptr not part of current TD" error message. Fix this by reusing the logic for processing isoc Transaction Errors. This also handles hosts which fail to report the final completion. Fix transfer length reporting on Babble errors. They may be caused by device malfunction, no guarantee that the buffer has been filled. Signed-off-by: Michal Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 41be7d31a36e..f0d8a607ff21 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2394,9 +2394,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: From 12783c0b9e2c7915a50d5ec829630ff2da50472c Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Wed, 10 Jan 2024 15:28:14 +0530 Subject: [PATCH 498/768] usb: core: Prevent null pointer dereference in update_port_device_state Currently, the function update_port_device_state gets the usb_hub from udev->parent by calling usb_hub_to_struct_hub. However, in case the actconfig or the maxchild is 0, the usb_hub would be NULL and upon further accessing to get port_dev would result in null pointer dereference. Fix this by introducing an if check after the usb_hub is populated. Fixes: 83cb2604f641 ("usb: core: add sysfs entry for usb device state") Cc: stable@vger.kernel.org Signed-off-by: Udipto Goswami Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240110095814.7626-1-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index ffd7c99e24a3..48409d51ea43 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2053,9 +2053,19 @@ static void update_port_device_state(struct usb_device *udev) if (udev->parent) { hub = usb_hub_to_struct_hub(udev->parent); - port_dev = hub->ports[udev->portnum - 1]; - WRITE_ONCE(port_dev->state, udev->state); - sysfs_notify_dirent(port_dev->state_kn); + + /* + * The Link Layer Validation System Driver (lvstest) + * has a test step to unbind the hub before running the + * rest of the procedure. This triggers hub_disconnect + * which will set the hub's maxchild to 0, further + * resulting in usb_hub_to_struct_hub returning NULL. + */ + if (hub) { + port_dev = hub->ports[udev->portnum - 1]; + WRITE_ONCE(port_dev->state, udev->state); + sysfs_notify_dirent(port_dev->state_kn); + } } } From de4b5b28c87ccae4da268a53c5df135437f5cfde Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 15 Jan 2024 11:28:20 +0200 Subject: [PATCH 499/768] usb: dwc3: pci: add support for the Intel Arrow Lake-H This patch adds the necessary PCI ID for Intel Arrow Lake-H devices. Acked-by: Thinh Nguyen Signed-off-by: Heikki Krogerus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115092820.1454492-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 6604845c397c..39564e17f3b0 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -51,6 +51,8 @@ #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 #define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e +#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1 +#define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -421,6 +423,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(AMD, NL_USB, &dwc3_pci_amd_swnode) }, From 61a348857e869432e6a920ad8ea9132e8d44c316 Mon Sep 17 00:00:00 2001 From: Uttkarsh Aggarwal Date: Fri, 19 Jan 2024 15:18:25 +0530 Subject: [PATCH 500/768] usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend In current scenario if Plug-out and Plug-In performed continuously there could be a chance while checking for dwc->gadget_driver in dwc3_gadget_suspend, a NULL pointer dereference may occur. Call Stack: CPU1: CPU2: gadget_unbind_driver dwc3_suspend_common dwc3_gadget_stop dwc3_gadget_suspend dwc3_disconnect_gadget CPU1 basically clears the variable and CPU2 checks the variable. Consider CPU1 is running and right before gadget_driver is cleared and in parallel CPU2 executes dwc3_gadget_suspend where it finds dwc->gadget_driver which is not NULL and resumes execution and then CPU1 completes execution. CPU2 executes dwc3_disconnect_gadget where it checks dwc->gadget_driver is already NULL because of which the NULL pointer deference occur. Cc: stable@vger.kernel.org Fixes: 9772b47a4c29 ("usb: dwc3: gadget: Fix suspend/resume during device mode") Acked-by: Thinh Nguyen Signed-off-by: Uttkarsh Aggarwal Link: https://lore.kernel.org/r/20240119094825.26530-1-quic_uaggarwa@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 019368f8e9c4..564976b3e2b9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4709,15 +4709,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) unsigned long flags; int ret; - if (!dwc->gadget_driver) - return 0; - ret = dwc3_gadget_soft_disconnect(dwc); if (ret) goto err; spin_lock_irqsave(&dwc->lock, flags); - dwc3_disconnect_gadget(dwc); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return 0; From cc509b6a47e7c8998d9e41c273191299d5d9d631 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 19 Jan 2024 20:35:37 +0800 Subject: [PATCH 501/768] usb: chipidea: core: handle power lost in workqueue When power is recycled in usb controller during system power management, the controller will recognize it and switch role if role has been changed during power lost. In current design, it will be completed in resume() function. However, this may bring issues since usb class devices have their pm operations too and these device's resume() functions are still not being called at this point. When usb controller recognized host role should be stopped, these usb class devices will be removed at this point. But these usb class devices can't be removed in some cases, such as scsi devices. Since scsi driver may sync data to U-disk, however it will block there because scsi drvier can only handle pm request when is in suspended state. Therefore, there may exist a dependency between ci_resume() and usb class device's resume(). To break this potential dependency, we need to handle power lost work in a workqueue. Fixes: 74494b33211d ("usb: chipidea: core: add controller resume support when controller is powered off") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240119123537.3614838-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci.h | 2 ++ drivers/usb/chipidea/core.c | 44 ++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index d9bb3d3f026e..2a38e1eb6546 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -176,6 +176,7 @@ struct hw_bank { * @enabled_otg_timer_bits: bits of enabled otg timers * @next_otg_timer: next nearest enabled timer to be expired * @work: work for role changing + * @power_lost_work: work for power lost handling * @wq: workqueue thread * @qh_pool: allocation pool for queue heads * @td_pool: allocation pool for transfer descriptors @@ -226,6 +227,7 @@ struct ci_hdrc { enum otg_fsm_timer next_otg_timer; struct usb_role_switch *role_switch; struct work_struct work; + struct work_struct power_lost_work; struct workqueue_struct *wq; struct dma_pool *qh_pool; diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 41014f93cfdf..835bf2428dc6 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -856,6 +856,27 @@ static int ci_extcon_register(struct ci_hdrc *ci) return 0; } +static void ci_power_lost_work(struct work_struct *work) +{ + struct ci_hdrc *ci = container_of(work, struct ci_hdrc, power_lost_work); + enum ci_role role; + + disable_irq_nosync(ci->irq); + pm_runtime_get_sync(ci->dev); + if (!ci_otg_is_fsm_mode(ci)) { + role = ci_get_role(ci); + + if (ci->role != role) { + ci_handle_id_switch(ci); + } else if (role == CI_ROLE_GADGET) { + if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV)) + usb_gadget_vbus_connect(&ci->gadget); + } + } + pm_runtime_put_sync(ci->dev); + enable_irq(ci->irq); +} + static DEFINE_IDA(ci_ida); struct platform_device *ci_hdrc_add_device(struct device *dev, @@ -1045,6 +1066,8 @@ static int ci_hdrc_probe(struct platform_device *pdev) spin_lock_init(&ci->lock); mutex_init(&ci->mutex); + INIT_WORK(&ci->power_lost_work, ci_power_lost_work); + ci->dev = dev; ci->platdata = dev_get_platdata(dev); ci->imx28_write_fix = !!(ci->platdata->flags & @@ -1396,25 +1419,6 @@ static int ci_suspend(struct device *dev) return 0; } -static void ci_handle_power_lost(struct ci_hdrc *ci) -{ - enum ci_role role; - - disable_irq_nosync(ci->irq); - if (!ci_otg_is_fsm_mode(ci)) { - role = ci_get_role(ci); - - if (ci->role != role) { - ci_handle_id_switch(ci); - } else if (role == CI_ROLE_GADGET) { - if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV)) - usb_gadget_vbus_connect(&ci->gadget); - } - } - - enable_irq(ci->irq); -} - static int ci_resume(struct device *dev) { struct ci_hdrc *ci = dev_get_drvdata(dev); @@ -1446,7 +1450,7 @@ static int ci_resume(struct device *dev) ci_role(ci)->resume(ci, power_lost); if (power_lost) - ci_handle_power_lost(ci); + queue_work(system_freezable_wq, &ci->power_lost_work); if (ci->supports_runtime_pm) { pm_runtime_disable(dev); From f17c34ffc792bbb520e4b61baa16b6cfc7d44b13 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 22 Jan 2024 16:35:32 +0100 Subject: [PATCH 502/768] USB: hub: check for alternate port before enabling A_ALT_HNP_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OTG 1.3 spec has the feature A_ALT_HNP_SUPPORT, which tells a device that it is connected to the wrong port. Some devices refuse to operate if you enable that feature, because it indicates to them that they ought to request to be connected to another port. According to the spec this feature may be used based only the following three conditions: 6.5.3 a_alt_hnp_support Setting this feature indicates to the B-device that it is connected to an A-device port that is not capable of HNP, but that the A-device does have an alternate port that is capable of HNP. The A-device is required to set this feature under the following conditions: • the A-device has multiple receptacles • the A-device port that connects to the B-device does not support HNP • the A-device has another port that does support HNP A check for the third and first condition is missing. Add it. Signed-off-by: Oliver Neukum Cc: stable Fixes: 7d2d641c44269 ("usb: otg: don't set a_alt_hnp_support feature for OTG 2.0 device") Link: https://lore.kernel.org/r/20240122153545.12284-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 48409d51ea43..e38a4124f610 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2398,17 +2398,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev) } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { - /* Set a_alt_hnp_support for legacy otg device */ - err = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_SET_FEATURE, 0, - USB_DEVICE_A_ALT_HNP_SUPPORT, - 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (err < 0) - dev_err(&udev->dev, - "set a_alt_hnp_support failed: %d\n", - err); + /* + * We are operating on a legacy OTP device + * These should be told that they are operating + * on the wrong port if we have another port that does + * support HNP + */ + if (bus->otg_port != 0) { + /* Set a_alt_hnp_support for legacy otg device */ + err = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_SET_FEATURE, 0, + USB_DEVICE_A_ALT_HNP_SUPPORT, + 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (err < 0) + dev_err(&udev->dev, + "set a_alt_hnp_support failed: %d\n", + err); + } } } #endif From b2d2d7ea0dd09802cf5a0545bf54d8ad8987d20c Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Tue, 23 Jan 2024 11:48:29 +0800 Subject: [PATCH 503/768] usb: f_mass_storage: forbid async queue when shutdown happen When write UDC to empty and unbind gadget driver from gadget device, it is possible that there are many queue failures for mass storage function. The root cause is mass storage main thread alaways try to queue request to receive a command from host if running flag is on, on platform like dwc3, if pull down called, it will not queue request again and return -ESHUTDOWN, but it not affect running flag of mass storage function. Check return code from mass storage function and clear running flag if it is -ESHUTDOWN, also indicate start in/out transfer failure to break loops. Cc: stable Signed-off-by: yuan linyu Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240123034829.3848409-1-yuanlinyu@hihonor.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 722a3ab2b337..c265a1f62fc1 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -545,21 +545,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_SENDING; - if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq); + if (rc) { bh->state = BUF_STATE_EMPTY; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_RECEIVING; - if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq); + if (rc) { bh->state = BUF_STATE_FULL; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } From 032178972f8e992b90f9794a13265fec8c8314b0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 14 Jan 2024 16:30:08 -0800 Subject: [PATCH 504/768] usb: gadget: pch_udc: fix an Excess kernel-doc warning Delete one extraneous line of kernel-doc to prevent a kernel-doc warning: pch_udc.c:297: warning: Excess struct member 'desc' description in 'pch_udc_ep' Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: tomoya-linux@dsn.lapis-semi.com Link: https://lore.kernel.org/r/20240115003008.5763-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/pch_udc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 4f8617210d85..169f72665739 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -274,7 +274,6 @@ struct pch_udc_cfg_data { * @td_data: for data request * @dev: reference to device struct * @offset_addr: offset address of ep register - * @desc: for this ep * @queue: queue for requests * @num: endpoint number * @in: endpoint is IN From b717dfbf73e842d15174699fe2c6ee4fdde8aa1f Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 17 Jan 2024 11:47:42 +0000 Subject: [PATCH 505/768] Revert "usb: typec: tcpm: fix cc role at port reset" This reverts commit 1e35f074399dece73d5df11847d4a0d7a6f49434. Given that ERROR_RECOVERY calls into PORT_RESET for Hi-Zing the CC pins, setting CC pins to default state during PORT_RESET breaks error recovery. 4.5.2.2.2.1 ErrorRecovery State Requirements The port shall not drive VBUS or VCONN, and shall present a high-impedance to ground (above zOPEN) on its CC1 and CC2 pins. Hi-Zing the CC pins is the inteded behavior for PORT_RESET. CC pins are set to default state after tErrorRecovery in PORT_RESET_WAIT_OFF. 4.5.2.2.2.2 Exiting From ErrorRecovery State A Sink shall transition to Unattached.SNK after tErrorRecovery. A Source shall transition to Unattached.SRC after tErrorRecovery. Cc: stable@vger.kernel.org Cc: Frank Wang Fixes: 1e35f074399d ("usb: typec: tcpm: fix cc role at port reset") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240117114742.2587779-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 5945e3a2b0f7..9d410718eaf4 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4876,8 +4876,7 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ? - TYPEC_CC_RD : tcpm_rp_cc(port)); + tcpm_set_cc(port, TYPEC_CC_OPEN); tcpm_set_state(port, PORT_RESET_WAIT_OFF, PD_T_ERROR_RECOVERY); break; From 3caf2b2ad7334ef35f55b95f3e1b138c6f77b368 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 26 Jan 2024 17:38:00 -0500 Subject: [PATCH 506/768] usb: ulpi: Fix debugfs directory leak The ULPI per-device debugfs root is named after the ulpi device's parent, but ulpi_unregister_interface tries to remove a debugfs directory named after the ulpi device itself. This results in the directory sticking around and preventing subsequent (deferred) probes from succeeding. Change the directory name to match the ulpi device. Fixes: bd0a0a024f2a ("usb: ulpi: Add debugfs support") Cc: stable@vger.kernel.org Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240126223800.2864613-1-sean.anderson@seco.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 84d91b1c1eed..0886b19d2e1c 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; } - root = debugfs_create_dir(dev_name(dev), ulpi_root); + root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root); debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops); dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", From c9aed03a0a683fd1600ea92f2ad32232d4736272 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:21 +0100 Subject: [PATCH 507/768] usb: ucsi: Add missing ppm_lock Calling ->sync_write must be done while holding the PPM lock as the mailbox logic does not support concurrent commands. At least since the addition of partner task this means that ucsi_acknowledge_connector_change should be called with the PPM lock held as it calls ->sync_write. Thus protect the only call to ucsi_acknowledge_connector_change with the PPM. All other calls to ->sync_write already happen under the PPM lock. Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-2-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 5392ec698959..14f5a7bfae2e 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -938,7 +938,9 @@ static void ucsi_handle_connector_change(struct work_struct *work) clear_bit(EVENT_PENDING, &con->ucsi->flags); + mutex_lock(&ucsi->ppm_lock); ret = ucsi_acknowledge_connector_change(ucsi); + mutex_unlock(&ucsi->ppm_lock); if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); From 2840143e393a4ddc1caab4372969ea337371168c Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:22 +0100 Subject: [PATCH 508/768] usb: ucsi_acpi: Fix command completion handling In case of a spurious or otherwise delayed notification it is possible that CCI still reports the previous completion. The UCSI spec is aware of this and provides two completion bits in CCI, one for normal commands and one for acks. As acks and commands alternate the notification handler can determine if the completion bit is from the current command. The initial UCSI code correctly handled this but the distinction between the two completion bits was lost with the introduction of the new API. To fix this revive the ACK_PENDING bit for ucsi_acpi and only complete commands if the completion bit matches. Fixes: f56de278e8ec ("usb: typec: ucsi: acpi: Move to the new API") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-3-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 6bbf490ac401..fa222080887d 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -73,9 +73,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &ua->flags); + if (ack) + set_bit(ACK_PENDING, &ua->flags); + else + set_bit(COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -85,7 +89,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, ret = -ETIMEDOUT; out_clear_bit: - clear_bit(COMMAND_PENDING, &ua->flags); + if (ack) + clear_bit(ACK_PENDING, &ua->flags); + else + clear_bit(COMMAND_PENDING, &ua->flags); return ret; } @@ -142,8 +149,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (UCSI_CCI_CONNECTOR(cci)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &ua->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) + complete(&ua->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && + test_bit(COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } From f3be347ea42dbb0358cd8b2d8dc543a23b70a976 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:23 +0100 Subject: [PATCH 509/768] usb: ucsi_acpi: Quirk to ack a connector change ack cmd The PPM on some Dell laptops seems to expect that the ACK_CC_CI command to clear the connector change notification is in turn followed by another ACK_CC_CI to acknowledge the ACK_CC_CI command itself. This is in violation of the UCSI spec that states: "The only notification that is not acknowledged by the OPM is the command completion notification for the ACK_CC_CI or the PPM_RESET command." Add a quirk to send this ack anyway. Apply the quirk to all Dell systems. On the first command that acks a connector change send a dummy command to determine if it runs into a timeout. Only activate the quirk if it does. This ensure that we do not break Dell systems that do not need the quirk. Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-4-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 71 ++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index fa222080887d..928eacbeb21a 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,8 @@ struct ucsi_acpi { unsigned long flags; guid_t guid; u64 cmd; + bool dell_quirk_probed; + bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -126,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; -static const struct dmi_system_id zenbook_dmi_id[] = { +/* + * Some Dell laptops expect that an ACK command with the + * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) + * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. + * If this is not done events are not delivered to OSPM and + * subsequent commands will timeout. + */ +static int +ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + u64 cmd = *(u64 *)val, ack = 0; + int ret; + + if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && + cmd & UCSI_ACK_CONNECTOR_CHANGE) + ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret != 0) + return ret; + if (ack == 0) + return ret; + + if (!ua->dell_quirk_probed) { + ua->dell_quirk_probed = true; + + cmd = UCSI_GET_CAPABILITY; + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, + sizeof(cmd)); + if (ret == 0) + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, + &ack, sizeof(ack)); + if (ret != -ETIMEDOUT) + return ret; + + ua->dell_quirk_active = true; + dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); + dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); + } + + if (!ua->dell_quirk_active) + return ret; + + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); +} + +static const struct ucsi_operations ucsi_dell_ops = { + .read = ucsi_acpi_read, + .sync_write = ucsi_dell_sync_write, + .async_write = ucsi_acpi_async_write +}; + +static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), }, + .driver_data = (void *)&ucsi_zenbook_ops, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + }, + .driver_data = (void *)&ucsi_dell_ops, }, { } }; @@ -160,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); const struct ucsi_operations *ops = &ucsi_acpi_ops; + const struct dmi_system_id *id; struct ucsi_acpi *ua; struct resource *res; acpi_status status; @@ -189,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) init_completion(&ua->complete); ua->dev = &pdev->dev; - if (dmi_check_system(zenbook_dmi_id)) - ops = &ucsi_zenbook_ops; + id = dmi_first_match(ucsi_acpi_quirks); + if (id) + ops = id->driver_data; ua->ucsi = ucsi_create(&pdev->dev, ops); if (IS_ERR(ua->ucsi)) From f2e5d3de7e1fbf24483e7f996e519b3ebc3935a1 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 13 Jan 2024 22:55:48 +0200 Subject: [PATCH 510/768] usb: typec: tcpm: fix the PD disabled case If the PD is disabled for the port, port->pds will be left as NULL, which causes the following crash during caps intilisation. Fix the crash. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: tcpm_register_port+0xaec/0xc44 qcom_pmic_typec_probe+0x1a4/0x254 platform_probe+0x68/0xc0 really_probe+0x148/0x2ac __driver_probe_device+0x78/0x12c driver_probe_device+0xd8/0x160 Bluetooth: hci0: QCA Product ID :0x0000000a __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x80/0xdc Bluetooth: hci0: QCA SOC Version :0x40020150 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x1ec/0x51c worker_thread+0x1ec/0x3e4 kthread+0x120/0x124 ret_from_fork+0x10/0x20 Fixes: cd099cde4ed2 ("usb: typec: tcpm: Support multiple capabilities") Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240113-pmi632-typec-v2-5-182d9aa0a5b3@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 9d410718eaf4..f7d7daa60c8d 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -6847,7 +6847,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) if (err) goto out_role_sw_put; - port->typec_caps.pd = port->pds[0]; + if (port->pds) + port->typec_caps.pd = port->pds[0]; port->typec_port = typec_register_port(port->dev, &port->typec_caps); if (IS_ERR(port->typec_port)) { From e15c99be0c915bbe70dfe55450d268d7bd5bdac8 Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Fri, 19 Jan 2024 12:35:16 +0100 Subject: [PATCH 511/768] tty: serial: Fix bit order in RS485 flag definitions Since the commit 93f3350c46fa ("RS485: fix inconsistencies in the meaning of some variables"), the definition for bit 3 has been removed. But with the switch to bit shift macros in commit 76ac8e29855b ("tty: serial: Cleanup the bit shift with macro"), this gap wasn't preserved. To avoid a break in user/kernel api of the system skip bit 3 again and add a placeholder comment. Signed-off-by: Christoph Niedermaier Fixes: 76ac8e29855b ("tty: serial: Cleanup the bit shift with macro") Fixes: 6056f20f27e9 ("tty: serial: Add RS422 flag to struct serial_rs485") Reviewed-by: Jiri Slaby Cc: Greg Kroah-Hartman Cc: Crescent CY Hsieh Cc: Jiri Slaby Cc: Lukas Wunner Cc: Lino Sanfilippo Cc: Hugo Villeneuve Link: https://lore.kernel.org/r/20240119113516.2944-1-cniedermaier@dh-electronics.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index 9086367db043..de9b4733607e 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -145,12 +145,13 @@ struct serial_rs485 { #define SER_RS485_ENABLED _BITUL(0) #define SER_RS485_RTS_ON_SEND _BITUL(1) #define SER_RS485_RTS_AFTER_SEND _BITUL(2) -#define SER_RS485_RX_DURING_TX _BITUL(3) -#define SER_RS485_TERMINATE_BUS _BITUL(4) -#define SER_RS485_ADDRB _BITUL(5) -#define SER_RS485_ADDR_RECV _BITUL(6) -#define SER_RS485_ADDR_DEST _BITUL(7) -#define SER_RS485_MODE_RS422 _BITUL(8) +/* Placeholder for bit 3: SER_RS485_RTS_BEFORE_SEND, which isn't used anymore */ +#define SER_RS485_RX_DURING_TX _BITUL(4) +#define SER_RS485_TERMINATE_BUS _BITUL(5) +#define SER_RS485_ADDRB _BITUL(6) +#define SER_RS485_ADDR_RECV _BITUL(7) +#define SER_RS485_ADDR_DEST _BITUL(8) +#define SER_RS485_MODE_RS422 _BITUL(9) __u32 delay_rts_before_send; __u32 delay_rts_after_send; From 86ee55e9bc7ff7308eda67e5bcb3f8c834c7c113 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 22:25:21 +0300 Subject: [PATCH 512/768] serial: 8250_pci1xxxx: fix off by one in pci1xxxx_process_read_data() These > comparisons should be >= to prevent writing one element beyond the end of the rx_buff[] array. The rx_buff[] buffer has RX_BUF_SIZE elements. Fix the buffer overflow. Fixes: aba8290f368d ("8250: microchip: pci1xxxx: Add Burst mode reception support in uart driver for writing into FIFO") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/ZZ7vIfj7Jgh-pJn8@moroto Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci1xxxx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c index 558c4c7f3104..cd258922bd78 100644 --- a/drivers/tty/serial/8250/8250_pci1xxxx.c +++ b/drivers/tty/serial/8250/8250_pci1xxxx.c @@ -302,7 +302,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port, * to read, the data is received one byte at a time. */ while (valid_burst_count--) { - if (*buff_index > (RX_BUF_SIZE - UART_BURST_SIZE)) + if (*buff_index >= (RX_BUF_SIZE - UART_BURST_SIZE)) break; burst_buf = (u32 *)&rx_buff[*buff_index]; *burst_buf = readl(port->membase + UART_RX_BURST_FIFO); @@ -311,7 +311,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port, } while (*valid_byte_count) { - if (*buff_index > RX_BUF_SIZE) + if (*buff_index >= RX_BUF_SIZE) break; rx_buff[*buff_index] = readb(port->membase + UART_RX_BYTE_FIFO); From 30926783a46841c2d1bbf3f74067ba85d304fd0d Mon Sep 17 00:00:00 2001 From: Gui-Dong Han <2045gemini@gmail.com> Date: Fri, 12 Jan 2024 19:36:24 +0800 Subject: [PATCH 513/768] serial: core: Fix atomicity violation in uart_tiocmget In uart_tiocmget(): result = uport->mctrl; uart_port_lock_irq(uport); result |= uport->ops->get_mctrl(uport); uart_port_unlock_irq(uport); ... return result; In uart_update_mctrl(): uart_port_lock_irqsave(port, &flags); ... port->mctrl = (old & ~clear) | set; ... port->ops->set_mctrl(port, port->mctrl); ... uart_port_unlock_irqrestore(port, flags); An atomicity violation is identified due to the concurrent execution of uart_tiocmget() and uart_update_mctrl(). After assigning result = uport->mctrl, the mctrl value may change in uart_update_mctrl(), leading to a mismatch between the value returned by uport->ops->get_mctrl(uport) and the mctrl value previously read. This can result in uart_tiocmget() returning an incorrect value. This possible bug is found by an experimental static analysis tool developed by our team, BassCheck[1]. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. The above possible bug is reported when our tool analyzes the source code of Linux 5.17. To address this issue, it is suggested to move the line result = uport->mctrl inside the uart_port_lock block to ensure atomicity and prevent the mctrl value from being altered during the execution of uart_tiocmget(). With this patch applied, our tool no longer reports the bug, with the kernel configuration allyesconfig for x86_64. Due to the absence of the requisite hardware, we are unable to conduct runtime testing of the patch. Therefore, our verification is solely based on code logic analysis. [1] https://sites.google.com/view/basscheck/ Fixes: c5f4644e6c8b ("[PATCH] Serial: Adjust serial locking") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Link: https://lore.kernel.org/r/20240112113624.17048-1-2045gemini@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index b56ed8c376b2..d6a58a9e072a 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1084,8 +1084,8 @@ static int uart_tiocmget(struct tty_struct *tty) goto out; if (!tty_io_error(tty)) { - result = uport->mctrl; uart_port_lock_irq(uport); + result = uport->mctrl; result |= uport->ops->get_mctrl(uport); uart_port_unlock_irq(uport); } From 0419373333c2f2024966d36261fd82a453281e80 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:58 -0500 Subject: [PATCH 514/768] serial: max310x: set default value when reading clock ready bit If regmap_read() returns a non-zero value, the 'val' variable can be left uninitialized. Clear it before calling regmap_read() to make sure we properly detect the clock ready bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index f3a99daebdaa..b2c753ba9cbf 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -641,7 +641,7 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val; + unsigned int val = 0; msleep(10); regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); if (!(val & MAX310X_STS_CLKREADY_BIT)) { From 93cd256ab224c2519e7c4e5f58bb4f1ac2bf0965 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:59 -0500 Subject: [PATCH 515/768] serial: max310x: improve crystal stable clock detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some people are seeing a warning similar to this when using a crystal: max310x 11-006c: clock is not stable yet The datasheet doesn't mention the maximum time to wait for the clock to be stable when using a crystal, and it seems that the 10ms delay in the driver is not always sufficient. Jan Kundrát reported that it took three tries (each separated by 10ms) to get a stable clock. Modify behavior to check stable clock ready bit multiple times (20), and waiting 10ms between each try. Note: the first draft of the driver originally used a 50ms delay, without checking the clock stable bit. Then a loop with 1000 retries was implemented, each time reading the clock stable bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Link: https://lore.kernel.org/all/20240110174015.6f20195fde08e5c9e64e5675@hugovil.com/raw Link: https://github.com/boundarydevices/linux/commit/e5dfe3e4a751392515d78051973190301a37ca9a Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index b2c753ba9cbf..c0eb0615d945 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Crystal-related definitions */ +#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ + /* MAX3107 specific */ #define MAX3107_REV_ID (0xa0) @@ -641,12 +645,19 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val = 0; - msleep(10); - regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); - if (!(val & MAX310X_STS_CLKREADY_BIT)) { + bool stable = false; + unsigned int try = 0, val = 0; + + do { + msleep(MAX310X_XTAL_WAIT_DELAY_MS); + regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); + + if (val & MAX310X_STS_CLKREADY_BIT) + stable = true; + } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); + + if (!stable) dev_warn(dev, "clock is not stable yet\n"); - } } return bestfreq; From 8afa6c6decea37e7cb473d2c60473f37f46cea35 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:00 -0500 Subject: [PATCH 516/768] serial: max310x: fail probe if clock crystal is unstable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A stable clock is really required in order to use this UART, so log an error message and bail out if the chip reports that the clock is not stable. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index c0eb0615d945..552e153a24e0 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -587,7 +587,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -657,7 +657,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); if (!stable) - dev_warn(dev, "clock is not stable yet\n"); + return dev_err_probe(dev, -EAGAIN, + "clock is not stable\n"); } return bestfreq; @@ -1282,7 +1283,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty { int i, ret, fmin, fmax, freq; struct max310x_port *s; - u32 uartclk = 0; + s32 uartclk = 0; bool xtal; for (i = 0; i < devtype->nr; i++) @@ -1360,6 +1361,11 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); + if (uartclk < 0) { + ret = uartclk; + goto out_uart; + } + dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); for (i = 0; i < devtype->nr; i++) { From b35f8dbbce818b02c730dc85133dc7754266e084 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:01 -0500 Subject: [PATCH 517/768] serial: max310x: prevent infinite while() loop in port startup If there is a problem after resetting a port, the do/while() loop that checks the default value of DIVLSB register may run forever and spam the I2C bus. Add a delay before each read of DIVLSB, and a maximum number of tries to prevent that situation from happening. Also fail probe if port reset is unsuccessful. Fixes: 10d8b34a4217 ("serial: max310x: Driver rework") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 552e153a24e0..10bf6d75bf9e 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Port startup definitions */ +#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ + /* Crystal-related definitions */ #define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ #define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ @@ -1346,6 +1350,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { + bool started = false; + unsigned int try = 0, val = 0; + /* Reset port */ regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); @@ -1354,8 +1361,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty /* Wait for port startup */ do { - regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); - } while (ret != 0x01); + msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); + + if (val == 0x01) + started = true; + } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); + + if (!started) { + ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); + goto out_uart; + } regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } From 2751153b9945c31eb905deb9fbe2d7f127b4b34c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Dec 2023 21:20:50 +0100 Subject: [PATCH 518/768] parisc: Make RO_DATA page aligned in vmlinux.lds.S The rodata_test program for CONFIG_DEBUG_RODATA_TEST=y complains if read-only data does not start at page boundary. Signed-off-by: Helge Deller --- arch/parisc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 548051b0b4af..b445e47903cf 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -127,7 +127,7 @@ SECTIONS } #endif - RO_DATA(8) + RO_DATA(PAGE_SIZE) /* unwind info */ . = ALIGN(4); From b9402e3b97289ca9e0f0f79f4df64bd6c9176a86 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 17 Jan 2024 17:46:43 +0100 Subject: [PATCH 519/768] parisc: Check for valid stride size for cache flushes Report if the calculated cache stride size is zero, otherwise the cache flushing routine will never finish and hang the machine. This can be reproduced with a testcase in qemu, where the firmware reports wrong cache values. Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 268d90a9325b..0c015487e5db 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -264,6 +264,10 @@ parisc_cache_init(void) icache_stride = CAFL_STRIDE(cache_info.ic_conf); #undef CAFL_STRIDE + /* stride needs to be non-zero, otherwise cache flushes will not work */ + WARN_ON(cache_info.dc_size && dcache_stride == 0); + WARN_ON(cache_info.ic_size && icache_stride == 0); + if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) == PDC_MODEL_NVA_UNSUPPORTED) { printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n"); From c8708d758e715c3824a73bf0cda97292b52be44d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Jan 2024 21:16:39 +0100 Subject: [PATCH 520/768] parisc: Prevent hung tasks when printing inventory on serial console Printing the inventory on a serial console can be quite slow and thus may trigger the hung task detector (CONFIG_DETECT_HUNG_TASK=y) and possibly reboot the machine. Adding a cond_resched() prevents this. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/kernel/drivers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 25f9b9e9d6df..404ea37705ce 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -1004,6 +1004,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); From 20e08a720cc526dacc0678067ffc81613aa738ca Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 22 Jan 2024 17:51:15 +0100 Subject: [PATCH 521/768] parisc: Drop unneeded semicolon in parse_tree_node() Reported-by: kernel test robot Signed-off-by: Helge Deller Closes: https://lore.kernel.org/oe-kbuild-all/202401222059.Wli6OGT0-lkp@intel.com/ --- arch/parisc/kernel/drivers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 404ea37705ce..c7ff339732ba 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -742,7 +742,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath) }; if (device_for_each_child(parent, &recurse_data, descend_children)) - { /* nothing */ }; + { /* nothing */ } return d.dev; } From 29142dc92c37d3259a33aef15b03e6ee25b0d188 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 27 Jan 2024 13:21:14 -0800 Subject: [PATCH 522/768] tracefs: remove stale 'update_gid' code The 'eventfs_update_gid()' function is no longer called, so remove it (and the helper function it uses). Link: https://lore.kernel.org/all/CAHk-=wj+DsZZ=2iTUkJ-Nojs9fjYMvPs1NuoM3yK7aTDtJfPYQ@mail.gmail.com/ Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 38 -------------------------------------- fs/tracefs/internal.h | 1 - 2 files changed, 39 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 6b211522a13e..1c3dd0ad4660 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -281,44 +281,6 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, inode->i_gid = attr->gid; } -static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) -{ - struct eventfs_inode *ei_child; - - /* at most we have events/system/event */ - if (WARN_ON_ONCE(level > 3)) - return; - - ei->attr.gid = gid; - - if (ei->entry_attrs) { - for (int i = 0; i < ei->nr_entries; i++) { - ei->entry_attrs[i].gid = gid; - } - } - - /* - * Only eventfs_inode with dentries are updated, make sure - * all eventfs_inodes are updated. If one of the children - * do not have a dentry, this function must traverse it. - */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - if (!ei_child->dentry) - update_gid(ei_child, gid, level + 1); - } -} - -void eventfs_update_gid(struct dentry *dentry, kgid_t gid) -{ - struct eventfs_inode *ei = dentry->d_fsdata; - int idx; - - idx = srcu_read_lock(&eventfs_srcu); - update_gid(ei, gid, 0); - srcu_read_unlock(&eventfs_srcu, idx); -} - /** * create_file - create a file in the tracefs filesystem * @name: the name of the file to create. diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 45397df9bb65..91c2bf0b91d9 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -82,7 +82,6 @@ struct inode *tracefs_get_inode(struct super_block *sb); struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); struct dentry *eventfs_failed_creating(struct dentry *dentry); struct dentry *eventfs_end_creating(struct dentry *dentry); -void eventfs_update_gid(struct dentry *dentry, kgid_t gid); void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ From 41bccc98fb7931d63d03f326a746ac4d429c1dd3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jan 2024 17:01:12 -0800 Subject: [PATCH 523/768] Linux 6.8-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f9b76d3a4b7..6c0a4d294444 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From c79f52f0656eeb3e4a12f7f358f760077ae111b6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 27 Jan 2024 13:44:58 -0700 Subject: [PATCH 524/768] io_uring/rw: ensure poll based multishot read retries appropriately io_read_mshot() always relies on poll triggering retries, and this works fine as long as we do a retry per size of the buffer being read. The buffer size is given by the size of the buffer(s) in the given buffer group ID. But if we're reading less than what is available, then we don't always get to read everything that is available. For example, if the buffers available are 32 bytes and we have 64 bytes to read, then we'll correctly read the first 32 bytes and then wait for another poll trigger before we attempt the next read. This next poll trigger may never happen, in which case we just sit forever and never make progress, or it may trigger at some point in the future, and now we're just delivering the available data much later than we should have. io_read_mshot() could do retries itself, but that is wasteful as we'll be going through all of __io_read() again, and most likely in vain. Rather than do that, bump our poll reference count and have io_poll_check_events() do one more loop and check with vfs_poll() if we have more data to read. If we do, io_read_mshot() will get invoked again directly and we'll read the next chunk. io_poll_multishot_retry() must only get called from inside io_poll_issue(), which is our multishot retry handler, as we know we already "own" the request at this point. Cc: stable@vger.kernel.org Link: https://github.com/axboe/liburing/issues/1041 Fixes: fc68fcda0491 ("io_uring/rw: add support for IORING_OP_READ_MULTISHOT") Signed-off-by: Jens Axboe --- io_uring/poll.h | 9 +++++++++ io_uring/rw.c | 10 +++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/io_uring/poll.h b/io_uring/poll.h index ff4d5d753387..1dacae9e816c 100644 --- a/io_uring/poll.h +++ b/io_uring/poll.h @@ -24,6 +24,15 @@ struct async_poll { struct io_poll *double_poll; }; +/* + * Must only be called inside issue_flags & IO_URING_F_MULTISHOT, or + * potentially other cases where we already "own" this poll request. + */ +static inline void io_poll_multishot_retry(struct io_kiocb *req) +{ + atomic_inc(&req->poll_refs); +} + int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_poll_add(struct io_kiocb *req, unsigned int issue_flags); diff --git a/io_uring/rw.c b/io_uring/rw.c index 118cc9f1cf16..d5e79d9bdc71 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -18,6 +18,7 @@ #include "opdef.h" #include "kbuf.h" #include "rsrc.h" +#include "poll.h" #include "rw.h" struct io_rw { @@ -962,8 +963,15 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, ret, cflags | IORING_CQE_F_MORE)) { - if (issue_flags & IO_URING_F_MULTISHOT) + if (issue_flags & IO_URING_F_MULTISHOT) { + /* + * Force retry, as we might have more data to + * be read and otherwise it won't get retried + * until (if ever) another poll is triggered. + */ + io_poll_multishot_retry(req); return IOU_ISSUE_SKIP_COMPLETE; + } return -EAGAIN; } } From efb56d84dd9c3de3c99fc396abb57c6d330038b5 Mon Sep 17 00:00:00 2001 From: David Senoner Date: Fri, 26 Jan 2024 16:56:26 +0100 Subject: [PATCH 525/768] ALSA: hda/realtek: Fix the external mic not being recognised for Acer Swift 1 SF114-32 If you connect an external headset/microphone to the 3.5mm jack on the Acer Swift 1 SF114-32 it does not recognize the microphone. This fixes that and gives the user the ability to choose between internal and headset mic. Signed-off-by: David Senoner Cc: Link: https://lore.kernel.org/r/20240126155626.2304465-1-seda18@rolmail.net Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bfefe9ccbc6b..ef51e51fb6a5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9655,6 +9655,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), From f0d78972f27dc1d1d51fbace2713ad3cdc60a877 Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Sun, 28 Jan 2024 16:57:04 +0100 Subject: [PATCH 526/768] ALSA: hda/realtek: Enable Mute LED on HP Laptop 14-fq0xxx This HP Laptop uses ALC236 codec with COEF 0x07 controlling the mute LED. Enable existing quirk for this device. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20240128155704.2333812-1-l.guzenko@web.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ef51e51fb6a5..bfa244028b99 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9856,6 +9856,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), From c0787fcff88bce36d21e5b726bdeab694baba6a5 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Sun, 28 Jan 2024 16:23:38 +0300 Subject: [PATCH 527/768] Revert "ALSA: usb-audio: Skip setting clock selector for single connections" This reverts commit 67794f882adca00d043899ac248bc002751da9f6. We need to explicitly set up the clock selector to workaround a problem with the Behringer mixers. This was originally done in d2e8f641257d ("ALSA: usb-audio: Explicitly set up the clock selector") The problem with MOTU M Series mentioned in commit message was fixed in a different way by checking control capabilities of clock selectors. Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240128132338.819273-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 7b259641adb5..a8204c6d6fac 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -344,7 +344,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (ret > 0) { /* Skip setting clock selector again for some devices */ if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR || - pins == 1 || !writeable) + !writeable) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) From 82ef1a5356572219f41f9123ca047259a77bd67b Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 24 Jan 2024 11:26:44 +0100 Subject: [PATCH 528/768] xfs: reset XFS_ATTR_INCOMPLETE filter on node removal In XFS_DAS_NODE_REMOVE_ATTR case, xfs_attr_mode_remove_attr() sets filter to XFS_ATTR_INCOMPLETE. The filter is then reset in xfs_attr_complete_op() if XFS_DA_OP_REPLACE operation is performed. The filter is not reset though if XFS just removes the attribute (args->value == NULL) with xfs_attr_defer_remove(). attr code goes to XFS_DAS_DONE state. Fix this by always resetting XFS_ATTR_INCOMPLETE filter. The replace operation already resets this filter in anyway and others are completed at this step hence don't need it. Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework") Signed-off-by: Andrey Albershteyn Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_attr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 9976a00a73f9..e965a48e7db9 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -421,10 +421,10 @@ xfs_attr_complete_op( bool do_replace = args->op_flags & XFS_DA_OP_REPLACE; args->op_flags &= ~XFS_DA_OP_REPLACE; - if (do_replace) { - args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + if (do_replace) return replace_state; - } + return XFS_DAS_DONE; } From 9c64e749cebd9c2d3d55261530a98bcccb83b950 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Jan 2024 19:14:14 +0100 Subject: [PATCH 529/768] drm/virtio: Set segment size for virtio_gpu device Set the segment size of the virtio_gpu device to the value used by the drm helpers when allocating sg lists to fix the following complaint from DMA_API debug code: DMA-API: virtio-pci 0000:07:00.0: mapping sg segment longer than device claims to support [len=262144] [max=65536] Cc: stable@vger.kernel.org Tested-by: Zhenyu Zhang Acked-by: Vivek Kasireddy Signed-off-by: Sebastian Ott Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/7258a4cc-da16-5c34-a042-2a23ee396d56@redhat.com --- drivers/gpu/drm/virtio/virtgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index f8e9abe647b9..9539aa28937f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -94,6 +94,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) goto err_free; } + dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX); ret = virtio_gpu_init(vdev, dev); if (ret) goto err_free; From 37e8c97e539015637cb920d3e6f1e404f707a06e Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jan 2024 02:21:47 -0800 Subject: [PATCH 530/768] net: hsr: remove WARN_ONCE() in send_hsr_supervision_frame() Syzkaller reported [1] hitting a warning after failing to allocate resources for skb in hsr_init_skb(). Since a WARN_ONCE() call will not help much in this case, it might be prudent to switch to netdev_warn_once(). At the very least it will suppress syzkaller reports such as [1]. Just in case, use netdev_warn_once() in send_prp_supervision_frame() for similar reasons. [1] HSR: Could not send supervision frame WARNING: CPU: 1 PID: 85 at net/hsr/hsr_device.c:294 send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 RIP: 0010:send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 ... Call Trace: hsr_announce+0x114/0x370 net/hsr/hsr_device.c:382 call_timer_fn+0x193/0x590 kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x764/0xb20 kernel/time/timer.c:2022 run_timer_softirq+0x58/0xd0 kernel/time/timer.c:2035 __do_softirq+0x21a/0x8de kernel/softirq.c:553 invoke_softirq kernel/softirq.c:427 [inline] __irq_exit_rcu kernel/softirq.c:632 [inline] irq_exit_rcu+0xb7/0x120 kernel/softirq.c:644 sysvec_apic_timer_interrupt+0x95/0xb0 arch/x86/kernel/apic/apic.c:1076 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:649 ... This issue is also found in older kernels (at least up to 5.10). Cc: stable@vger.kernel.org Reported-by: syzbot+3ae0a3f42c84074b7c8e@syzkaller.appspotmail.com Fixes: 121c33b07b31 ("net: hsr: introduce common code for skb initialization") Signed-off-by: Nikita Zhandarovich Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 7ceb9ac6e730..9d71b66183da 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -308,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -355,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } From bfb007aebe6bff451f7f3a4be19f4f286d0d5d9c Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 25 Jan 2024 12:53:09 +0300 Subject: [PATCH 531/768] nfc: nci: free rx_data_reassembly skb on NCI device cleanup rx_data_reassembly skb is stored during NCI data exchange for processing fragmented packets. It is dropped only when the last fragment is processed or when an NTF packet with NCI_OP_RF_DEACTIVATE_NTF opcode is received. However, the NCI device may be deallocated before that which leads to skb leak. As by design the rx_data_reassembly skb is bound to the NCI device and nothing prevents the device to be freed before the skb is processed in some way and cleaned, free it on the NCI device cleanup. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Cc: stable@vger.kernel.org Reported-by: syzbot+6b7c68d9c21e4ee4251b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000f43987060043da7b@google.com/ Signed-off-by: Fedor Pchelkin Signed-off-by: David S. Miller --- net/nfc/nci/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 97348cedb16b..cdad47b140fa 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); From 577e4432f3ac810049cb7e6b71f4d96ec7c6e894 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 10:33:17 +0000 Subject: [PATCH 532/768] tcp: add sanity checks to rx zerocopy TCP rx zerocopy intent is to map pages initially allocated from NIC drivers, not pages owned by a fs. This patch adds to can_map_frag() these additional checks: - Page must not be a compound one. - page->mapping must be NULL. This fixes the panic reported by ZhangPeng. syzbot was able to loopback packets built with sendfile(), mapping pages owned by an ext4 file to TCP rx zerocopy. r3 = socket$inet_tcp(0x2, 0x1, 0x0) mmap(&(0x7f0000ff9000/0x4000)=nil, 0x4000, 0x0, 0x12, r3, 0x0) r4 = socket$inet_tcp(0x2, 0x1, 0x0) bind$inet(r4, &(0x7f0000000000)={0x2, 0x4e24, @multicast1}, 0x10) connect$inet(r4, &(0x7f00000006c0)={0x2, 0x4e24, @empty}, 0x10) r5 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) fallocate(r5, 0x0, 0x0, 0x85b8) sendfile(r4, r5, 0x0, 0x8ba0) getsockopt$inet_tcp_TCP_ZEROCOPY_RECEIVE(r4, 0x6, 0x23, &(0x7f00000001c0)={&(0x7f0000ffb000/0x3000)=nil, 0x3000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, &(0x7f0000000440)=0x40) r6 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) Fixes: 93ab6cc69162 ("tcp: implement mmap() for zero copy receive") Link: https://lore.kernel.org/netdev/5106a58e-04da-372a-b836-9d3d0bd2507b@huawei.com/T/ Reported-and-bisected-by: ZhangPeng Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Matthew Wilcox Cc: linux-mm@vger.kernel.org Cc: Andrew Morton Cc: linux-fsdevel@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1c6de385cce..7e2481b9eae1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, From c92c108403b09f75f3393588c2326ecad49ee2e2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 19 Jan 2024 03:08:37 -0600 Subject: [PATCH 533/768] Revert "drm/amd/pm: fix the high voltage and temperature issue" This reverts commit 5f38ac54e60562323ea4abb1bfb37d043ee23357. This causes issues with rebooting and the 7800XT. Cc: Kenneth Feng Cc: stable@vger.kernel.org Fixes: 5f38ac54e605 ("drm/amd/pm: fix the high voltage and temperature issue") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3062 Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 ++++---------- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 33 ++----------------- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 - .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 +---- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 8 +---- 5 files changed, 11 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b158d27d0a71..31b28e6f35b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4121,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 7): - case IP_VERSION(13, 0, 10): - r = psp_gpu_reset(adev); - break; - default: - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - break; - } - + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7ffad3eb0a01..0ad947df777a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -734,7 +734,7 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; smu->is_apu = false; - smu->smu_baco.state = SMU_BACO_STATE_NONE; + smu->smu_baco.state = SMU_BACO_STATE_EXIT; smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; @@ -1954,31 +1954,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return 0; } -static int smu_reset_mp1_state(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - - if ((!adev->in_runpm) && (!adev->in_suspend) && - (!amdgpu_in_reset(adev))) - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 7): - case IP_VERSION(13, 0, 10): - ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); - break; - default: - break; - } - - return ret; -} - static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; - int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1996,15 +1975,7 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - ret = smu_smc_hw_cleanup(smu); - if (ret) - return ret; - - ret = smu_reset_mp1_state(smu); - if (ret) - return ret; - - return 0; + return smu_smc_hw_cleanup(smu); } static void smu_late_fini(void *handle) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 2aa4fea87314..66e84defd0b6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -424,7 +424,6 @@ enum smu_reset_mode { enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, SMU_BACO_STATE_EXIT, - SMU_BACO_STATE_NONE, }; struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 4fdf34fffa9a..3230701d0d38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2748,13 +2748,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_PrepareMp1ForUnload, - 0x55, NULL); - - if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) - ret = smu_v13_0_disable_pmfw_state(smu); - + ret = smu_cmn_set_mp1_state(smu, mp1_state); break; default: /* Ignore others */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 7c3e162e2d81..0ffdb58af74e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2505,13 +2505,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_PrepareMp1ForUnload, - 0x55, NULL); - - if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) - ret = smu_v13_0_disable_pmfw_state(smu); - + ret = smu_cmn_set_mp1_state(smu, mp1_state); break; default: /* Ignore others */ From e42e29cc442395d62f1a8963ec2dfb700ba6a5d7 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 29 Jan 2024 08:40:23 -0600 Subject: [PATCH 534/768] Revert "jfs: fix shift-out-of-bounds in dbJoin" This reverts commit cca974daeb6c43ea971f8ceff5a7080d7d49ee30. The added sanity check is incorrect. BUDMIN is not the wrong value and is too small. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_dmap.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 8eec84c651bf..cb3cda1390ad 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -2763,9 +2763,7 @@ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) * leafno - the number of the leaf to be updated. * newval - the new value for the leaf. * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * RETURN VALUES: none */ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) { @@ -2792,10 +2790,6 @@ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) * get the buddy size (number of words covered) of * the new value. */ - - if ((newval - tp->dmt_budmin) > BUDMIN) - return -EIO; - budsz = BUDSIZE(newval, tp->dmt_budmin); /* try to join. From 6d3c7fb17b4c047ccd0b42cf1308da693ab45acb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 24 Jan 2024 09:27:27 -0800 Subject: [PATCH 535/768] nvme: use ctrl state accessor The ctrl->state value is updated in another thread using WRITE_ONCE, so ensure all the readers use the appropriate accessor. Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 13 +++++++------ drivers/nvme/host/auth.c | 2 +- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/fabrics.h | 8 +++++--- drivers/nvme/host/multipath.c | 15 ++++++++------- drivers/nvme/host/nvme.h | 11 ++++++----- drivers/nvme/host/sysfs.c | 6 +++--- drivers/nvme/target/loop.c | 8 +++++--- 8 files changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 596bb11eeba5..c727cd1f264b 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -797,6 +797,7 @@ static int apple_nvme_init_request(struct blk_mq_tag_set *set, static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) { + enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl); u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); bool dead = false, freeze = false; unsigned long flags; @@ -808,8 +809,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) if (csts & NVME_CSTS_CFS) dead = true; - if (anv->ctrl.state == NVME_CTRL_LIVE || - anv->ctrl.state == NVME_CTRL_RESETTING) { + if (state == NVME_CTRL_LIVE || + state == NVME_CTRL_RESETTING) { freeze = true; nvme_start_freeze(&anv->ctrl); } @@ -881,7 +882,7 @@ static enum blk_eh_timer_return apple_nvme_timeout(struct request *req) unsigned long flags; u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); - if (anv->ctrl.state != NVME_CTRL_LIVE) { + if (nvme_ctrl_state(&anv->ctrl) != NVME_CTRL_LIVE) { /* * From rdma.c: * If we are resetting, connecting or deleting we should @@ -985,10 +986,10 @@ static void apple_nvme_reset_work(struct work_struct *work) u32 boot_status, aqa; struct apple_nvme *anv = container_of(work, struct apple_nvme, ctrl.reset_work); + enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl); - if (anv->ctrl.state != NVME_CTRL_RESETTING) { - dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", - anv->ctrl.state); + if (state != NVME_CTRL_RESETTING) { + dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", state); ret = -ENODEV; goto out; } diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 72c0525c75f5..2a12ee878783 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -897,7 +897,7 @@ static void nvme_ctrl_auth_work(struct work_struct *work) * If the ctrl is no connected, bail as reconnect will handle * authentication. */ - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) return; /* Authenticate admin queue first */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0810be0d1154..a42c347d08e8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -721,7 +721,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, EXPORT_SYMBOL_GPL(nvme_fail_nonready_command); bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live) + bool queue_live, enum nvme_ctrl_state state) { struct nvme_request *req = nvme_req(rq); @@ -742,7 +742,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, * command, which is require to set the queue live in the * appropinquate states. */ - switch (nvme_ctrl_state(ctrl)) { + switch (state) { case NVME_CTRL_CONNECTING: if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && (req->cmd->fabrics.fctype == nvme_fabrics_type_connect || diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index fbaee5a7be19..06cc54851b1b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -185,9 +185,11 @@ static inline bool nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - if (ctrl->state == NVME_CTRL_DELETING || - ctrl->state == NVME_CTRL_DELETING_NOIO || - ctrl->state == NVME_CTRL_DEAD || + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + if (state == NVME_CTRL_DELETING || + state == NVME_CTRL_DELETING_NOIO || + state == NVME_CTRL_DEAD || strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || !uuid_equal(&opts->host->id, &ctrl->opts->host->id)) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2dd4137a08b2..74de1e64aeea 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -156,7 +156,7 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) if (!ns->head->disk) continue; kblockd_schedule_work(&ns->head->requeue_work); - if (ctrl->state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) disk_uevent(ns->head->disk, KOBJ_CHANGE); } up_read(&ctrl->namespaces_rwsem); @@ -223,13 +223,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) static bool nvme_path_is_disabled(struct nvme_ns *ns) { + enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl); + /* * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should * still be able to complete assuming that the controller is connected. * Otherwise it will fail immediately and return to the requeue list. */ - if (ns->ctrl->state != NVME_CTRL_LIVE && - ns->ctrl->state != NVME_CTRL_DELETING) + if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING) return true; if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) || !test_bit(NVME_NS_READY, &ns->flags)) @@ -331,7 +332,7 @@ out: static inline bool nvme_path_is_optimized(struct nvme_ns *ns) { - return ns->ctrl->state == NVME_CTRL_LIVE && + return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE && ns->ana_state == NVME_ANA_OPTIMIZED; } @@ -358,7 +359,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) list_for_each_entry_rcu(ns, &head->list, siblings) { if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) continue; - switch (ns->ctrl->state) { + switch (nvme_ctrl_state(ns->ctrl)) { case NVME_CTRL_LIVE: case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: @@ -667,7 +668,7 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, * controller is ready. */ if (nvme_state_is_live(ns->ana_state) && - ns->ctrl->state == NVME_CTRL_LIVE) + nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -748,7 +749,7 @@ static void nvme_ana_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) return; nvme_read_ana_log(ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 030c80818240..1700063bc24d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -805,17 +805,18 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *req); bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live); + bool queue_live, enum nvme_ctrl_state state); static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) { - if (likely(ctrl->state == NVME_CTRL_LIVE)) + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + if (likely(state == NVME_CTRL_LIVE)) return true; - if (ctrl->ops->flags & NVME_F_FABRICS && - ctrl->state == NVME_CTRL_DELETING) + if (ctrl->ops->flags & NVME_F_FABRICS && state == NVME_CTRL_DELETING) return queue_live; - return __nvme_check_ready(ctrl, rq, queue_live); + return __nvme_check_ready(ctrl, rq, queue_live, state); } /* diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 754e91111042..6b2f06f190de 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -311,6 +311,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, char *buf) { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + unsigned state = (unsigned)nvme_ctrl_state(ctrl); static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", @@ -321,9 +322,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, [NVME_CTRL_DEAD] = "dead", }; - if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && - state_name[ctrl->state]) - return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); + if (state < ARRAY_SIZE(state_name) && state_name[state]) + return sysfs_emit(buf, "%s\n", state_name[state]); return sysfs_emit(buf, "unknown state\n"); } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 2b2e0d00af85..e589915ddef8 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -400,7 +400,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) } nvme_quiesce_admin_queue(&ctrl->ctrl); - if (ctrl->ctrl.state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE) nvme_disable_ctrl(&ctrl->ctrl, true); nvme_cancel_admin_tagset(&ctrl->ctrl); @@ -434,8 +434,10 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) nvme_loop_shutdown_ctrl(ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { - if (ctrl->ctrl.state != NVME_CTRL_DELETING && - ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO) + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + + if (state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO) /* state change failure for non-deleted ctrl? */ WARN_ON_ONCE(1); return; From 346f59d1e8ed0eed41c80e1acb657e484c308e6a Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 29 Jan 2024 15:12:54 +0300 Subject: [PATCH 536/768] ALSA: usb-audio: Check presence of valid altsetting control Many devices with a single alternate setting do not have a Valid Alternate Setting Control and validation performed by validate_sample_rate_table_v2v3() doesn't work on them and is not really needed. So check the presense of control before sending altsetting validation requests. MOTU Microbook IIc is suffering the most without this check. It takes up to 40 seconds to bootup due to how slow it switches sampling rates: [ 2659.164824] usb 3-2: New USB device found, idVendor=07fd, idProduct=0004, bcdDevice= 0.60 [ 2659.164827] usb 3-2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 2659.164829] usb 3-2: Product: MicroBook IIc [ 2659.164830] usb 3-2: Manufacturer: MOTU [ 2659.166204] usb 3-2: Found last interface = 3 [ 2679.322298] usb 3-2: No valid sample rate available for 1:1, assuming a firmware bug [ 2679.322306] usb 3-2: 1:1: add audio endpoint 0x3 [ 2679.322321] usb 3-2: Creating new data endpoint #3 [ 2679.322552] usb 3-2: 1:1 Set sample rate 96000, clock 1 [ 2684.362250] usb 3-2: 2:1: cannot get freq (v2/v3): err -110 [ 2694.444700] usb 3-2: No valid sample rate available for 2:1, assuming a firmware bug [ 2694.444707] usb 3-2: 2:1: add audio endpoint 0x84 [ 2694.444721] usb 3-2: Creating new data endpoint #84 [ 2699.482103] usb 3-2: 2:1 Set sample rate 96000, clock 1 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240129121254.3454481-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/format.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/usb/format.c b/sound/usb/format.c index ab5fed9f55b6..3b45d0ee7693 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, int clock) { struct usb_device *dev = chip->dev; + struct usb_host_interface *alts; unsigned int *table; unsigned int nr_rates; int i, err; + u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the @@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) + return 0; + + if (fp->protocol == UAC_VERSION_3) { + struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = le32_to_cpu(as->bmControls); + } else { + struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = as->bmControls; + } + + if (!uac_v2v3_control_is_readable(bmControls, + UAC2_AS_VAL_ALT_SETTINGS)) + return 0; + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; From 21fdd8dd3726199451fc495f20104356e071a997 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 29 Jan 2024 13:00:07 -0300 Subject: [PATCH 537/768] tools headers UAPI: Sync unistd.h to pick {list,stat}mount, lsm_{[gs]et_self_attr,list_modules} syscall numbers To pick the changes in these csets: d8b0f5465012538c ("wire up syscalls for statmount/listmount") 5f42375904b08890 ("LSM: wireup Linux Security Module syscalls") Used in some architectures to create syscall tables. This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Cc: Casey Schaufler Cc: Christian Brauner Cc: Miklos Szeredi Cc: Paul Moore Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbfMuAlUMRO9Hqa6@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 756b013fb832..75f00965ab15 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_statmount 457 +__SYSCALL(__NR_statmount, sys_statmount) + +#define __NR_listmount 458 +__SYSCALL(__NR_listmount, sys_listmount) + +#define __NR_lsm_get_self_attr 459 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 460 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 461 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) + #undef __NR_syscalls -#define __NR_syscalls 457 +#define __NR_syscalls 462 /* * 32 bit systems traditionally used different From c6dce23ec993f7da7790a9eadb36864ceb60e942 Mon Sep 17 00:00:00 2001 From: Techno Mooney Date: Mon, 29 Jan 2024 15:11:47 +0700 Subject: [PATCH 538/768] ASoC: amd: yc: Add DMI quirk for MSI Bravo 15 C7VF The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Techno Mooney Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218402 Cc: stable@vger.kernel.org Signed-off-by: Techno Mooney Signed-off-by: Bagas Sanjaya Link: https://msgid.link/r/20240129081148.1044891-1-bagasdotme@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index d83cb6e4c62a..23d44a50d815 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -297,6 +297,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"), + } + }, { .driver_data = &acp6x_card, .matches = { From 0adf963b8463faa44653e22e56ce55f747e68868 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:43 +0800 Subject: [PATCH 539/768] ASoC: sunxi: sun4i-spdif: Add support for Allwinner H616 The SPDIF hardware block found in the H616 SoC has the same layout as the one found in the H6 SoC, except that it is missing the receiver side. Since the driver currently only supports the transmit function, support for the H616 is identical to what is currently done for the H6. Signed-off-by: Chen-Yu Tsai Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-4-wens@kernel.org Signed-off-by: Mark Brown --- sound/soc/sunxi/sun4i-spdif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index 702386823d17..f41c30955857 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -577,6 +577,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = { .compatible = "allwinner,sun50i-h6-spdif", .data = &sun50i_h6_spdif_quirks, }, + { + .compatible = "allwinner,sun50i-h616-spdif", + /* Essentially the same as the H6, but without RX */ + .data = &sun50i_h6_spdif_quirks, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match); From 57b3c130d97e45b8a07586e0ae113e43776c5ea8 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:41 +0800 Subject: [PATCH 540/768] ASoC: sun4i-spdif: Fix requirements for H6 When the H6 was added to the bindings, only the TX DMA channel was added. As the hardware supports both transmit and receive functions, the binding is missing the RX DMA channel and is thus incorrect. Also, the reset control was not made mandatory. Add the RX DMA channel for SPDIF on H6 by removing the compatible from the list of compatibles that should only have a TX DMA channel. And add the H6 compatible to the list of compatibles that require the reset control to be present. Fixes: b20453031472 ("dt-bindings: sound: sun4i-spdif: Add Allwinner H6 compatible") Signed-off-by: Chen-Yu Tsai Acked-by: Conor Dooley Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-2-wens@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml index 8108c564dd78..98e2e053fa19 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml @@ -62,6 +62,7 @@ allOf: enum: - allwinner,sun6i-a31-spdif - allwinner,sun8i-h3-spdif + - allwinner,sun50i-h6-spdif then: required: @@ -73,7 +74,6 @@ allOf: contains: enum: - allwinner,sun8i-h3-spdif - - allwinner,sun50i-h6-spdif then: properties: From 7a9dc944f129bb56ef855d9c0b0647bc3e98a56f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:42 +0800 Subject: [PATCH 541/768] ASoC: sun4i-spdif: Add Allwinner H616 compatible The SPDIF hardware block found in the H616 SoC has the same layout as the one found in the H6 SoC, except that it is missing the receiver side. Add a new compatible string for it. Signed-off-by: Chen-Yu Tsai Acked-by: Conor Dooley Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-3-wens@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml index 98e2e053fa19..aa32dc950e72 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml @@ -22,6 +22,7 @@ properties: - const: allwinner,sun6i-a31-spdif - const: allwinner,sun8i-h3-spdif - const: allwinner,sun50i-h6-spdif + - const: allwinner,sun50i-h616-spdif - items: - const: allwinner,sun8i-a83t-spdif - const: allwinner,sun8i-h3-spdif @@ -63,6 +64,7 @@ allOf: - allwinner,sun6i-a31-spdif - allwinner,sun8i-h3-spdif - allwinner,sun50i-h6-spdif + - allwinner,sun50i-h616-spdif then: required: @@ -74,6 +76,7 @@ allOf: contains: enum: - allwinner,sun8i-h3-spdif + - allwinner,sun50i-h616-spdif then: properties: From f1f6a6b1830a8f1dc92ee26fae76333f446b0663 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 11 Dec 2023 18:05:52 -0800 Subject: [PATCH 542/768] e1000e: correct maximum frequency adjustment values The e1000e driver supports hardware with a variety of different clock speeds, and thus a variety of different increment values used for programming its PTP hardware clock. The values currently programmed in e1000e_ptp_init are incorrect. In particular, only two maximum adjustments are used: 24000000 - 1, and 600000000 - 1. These were originally intended to be used with the 96 MHz clock and the 25 MHz clock. Both of these values are actually slightly too high. For the 96 MHz clock, the actual maximum value that can safely be programmed is 23,999,938. For the 25 MHz clock, the maximum value is 599,999,904. Worse, several devices use a 24 MHz clock or a 38.4 MHz clock. These parts are incorrectly assigned one of either the 24million or 600million values. For the 24 MHz clock, this is not a significant issue: its current increment value can support an adjustment up to 7billion in the positive direction. However, the 38.4 KHz clock uses an increment value which can only support up to 230,769,157 before it starts overflowing. To understand where these values come from, consider that frequency adjustments have the form of: new_incval = base_incval + (base_incval * adjustment) / (unit of adjustment) The maximum adjustment is reported in terms of parts per billion: new_incval = base_incval + (base_incval * adjustment) / 1 billion The largest possible adjustment is thus given by the following: max_incval = base_incval + (base_incval * max_adj) / 1 billion Re-arranging to solve for max_adj: max_adj = (max_incval - base_incval) * 1 billion / base_incval We also need to ensure that negative adjustments cannot underflow. This can be achieved simply by ensuring max_adj is always less than 1 billion. Introduce new macros in e1000.h codifying the maximum adjustment in PPB for each frequency given its associated increment values. Also clarify where these values come from by commenting about the above equations. Replace the switch statement in e1000e_ptp_init with one which mirrors the increment value switch statement from e1000e_get_base_timinica. For each device, assign the appropriate maximum adjustment based on its frequency. Some parts can have one of two frequency modes as determined by E1000_TSYNCRXCTL_SYSCFI. Since the new flow directly matches the assignments in e1000e_get_base_timinca, and uses well defined macro names, it is much easier to verify that the resulting maximum adjustments are correct. It also avoids difficult to parse construction such as the "hw->mac.type < e1000_phc_lpt", and the use of fallthrough which was especially confusing when combined with a conditional block. Note that I believe the current increment value configuration used for 24MHz clocks is sub-par, as it leaves at least 3 extra bits available in the INCVALUE register. However, fixing that requires more careful review of the clock rate and associated values. Reported-by: Trey Harrison Fixes: 68fe1d5da548 ("e1000e: Add Support for 38.4MHZ frequency") Fixes: d89777bf0e42 ("e1000e: add support for IEEE-1588 PTP") Signed-off-by: Jacob Keller Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 20 ++++++++++++++++++++ drivers/net/ethernet/intel/e1000e/ptp.c | 22 +++++++++++++++------- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index a187582d2299..ba9c19e6994c 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -360,23 +360,43 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); * As a result, a shift of INCVALUE_SHIFT_n is used to fit a value of * INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n) * bits to count nanoseconds leaving the rest for fractional nonseconds. + * + * Any given INCVALUE also has an associated maximum adjustment value. This + * maximum adjustment value is the largest increase (or decrease) which can be + * safely applied without overflowing the INCVALUE. Since INCVALUE has + * a maximum range of 24 bits, its largest value is 0xFFFFFF. + * + * To understand where the maximum value comes from, consider the following + * equation: + * + * new_incval = base_incval + (base_incval * adjustment) / 1billion + * + * To avoid overflow that means: + * max_incval = base_incval + (base_incval * max_adj) / billion + * + * Re-arranging: + * max_adj = floor(((max_incval - base_incval) * 1billion) / 1billion) */ #define INCVALUE_96MHZ 125 #define INCVALUE_SHIFT_96MHZ 17 #define INCPERIOD_SHIFT_96MHZ 2 #define INCPERIOD_96MHZ (12 >> INCPERIOD_SHIFT_96MHZ) +#define MAX_PPB_96MHZ 23999900 /* 23,999,900 ppb */ #define INCVALUE_25MHZ 40 #define INCVALUE_SHIFT_25MHZ 18 #define INCPERIOD_25MHZ 1 +#define MAX_PPB_25MHZ 599999900 /* 599,999,900 ppb */ #define INCVALUE_24MHZ 125 #define INCVALUE_SHIFT_24MHZ 14 #define INCPERIOD_24MHZ 3 +#define MAX_PPB_24MHZ 999999999 /* 999,999,999 ppb */ #define INCVALUE_38400KHZ 26 #define INCVALUE_SHIFT_38400KHZ 19 #define INCPERIOD_38400KHZ 1 +#define MAX_PPB_38400KHZ 230769100 /* 230,769,100 ppb */ /* Another drawback of scaling the incvalue by a large factor is the * 64-bit SYSTIM register overflows more quickly. This is dealt with diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 02d871bc112a..bbcfd529399b 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -280,8 +280,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) switch (hw->mac.type) { case e1000_pch2lan: + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + break; case e1000_pch_lpt: + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; + break; case e1000_pch_spt: + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + break; case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: @@ -289,15 +298,14 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_lnp: case e1000_pch_ptp: case e1000_pch_nvp: - if ((hw->mac.type < e1000_pch_lpt) || - (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { - adapter->ptp_clock_info.max_adj = 24000000 - 1; - break; - } - fallthrough; + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; + break; case e1000_82574: case e1000_82583: - adapter->ptp_clock_info.max_adj = 600000000 - 1; + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; break; default: break; From bbc404d20d1b46d89b461918bc44587620eda200 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 Jan 2024 18:25:36 +0100 Subject: [PATCH 543/768] ixgbe: Fix an error handling path in ixgbe_read_iosf_sb_reg_x550() All error handling paths, except this one, go to 'out' where release_swfw_sync() is called. This call balances the acquire_swfw_sync() call done at the beginning of the function. Branch to the error handling path in order to correctly release some resources in case of error. Fixes: ae14a1d8e104 ("ixgbe: Fix IOSF SB access issues") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 6208923e29a2..c1adc94a5a65 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -716,7 +716,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); hw_dbg(hw, "Failed to read, error %x\n", error); - return -EIO; + ret = -EIO; + goto out; } if (!ret) From ccbca118ef1a71d5faa012b9bb1ecd784e9e2b42 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Mon, 22 Jan 2024 21:35:09 -0800 Subject: [PATCH 544/768] NFSv4.1: Assign the right value for initval and retries for rpc timeout Make sure the rpc timeout was assigned with the correct value for initial timeout and max number of retries. Fixes: 57331a59ac0d ("NFSv4.1: Use the nfs_client's rpc timeouts for backchannel") Signed-off-by: Samasth Norway Ananda Reviewed-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index f60c93e5a25d..b969e505c7b7 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1598,10 +1598,10 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) /* Finally, send the reply synchronously */ if (rqstp->bc_to_initval > 0) { timeout.to_initval = rqstp->bc_to_initval; - timeout.to_retries = rqstp->bc_to_initval; + timeout.to_retries = rqstp->bc_to_retries; } else { timeout.to_initval = req->rq_xprt->timeout->to_initval; - timeout.to_initval = req->rq_xprt->timeout->to_retries; + timeout.to_retries = req->rq_xprt->timeout->to_retries; } memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req, &timeout); From 5513c5d0fb3d509cdd0a11afc18441c57eb7c94c Mon Sep 17 00:00:00 2001 From: Marian Postevca Date: Sun, 28 Jan 2024 19:22:29 +0200 Subject: [PATCH 545/768] ASoC: amd: acp: Fix support for a Huawei Matebook laptop Previous commit that added support for Huawei MateBook D16 2021 with Ryzen 4600H (HVY-WXX9 M1010) was incomplete. To activate support for this laptop, the DMI table in acp3x-es83xx machine driver must also be updated. Fixes: b5338b1b901e ("ASoC: amd: acp: Add support for a new Huawei Matebook laptop") Signed-off-by: Marian Postevca Link: https://msgid.link/r/20240128172229.657142-1-posteuca@mutex.one Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c index f85b85ea4be9..2b0aa270a3e9 100644 --- a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c +++ b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c @@ -354,6 +354,14 @@ static const struct dmi_system_id acp3x_es83xx_dmi_table[] = { }, .driver_data = (void *)(ES83XX_ENABLE_DMIC|ES83XX_48_MHZ_MCLK), }, + { + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1010"), + }, + .driver_data = (void *)(ES83XX_ENABLE_DMIC), + }, { .matches = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"), From e84b01a880f635e3084a361afba41f95ff500d12 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 11:52:54 -0700 Subject: [PATCH 546/768] io_uring/poll: move poll execution helpers higher up In preparation for calling __io_poll_execute() higher up, move the functions to avoid forward declarations. No functional changes in this patch. Signed-off-by: Jens Axboe --- io_uring/poll.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index d59b74a99d4e..785a5b191003 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -228,6 +228,26 @@ enum { IOU_POLL_REISSUE = 3, }; +static void __io_poll_execute(struct io_kiocb *req, int mask) +{ + unsigned flags = 0; + + io_req_set_res(req, mask, 0); + req->io_task_work.func = io_poll_task_func; + + trace_io_uring_task_add(req, mask); + + if (!(req->flags & REQ_F_POLL_NO_LAZY)) + flags = IOU_F_TWQ_LAZY_WAKE; + __io_req_task_work_add(req, flags); +} + +static inline void io_poll_execute(struct io_kiocb *req, int res) +{ + if (io_poll_get_ownership(req)) + __io_poll_execute(req, res); +} + /* * All poll tw should go through this. Checks for poll events, manages * references, does rewait, etc. @@ -364,26 +384,6 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) } } -static void __io_poll_execute(struct io_kiocb *req, int mask) -{ - unsigned flags = 0; - - io_req_set_res(req, mask, 0); - req->io_task_work.func = io_poll_task_func; - - trace_io_uring_task_add(req, mask); - - if (!(req->flags & REQ_F_POLL_NO_LAZY)) - flags = IOU_F_TWQ_LAZY_WAKE; - __io_req_task_work_add(req, flags); -} - -static inline void io_poll_execute(struct io_kiocb *req, int res) -{ - if (io_poll_get_ownership(req)) - __io_poll_execute(req, res); -} - static void io_poll_cancel_req(struct io_kiocb *req) { io_poll_mark_cancelled(req); From 91e5d765a82fb2c9d0b7ad930d8953208081ddf1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 11:54:18 -0700 Subject: [PATCH 547/768] io_uring/net: un-indent mshot retry path in io_recv_finish() In preparation for putting some retry logic in there, have the done path just skip straight to the end rather than have too much nesting in here. No functional changes in this patch. Signed-off-by: Jens Axboe --- io_uring/net.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 75d494dad7e2..740c6bfa5b59 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -645,23 +645,27 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, return true; } - if (!mshot_finished) { - if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, - *ret, cflags | IORING_CQE_F_MORE)) { - io_recv_prep_retry(req); - /* Known not-empty or unknown state, retry */ - if (cflags & IORING_CQE_F_SOCK_NONEMPTY || - msg->msg_inq == -1) - return false; - if (issue_flags & IO_URING_F_MULTISHOT) - *ret = IOU_ISSUE_SKIP_COMPLETE; - else - *ret = -EAGAIN; - return true; - } - /* Otherwise stop multishot but use the current result. */ - } + if (mshot_finished) + goto finish; + /* + * Fill CQE for this receive and see if we should keep trying to + * receive from this socket. + */ + if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, + *ret, cflags | IORING_CQE_F_MORE)) { + io_recv_prep_retry(req); + /* Known not-empty or unknown state, retry */ + if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) + return false; + if (issue_flags & IO_URING_F_MULTISHOT) + *ret = IOU_ISSUE_SKIP_COMPLETE; + else + *ret = -EAGAIN; + return true; + } + /* Otherwise stop multishot but use the current result. */ +finish: io_req_set_res(req, *ret, cflags); if (issue_flags & IO_URING_F_MULTISHOT) From 704ea888d646cb9d715662944cf389c823252ee0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 11:57:11 -0700 Subject: [PATCH 548/768] io_uring/poll: add requeue return code from poll multishot handling Since our poll handling is edge triggered, multishot handlers retry internally until they know that no more data is available. In preparation for limiting these retries, add an internal return code, IOU_REQUEUE, which can be used to inform the poll backend about the handler wanting to retry, but that this should happen through a normal task_work requeue rather than keep hammering on the issue side for this one request. No functional changes in this patch, nobody is using this return code just yet. Signed-off-by: Jens Axboe --- io_uring/io_uring.h | 8 +++++++- io_uring/poll.c | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 04e33f25919c..d5495710c178 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -15,11 +15,17 @@ #include #endif - enum { IOU_OK = 0, IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED, + /* + * Requeue the task_work to restart operations on this request. The + * actual value isn't important, should just be not an otherwise + * valid error code, yet less than -MAX_ERRNO and valid internally. + */ + IOU_REQUEUE = -3072, + /* * Intended only when both IO_URING_F_MULTISHOT is passed * to indicate to the poll runner that multishot should be diff --git a/io_uring/poll.c b/io_uring/poll.c index 785a5b191003..7513afc7b702 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -226,6 +226,7 @@ enum { IOU_POLL_NO_ACTION = 1, IOU_POLL_REMOVE_POLL_USE_RES = 2, IOU_POLL_REISSUE = 3, + IOU_POLL_REQUEUE = 4, }; static void __io_poll_execute(struct io_kiocb *req, int mask) @@ -329,6 +330,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) int ret = io_poll_issue(req, ts); if (ret == IOU_STOP_MULTISHOT) return IOU_POLL_REMOVE_POLL_USE_RES; + else if (ret == IOU_REQUEUE) + return IOU_POLL_REQUEUE; if (ret < 0) return ret; } @@ -351,8 +354,12 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) int ret; ret = io_poll_check_events(req, ts); - if (ret == IOU_POLL_NO_ACTION) + if (ret == IOU_POLL_NO_ACTION) { return; + } else if (ret == IOU_POLL_REQUEUE) { + __io_poll_execute(req, 0); + return; + } io_poll_remove_entries(req); io_poll_tw_hash_eject(req, ts); From 76b367a2d83163cf19173d5cb0b562acbabc8eac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 12:00:58 -0700 Subject: [PATCH 549/768] io_uring/net: limit inline multishot retries If we have multiple clients and some/all are flooding the receives to such an extent that we can retry a LOT handling multishot receives, then we can be starving some clients and hence serving traffic in an imbalanced fashion. Limit multishot retry attempts to some arbitrary value, whose only purpose serves to ensure that we don't keep serving a single connection for way too long. We default to 32 retries, which should be more than enough to provide fairness, yet not so small that we'll spend too much time requeuing rather than handling traffic. Cc: stable@vger.kernel.org Depends-on: 704ea888d646 ("io_uring/poll: add requeue return code from poll multishot handling") Depends-on: 1e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()") Depends-on: e84b01a880f6 ("io_uring/poll: move poll execution helpers higher up") Fixes: b3fdea6ecb55 ("io_uring: multishot recv") Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg") Link: https://github.com/axboe/liburing/issues/1043 Signed-off-by: Jens Axboe --- io_uring/net.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 740c6bfa5b59..a12ff69e6843 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -60,6 +60,7 @@ struct io_sr_msg { unsigned len; unsigned done_io; unsigned msg_flags; + unsigned nr_multishot_loops; u16 flags; /* initialised and used only by !msg send variants */ u16 addr_len; @@ -70,6 +71,13 @@ struct io_sr_msg { struct io_kiocb *notif; }; +/* + * Number of times we'll try and do receives if there's more data. If we + * exceed this limit, then add us to the back of the queue and retry from + * there. This helps fairness between flooding clients. + */ +#define MULTISHOT_MAX_RETRY 32 + static inline bool io_check_multishot(struct io_kiocb *req, unsigned int issue_flags) { @@ -611,6 +619,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->msg_flags |= MSG_CMSG_COMPAT; #endif sr->done_io = 0; + sr->nr_multishot_loops = 0; return 0; } @@ -654,12 +663,20 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, */ if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, *ret, cflags | IORING_CQE_F_MORE)) { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE; + io_recv_prep_retry(req); /* Known not-empty or unknown state, retry */ - if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) - return false; + if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) { + if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY) + return false; + /* mshot retries exceeded, force a requeue */ + sr->nr_multishot_loops = 0; + mshot_retry_ret = IOU_REQUEUE; + } if (issue_flags & IO_URING_F_MULTISHOT) - *ret = IOU_ISSUE_SKIP_COMPLETE; + *ret = mshot_retry_ret; else *ret = -EAGAIN; return true; From c44fc98f0a8ffd94fa0bd291928e7e312ffc7ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Fri, 26 Jan 2024 11:49:35 +0100 Subject: [PATCH 550/768] net: dsa: qca8k: fix illegal usage of GPIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When working with GPIO, its direction must be set either when the GPIO is requested by gpiod_get*() or later on by one of the gpiod_direction_*() functions. Neither of this is done here which results in undefined behavior on some systems. As the reset GPIO is used right after it is requested here, it makes sense to configure it as GPIOD_OUT_HIGH right away. With that, the following gpiod_set_value_cansleep(1) becomes redundant and can be safely removed. Fixes: a653f2f538f9 ("net: dsa: qca8k: introduce reset via gpio feature") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/1706266175-3408-1-git-send-email-michal.vokac@ysoft.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/qca8k-8xxx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index c51f40960961..7a864329cb72 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -2051,12 +2051,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev) priv->info = of_device_get_match_data(priv->dev); priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset", - GPIOD_ASIS); + GPIOD_OUT_HIGH); if (IS_ERR(priv->reset_gpio)) return PTR_ERR(priv->reset_gpio); if (priv->reset_gpio) { - gpiod_set_value_cansleep(priv->reset_gpio, 1); /* The active low duration must be greater than 10 ms * and checkpatch.pl wants 20 ms. */ From 59c93583491ab15db109f9902524d241c4fa4c0b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Jan 2024 12:13:08 -0800 Subject: [PATCH 551/768] selftests: net: add missing config for nftables-backed iptables Modern OSes use iptables implementation with nf_tables as a backend, e.g.: $ iptables -V iptables v1.8.8 (nf_tables) Pablo points out that we need CONFIG_NFT_COMPAT to make that work, otherwise we see a lot of: Warning: Extension DNAT revision 0 not supported, missing kernel module? with DNAT being just an example here, other modules we need include udp, TTL, length etc. Link: https://lore.kernel.org/r/20240126201308.2903602-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 56da5d52674c..d4b38177ed04 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -60,6 +60,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NFT_COMPAT=m CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y From 881f78f472556ed05588172d5b5676b48dc48240 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 29 Jan 2024 20:27:23 -0800 Subject: [PATCH 552/768] xfs: remove conditional building of rt geometry validator functions I mistakenly turned off CONFIG_XFS_RT in the Kconfig file for arm64 variant of the djwong-wtf git branch. Unfortunately, it took me a good hour to figure out that RT wasn't built because this is what got printed to dmesg: XFS (sda2): realtime geometry sanity check failed XFS (sda2): Metadata corruption detected at xfs_sb_read_verify+0x170/0x190 [xfs], xfs_sb block 0x0 Whereas I would have expected: XFS (sda2): Not built with CONFIG_XFS_RT XFS (sda2): RT mount failed The root cause of these problems is the conditional compilation of the new functions xfs_validate_rtextents and xfs_compute_rextslog that I introduced in the two commits listed below. The !RT versions of these functions return false and 0, respectively, which causes primary superblock validation to fail, which explains the first message. Move the two functions to other parts of libxfs that are not conditionally defined by CONFIG_XFS_RT and remove the broken stubs so that validation works again. Fixes: e14293803f4e ("xfs: don't allow overly small or large realtime volumes") Fixes: a6a38f309afc ("xfs: make rextslog computation consistent with mkfs") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_rtbitmap.c | 14 -------------- fs/xfs/libxfs/xfs_rtbitmap.h | 16 ---------------- fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++ fs/xfs/libxfs/xfs_sb.h | 2 ++ fs/xfs/libxfs/xfs_types.h | 12 ++++++++++++ fs/xfs/scrub/rtbitmap.c | 1 + fs/xfs/scrub/rtsummary.c | 1 + 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 31100120b2c5..e31663cb7b43 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1118,20 +1118,6 @@ xfs_rtbitmap_blockcount( return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize); } -/* - * Compute the maximum level number of the realtime summary file, as defined by - * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct - * use of rt volumes with more than 2^32 extents. - */ -uint8_t -xfs_compute_rextslog( - xfs_rtbxlen_t rtextents) -{ - if (!rtextents) - return 0; - return xfs_highbit64(rtextents); -} - /* * Compute the number of rtbitmap words needed to populate every block of a * bitmap that is large enough to track the given number of rt extents. diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 274dc7dae1fa..152a66750af5 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -351,20 +351,6 @@ xfs_rtfree_extent( int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t rtlen); -uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); - -/* Do we support an rt volume having this number of rtextents? */ -static inline bool -xfs_validate_rtextents( - xfs_rtbxlen_t rtextents) -{ - /* No runt rt volumes */ - if (rtextents == 0) - return false; - - return true; -} - xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents); unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, @@ -383,8 +369,6 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, # define xfs_rtsummary_read_buf(a,b) (-ENOSYS) # define xfs_rtbuf_cache_relse(a) (0) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) -# define xfs_compute_rextslog(rtx) (0) -# define xfs_validate_rtextents(rtx) (false) static inline xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) { diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 4a9e8588f4c9..5bb6e2bd6dee 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1377,3 +1377,17 @@ xfs_validate_stripe_geometry( } return true; } + +/* + * Compute the maximum level number of the realtime summary file, as defined by + * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct + * use of rt volumes with more than 2^32 extents. + */ +uint8_t +xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) +{ + if (!rtextents) + return 0; + return xfs_highbit64(rtextents); +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 19134b23c10b..2e8e8d63d4eb 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool silent); +uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 20b5375f2d9c..62e02d5380ad 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -251,4 +251,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, xfs_fileoff_t len); +/* Do we support an rt volume having this number of rtextents? */ +static inline bool +xfs_validate_rtextents( + xfs_rtbxlen_t rtextents) +{ + /* No runt rt volumes */ + if (rtextents == 0) + return false; + + return true; +} + #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 441ca9977652..46583517377f 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -15,6 +15,7 @@ #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bit.h" +#include "xfs_sb.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/repair.h" diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index fabd0ed9dfa6..b1ff4f33324a 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -16,6 +16,7 @@ #include "xfs_rtbitmap.h" #include "xfs_bit.h" #include "xfs_bmap.h" +#include "xfs_sb.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" From 60365049ccbacd101654a66ddcb299abfabd4fc5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 26 Jan 2024 09:32:20 +0100 Subject: [PATCH 553/768] ipv6: Ensure natural alignment of const ipv6 loopback and router addresses On a parisc64 kernel I sometimes notice this kernel warning: Kernel unaligned access to 0x40ff8814 at ndisc_send_skb+0xc0/0x4d8 The address 0x40ff8814 points to the in6addr_linklocal_allrouters variable and the warning simply means that some ipv6 function tries to read a 64-bit word directly from the not-64-bit aligned in6addr_linklocal_allrouters variable. Unaligned accesses are non-critical as the architecture or exception handlers usually will fix it up at runtime. Nevertheless it may trigger a performance penality for some architectures. For details read the "unaligned-memory-access" kernel documentation. The patch below ensures that the ipv6 loopback and router addresses will always be naturally aligned. This prevents the unaligned accesses for all architectures. Signed-off-by: Helge Deller Fixes: 034dfc5df99eb ("ipv6: export in6addr_loopback to modules") Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/ZbNuFM1bFqoH-UoY@p100 Signed-off-by: Paolo Abeni --- net/ipv6/addrconf_core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a6bd..c008d21925d7 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) From 2fa28abd1090562b4d9bc4aedd70abcca26561af Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 23 Jan 2024 14:30:54 +0100 Subject: [PATCH 554/768] arm64: Revert "scs: Work around full LTO issue with dynamic SCS" This reverts commit 8c5a19cb17a71e ("arm64: scs: Work around full LTO issue with dynamic SCS"), which did not quite fix the issue as intended. Apparently, -fno-unwind-tables is ignored for the final full LTO link when it is set on any of the objects, resulting in an early boot crash due to the SCS patching code patching itself, and attempting to pop the return address from the shadow stack while the associated push was still a PACIASP instruction when it executed. Reported-by: Sami Tolvanen Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Tested-by: Sami Tolvanen Link: https://lore.kernel.org/r/20240123133052.1417449-5-ardb+git@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index e5d03a7039b4..d95b3d6b471a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -73,13 +73,7 @@ obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o - -# We need to prevent the SCS patching code from patching itself. Using -# -mbranch-protection=none here to avoid the patchable PAC opcodes from being -# generated triggers an issue with full LTO on Clang, which stops emitting PAC -# instructions altogether. So instead, omit the unwind tables used by the -# patching code, so it will not be able to locate its own PAC instructions. -CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables +CFLAGS_patch-scs.o += -mbranch-protection=none # Force dependency (vdso*-wrap.S includes vdso.so through incbin) $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so From d104a6fef3fec137d8d44961224ab76edbd6cbc7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 23 Jan 2024 14:30:55 +0100 Subject: [PATCH 555/768] arm64: scs: Disable LTO for SCS patching code Full LTO takes the '-mbranch-protection=none' passed to the compiler when generating the dynamic shadow call stack patching code as a hint to stop emitting PAC instructions altogether. (Thin LTO appears unaffected by this) Work around this by disabling LTO for the compilation unit, which appears to convince the linker that it should still use PAC in the rest of the kernel.. Fixes: 3b619e22c460 ("arm64: implement dynamic shadow call stack for Clang") Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Tested-by: Sami Tolvanen Link: https://lore.kernel.org/r/20240123133052.1417449-6-ardb+git@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d95b3d6b471a..467cb7117273 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -73,7 +73,13 @@ obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o + +# We need to prevent the SCS patching code from patching itself. Using +# -mbranch-protection=none here to avoid the patchable PAC opcodes from being +# generated triggers an issue with full LTO on Clang, which stops emitting PAC +# instructions altogether. So disable LTO as well for the compilation unit. CFLAGS_patch-scs.o += -mbranch-protection=none +CFLAGS_REMOVE_patch-scs.o += $(CC_FLAGS_LTO) # Force dependency (vdso*-wrap.S includes vdso.so through incbin) $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so From 4896bb7c0b31a0a3379b290ea7729900c59e0c69 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:41 +0100 Subject: [PATCH 556/768] net: stmmac: do not clear TBS enable bit on link up/down With the dma conf being reallocated on each call to stmmac_open(), any information in there is lost, unless we specifically handle it. The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc therefore would stop working when link was set down and then back up. Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open") Cc: stable@vger.kernel.org Signed-off-by: Esben Haabendal Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b334eb16da23..25519952f754 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3932,6 +3932,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); From 3b12ec8f618ebaccfe43ea4621a6f5fb586edef8 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:42 +0100 Subject: [PATCH 557/768] net: stmmac: dwmac-imx: set TSO/TBS TX queues default settings TSO and TBS cannot coexist. For now we set i.MX Ethernet QOS controller to use the first TX queue with TSO and the rest for TBS. TX queues with TBS can support etf qdisc hw offload. Signed-off-by: Esben Haabendal Reviewed-by: Kurt Kanzenbach Reviewed-by: Vadim Fedorenko Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 8f730ada71f9..6b65420e11b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -353,6 +353,10 @@ static int imx_dwmac_probe(struct platform_device *pdev) if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY) plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY; + /* Default TX Q0 to use TSO and rest TXQ for TBS */ + for (int i = 1; i < plat_dat->tx_queues_to_use; i++) + plat_dat->tx_queues_cfg[i].tbs_en = 1; + plat_dat->host_dma_width = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; From c7767f5c43df2c453af4651d1f58f489e3eb4ac1 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Mon, 29 Jan 2024 15:47:48 +0000 Subject: [PATCH 558/768] arm64: vdso32: Remove unused vdso32-offsets.h Commit 2d071968a405 ("arm64: compat: Remove 32-bit sigreturn code from the vDSO") removed all VDSO_* symbols in the compat vDSO. As a result, vdso32-offsets.h is now empty and therefore unused. Time to remove it. Signed-off-by: Kevin Brodsky Link: https://lore.kernel.org/r/20240129154748.1727759-1-kevin.brodsky@arm.com Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- arch/arm64/include/asm/vdso.h | 3 --- arch/arm64/kernel/vdso32/Makefile | 9 --------- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 47ecc4cff9d2..a88cdf910687 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -195,7 +195,7 @@ vdso_prepare: prepare0 include/generated/vdso-offsets.h arch/arm64/kernel/vdso/vdso.so ifdef CONFIG_COMPAT_VDSO $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 \ - include/generated/vdso32-offsets.h arch/arm64/kernel/vdso32/vdso.so + arch/arm64/kernel/vdso32/vdso.so endif endif diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index b4ae32109932..4305995c8f82 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -17,9 +17,6 @@ #ifndef __ASSEMBLY__ #include -#ifdef CONFIG_COMPAT_VDSO -#include -#endif #define VDSO_SYMBOL(base, name) \ ({ \ diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 2266fcdff78a..f5f80fdce0fe 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -127,9 +127,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) - # Strip rule for vdso.so $(obj)/vdso.so: OBJCOPYFLAGS := -S $(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE @@ -166,9 +163,3 @@ quiet_cmd_vdsoas = AS32 $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(obj)/$(munge) $< $@ - -# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO) -gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ -# The AArch64 nm should be able to read an AArch32 binary - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ From aa2b2eb3934859904c287bf5434647ba72e14c1c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Jan 2024 16:55:32 +0000 Subject: [PATCH 559/768] llc: call sock_orphan() at release time syzbot reported an interesting trace [1] caused by a stale sk->sk_wq pointer in a closed llc socket. In commit ff7b11aa481f ("net: socket: set sock->sk to NULL after calling proto_ops::release()") Eric Biggers hinted that some protocols are missing a sock_orphan(), we need to perform a full audit. In net-next, I plan to clear sock->sk from sock_orphan() and amend Eric patch to add a warning. [1] BUG: KASAN: slab-use-after-free in list_empty include/linux/list.h:373 [inline] BUG: KASAN: slab-use-after-free in waitqueue_active include/linux/wait.h:127 [inline] BUG: KASAN: slab-use-after-free in sock_def_write_space_wfree net/core/sock.c:3384 [inline] BUG: KASAN: slab-use-after-free in sock_wfree+0x9a8/0x9d0 net/core/sock.c:2468 Read of size 8 at addr ffff88802f4fc880 by task ksoftirqd/1/27 CPU: 1 PID: 27 Comm: ksoftirqd/1 Not tainted 6.8.0-rc1-syzkaller-00049-g6098d87eaf31 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc4/0x620 mm/kasan/report.c:488 kasan_report+0xda/0x110 mm/kasan/report.c:601 list_empty include/linux/list.h:373 [inline] waitqueue_active include/linux/wait.h:127 [inline] sock_def_write_space_wfree net/core/sock.c:3384 [inline] sock_wfree+0x9a8/0x9d0 net/core/sock.c:2468 skb_release_head_state+0xa3/0x2b0 net/core/skbuff.c:1080 skb_release_all net/core/skbuff.c:1092 [inline] napi_consume_skb+0x119/0x2b0 net/core/skbuff.c:1404 e1000_unmap_and_free_tx_resource+0x144/0x200 drivers/net/ethernet/intel/e1000/e1000_main.c:1970 e1000_clean_tx_irq drivers/net/ethernet/intel/e1000/e1000_main.c:3860 [inline] e1000_clean+0x4a1/0x26e0 drivers/net/ethernet/intel/e1000/e1000_main.c:3801 __napi_poll.constprop.0+0xb4/0x540 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x956/0xe90 net/core/dev.c:6778 __do_softirq+0x21a/0x8de kernel/softirq.c:553 run_ksoftirqd kernel/softirq.c:921 [inline] run_ksoftirqd+0x31/0x60 kernel/softirq.c:913 smpboot_thread_fn+0x660/0xa10 kernel/smpboot.c:164 kthread+0x2c6/0x3a0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Allocated by task 5167: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 unpoison_slab_object mm/kasan/common.c:314 [inline] __kasan_slab_alloc+0x81/0x90 mm/kasan/common.c:340 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3813 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_lru+0x142/0x6f0 mm/slub.c:3879 alloc_inode_sb include/linux/fs.h:3019 [inline] sock_alloc_inode+0x25/0x1c0 net/socket.c:308 alloc_inode+0x5d/0x220 fs/inode.c:260 new_inode_pseudo+0x16/0x80 fs/inode.c:1005 sock_alloc+0x40/0x270 net/socket.c:634 __sock_create+0xbc/0x800 net/socket.c:1535 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x14c/0x260 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] __x64_sys_socket+0x72/0xb0 net/socket.c:1718 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 0: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3f/0x60 mm/kasan/generic.c:640 poison_slab_object mm/kasan/common.c:241 [inline] __kasan_slab_free+0x121/0x1b0 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kmem_cache_free+0x129/0x350 mm/slub.c:4363 i_callback+0x43/0x70 fs/inode.c:249 rcu_do_batch kernel/rcu/tree.c:2158 [inline] rcu_core+0x819/0x1680 kernel/rcu/tree.c:2433 __do_softirq+0x21a/0x8de kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 __kasan_record_aux_stack+0xba/0x100 mm/kasan/generic.c:586 __call_rcu_common.constprop.0+0x9a/0x7b0 kernel/rcu/tree.c:2683 destroy_inode+0x129/0x1b0 fs/inode.c:315 iput_final fs/inode.c:1739 [inline] iput.part.0+0x560/0x7b0 fs/inode.c:1765 iput+0x5c/0x80 fs/inode.c:1755 dentry_unlink_inode+0x292/0x430 fs/dcache.c:400 __dentry_kill+0x1ca/0x5f0 fs/dcache.c:603 dput.part.0+0x4ac/0x9a0 fs/dcache.c:845 dput+0x1f/0x30 fs/dcache.c:835 __fput+0x3b9/0xb70 fs/file_table.c:384 task_work_run+0x14d/0x240 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa8a/0x2ad0 kernel/exit.c:871 do_group_exit+0xd4/0x2a0 kernel/exit.c:1020 __do_sys_exit_group kernel/exit.c:1031 [inline] __se_sys_exit_group kernel/exit.c:1029 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1029 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b The buggy address belongs to the object at ffff88802f4fc800 which belongs to the cache sock_inode_cache of size 1408 The buggy address is located 128 bytes inside of freed 1408-byte region [ffff88802f4fc800, ffff88802f4fcd80) The buggy address belongs to the physical page: page:ffffea0000bd3e00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2f4f8 head:ffffea0000bd3e00 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 anon flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888013b06b40 0000000000000000 0000000000000001 raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Reclaimable, gfp_mask 0xd20d0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_RECLAIMABLE), pid 4956, tgid 4956 (sshd), ts 31423924727, free_ts 0 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2d0/0x350 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0xa28/0x3780 mm/page_alloc.c:3311 __alloc_pages+0x22f/0x2440 mm/page_alloc.c:4567 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page mm/slub.c:2190 [inline] allocate_slab mm/slub.c:2354 [inline] new_slab+0xcc/0x3a0 mm/slub.c:2407 ___slab_alloc+0x4af/0x19a0 mm/slub.c:3540 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3625 __slab_alloc_node mm/slub.c:3678 [inline] slab_alloc_node mm/slub.c:3850 [inline] kmem_cache_alloc_lru+0x379/0x6f0 mm/slub.c:3879 alloc_inode_sb include/linux/fs.h:3019 [inline] sock_alloc_inode+0x25/0x1c0 net/socket.c:308 alloc_inode+0x5d/0x220 fs/inode.c:260 new_inode_pseudo+0x16/0x80 fs/inode.c:1005 sock_alloc+0x40/0x270 net/socket.c:634 __sock_create+0xbc/0x800 net/socket.c:1535 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x14c/0x260 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] __x64_sys_socket+0x72/0xb0 net/socket.c:1718 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b page_owner free stack trace missing Memory state around the buggy address: ffff88802f4fc780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88802f4fc800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88802f4fc880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88802f4fc900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88802f4fc980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 43815482370c ("net: sock_def_readable() and friends RCU conversion") Reported-and-tested-by: syzbot+32b89eaa102b372ff76d@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Eric Biggers Cc: Kuniyuki Iwashima Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240126165532.3396702-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/llc/af_llc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20551cfb7da6..fde1140d899e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock) } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); + sock_orphan(sk); + sock->sk = NULL; llc_sk_free(sk); out: return 0; From c16dfab33f99fc3ff43d48253bc2784ccb84c1de Mon Sep 17 00:00:00 2001 From: Kenzo Gomez Date: Sat, 27 Jan 2024 17:46:21 +0100 Subject: [PATCH 560/768] ALSA: hda: cs35l41: Support additional ASUS Zenbook UX3402VA Add new model entry into configuration table. Signed-off-by: Kenzo Gomez Link: https://lore.kernel.org/r/20240127164621.26431-1-kenzo.sgomez@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 35277ce890a4..59504852adc6 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -76,6 +76,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431533", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431573", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, { "10431663", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 }, + { "104316A3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104317F3", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, @@ -410,6 +411,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431533", generic_dsd_config }, { "CSC3551", "10431573", generic_dsd_config }, { "CSC3551", "10431663", generic_dsd_config }, + { "CSC3551", "104316A3", generic_dsd_config }, { "CSC3551", "104316D3", generic_dsd_config }, { "CSC3551", "104316F3", generic_dsd_config }, { "CSC3551", "104317F3", generic_dsd_config }, From 15d6daad8f8add8ef99b6e4e2b5bf0db48e1a8db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jan 2024 10:09:18 -0300 Subject: [PATCH 561/768] tools headers x86 cpufeatures: Sync with the kernel sources to pick TDX, Zen, APIC MSR fence changes To pick the changes from: 1e536e10689700e0 ("x86/cpu: Detect TDX partial write machine check erratum") 765a0542fdc7aad7 ("x86/virt/tdx: Detect TDX during kernel boot") 30fa92832f405d5a ("x86/CPU/AMD: Add ZenX generations flags") 04c3024560d3a14a ("x86/barrier: Do not serialize MSR accesses on AMD") This causes these perf files to be rebuilt and brings some X86_FEATURE that will be used when updating the copies of tools/arch/x86/lib/mem{cpy,set}_64.S with the kernel sources: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Dave Hansen Cc: Ian Rogers Cc: Jiri Olsa Cc: Kai Huang Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index f4542d2718f4..29cb275a219d 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -198,6 +198,7 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ @@ -308,10 +309,14 @@ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ #define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ - #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -495,6 +500,7 @@ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ From be220d2e5544ff094142d263db5cf94d034b5e39 Mon Sep 17 00:00:00 2001 From: Chhayly Leang Date: Fri, 26 Jan 2024 15:09:12 +0700 Subject: [PATCH 562/768] ALSA: hda: cs35l41: Support ASUS Zenbook UM3402YAR Adds sound support for ASUS Zenbook UM3402YAR with missing DSD Signed-off-by: Chhayly Leang Link: https://lore.kernel.org/r/20240126080912.87422-1-clw.leang@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 59504852adc6..d74cf11eef1e 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -76,6 +76,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431533", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431573", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, { "10431663", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 }, + { "10431683", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "104316A3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, @@ -411,6 +412,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431533", generic_dsd_config }, { "CSC3551", "10431573", generic_dsd_config }, { "CSC3551", "10431663", generic_dsd_config }, + { "CSC3551", "10431683", generic_dsd_config }, { "CSC3551", "104316A3", generic_dsd_config }, { "CSC3551", "104316D3", generic_dsd_config }, { "CSC3551", "104316F3", generic_dsd_config }, From f7c4cb4a3f77867612b45c6327f80eac58a8ce65 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 25 Jan 2024 22:35:19 +0000 Subject: [PATCH 563/768] ALSA: pcm: Add missing formats to formats list Add 4 missing formats to 'snd_pcm_format_names' array in order to be able to get their names with 'snd_pcm_format_name' function. Signed-off-by: Ivan Orlov Link: https://lore.kernel.org/r/20240125223522.1122765-1-ivan.orlov0322@gmail.com Signed-off-by: Takashi Iwai --- sound/core/pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index a09f0154e6a7..d0788126cbab 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -211,6 +211,10 @@ static const char * const snd_pcm_format_names[] = { FORMAT(DSD_U32_LE), FORMAT(DSD_U16_BE), FORMAT(DSD_U32_BE), + FORMAT(S20_LE), + FORMAT(S20_BE), + FORMAT(U20_LE), + FORMAT(U20_BE), }; /** From efe80f9c9063228136bc3824f7ac6b4ff2e273b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jan 2024 10:45:24 -0300 Subject: [PATCH 564/768] tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench' This is to get the changes from: 94ea9c05219518ef ("x86/headers: Replace #include with #include ") 10f4c9b9a33b7df0 ("x86/asm: Fix build of UML with KASAN") That addresses these perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Vincent Whitchurch Link: https://lore.kernel.org/lkml/ZbkIKpKdNqOFdMwJ@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 4 ++-- tools/arch/x86/lib/memset_64.S | 4 ++-- tools/perf/util/include/linux/linkage.h | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index d055b82d22cc..59cf6f9065aa 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright 2002 Andi Kleen */ +#include #include #include #include #include -#include .section .noinstr.text, "ax" @@ -39,7 +39,7 @@ SYM_TYPED_FUNC_START(__memcpy) SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_ALIAS(memcpy, __memcpy) +SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 7c59a704c458..0199d56cb479 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -1,10 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include -#include .section .noinstr.text, "ax" @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) -SYM_FUNC_ALIAS(memset, __memset) +SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 75e2248416f5..178b00205fe6 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -115,6 +115,10 @@ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) #endif +#ifndef SYM_FUNC_ALIAS_MEMFUNC +#define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS +#endif + // In the kernel sources (include/linux/cfi_types.h), this has a different // definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang // definition: From 7814fe24a6211a610db0b408d87420403b5b7a36 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 24 Jan 2024 09:43:57 +0000 Subject: [PATCH 565/768] perf evlist: Fix evlist__new_default() for > 1 core PMU The 'Session topology' test currently fails with this message when evlist__new_default() opens more than one event: 32: Session topology : --- start --- templ file: /tmp/perf-test-vv5YzZ Using CPUID 0x00000000410fd070 Opening: unknown-hardware:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xb00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 = 4 Opening: unknown-hardware:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 = 5 non matching sample_type FAILED tests/topology.c:73 can't get session ---- end ---- Session topology: FAILED! This is because when re-opening the file and parsing the header, Perf expects that any file that has more than one event has the sample ID flag set. Perf record already sets the flag in a similar way when there is more than one event, so add the same logic to evlist__new_default(). evlist__new_default() is only currently used in tests, so I don't expect this change to have any other side effects. The other tests that use it don't save and re-open the file so don't hit this issue. The session topology test has been failing on Arm big.LITTLE platforms since commit 251aa040244a3b17 ("perf parse-events: Wildcard most "numeric" events") when evlist__new_default() started opening multiple events for 'cycles'. Fixes: 251aa040244a3b17 ("perf parse-events: Wildcard most "numeric" events") Reviewed-by: Ian Rogers Signed-off-by: James Clark [ This was failing as well on a Rocket Lake Refresh/14700k Intel hybrid system - Arnaldo ] Tested-by: Arnaldo Carvalho de Melo Tested-by: Ian Rogers Tested-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Changbin Du Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yang Jihong Closes: https://lore.kernel.org/lkml/CAP-5=fWVQ-7ijjK3-w1q+k2WYVNHbAcejb-xY0ptbjRw476VKA@mail.gmail.com/ Link: https://lore.kernel.org/r/20240124094358.489372-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 95f25e9fb994..55a300a0977b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -103,7 +103,14 @@ struct evlist *evlist__new_default(void) err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); if (err) { evlist__delete(evlist); - evlist = NULL; + return NULL; + } + + if (evlist->core.nr_entries > 1) { + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__set_sample_id(evsel, /*can_sample_identifier=*/false); } return evlist; From 1f8c43b09ec6906079ac1f02e2b0a381c6f48c6c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jan 2024 11:44:00 -0300 Subject: [PATCH 566/768] tools include UAPI: Sync linux/mount.h copy with the kernel sources To pick the changes from: 35e27a5744131996 ("fs: keep struct mnt_id_req extensible") b4c2bea8ceaa50cd ("add listmount(2) syscall") 46eae99ef73302f9 ("add statmount(2) syscall") That doesn't change anything in tools this time as nothing that is harvested by the beauty scripts got changed: $ ls -1 tools/perf/trace/beauty/*mount*sh tools/perf/trace/beauty/fsmount.sh tools/perf/trace/beauty/mount_flags.sh tools/perf/trace/beauty/move_mount_flags.sh $ This addresses this perf build warning. Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Miklos Szeredi Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbkMiB7ZcOsLP2V5@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index bb242fdcfe6b..ad5478dbad00 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -138,4 +138,74 @@ struct mount_attr { /* List of all mount_attr versions. */ #define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 __spare1; + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 __spare2[50]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ + #endif /* _UAPI_LINUX_MOUNT_H */ From 12b17b4eb82a41977eb848048137b5908d52845c Mon Sep 17 00:00:00 2001 From: Leonard Dallmayr Date: Fri, 5 Jan 2024 13:35:51 +0100 Subject: [PATCH 567/768] USB: serial: cp210x: add ID for IMST iM871A-USB The device IMST USB-Stick for Smart Meter is a rebranded IMST iM871A-USB Wireless M-Bus USB-adapter. It is used to read wireless water, gas and electricity meters. Signed-off-by: Leonard Dallmayr Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 1e61fe043171..923e0ed85444 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ From 129690fb229a20b6e563a77a2c85266acecf20bc Mon Sep 17 00:00:00 2001 From: JackBB Wu Date: Tue, 23 Jan 2024 17:39:48 +0800 Subject: [PATCH 568/768] USB: serial: qcserial: add new usb-id for Dell Wireless DW5826e Add support for Dell DW5826e with USB-id 0x413c:0x8217 & 0x413c:0x8218. It is 0x413c:0x8217 T: Bus=02 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=413c ProdID=8217 Rev= 5.04 S: Manufacturer=DELL S: Product=COMPAL Electronics EXM-G1A S: SerialNumber=359302940050401 C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=qcserial E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=qcserial E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=qcserial E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms It is 0x413c:0x8218 T: Bus=02 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=413c ProdID=8218 Rev= 0.00 S: Manufacturer=DELL S: Product=COMPAL Electronics EXM-G1A S: SerialNumber=359302940050401 C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr= 2mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=qcserial E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: JackBB Wu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index b1e844bf31f8..703a9c563557 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -184,6 +184,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x8217)}, /* Dell Wireless DW5826e */ + {DEVICE_SWI(0x413c, 0x8218)}, /* Dell Wireless DW5826e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ From a3fa9838e8140584a6f338e8516f2b05d3bea812 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Tue, 30 Jan 2024 20:32:56 +0530 Subject: [PATCH 569/768] regulator (max5970): Fix IRQ handler The max5970 datasheet gives the impression that IRQ status bits must be cleared by writing a one to set bits, as those are marked with 'R/C', however tests showed that a zero must be written. Fixes an IRQ storm as the interrupt handler actually clears the IRQ status bits. Signed-off-by: Patrick Rudolph Signed-off-by: Naresh Solanki Link: https://msgid.link/r/20240130150257.3643657-1-naresh.solanki@9elements.com Signed-off-by: Mark Brown --- drivers/regulator/max5970-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index bc88a40a88d4..830a1c4cd705 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -392,7 +392,7 @@ static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, return ret; if (*val) - return regmap_write(map, reg, *val); + return regmap_write(map, reg, 0); return 0; } From 6500ad28fd5d67d5ca0fee9da73c463090842440 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jan 2024 10:40:53 +0100 Subject: [PATCH 570/768] spi: sh-msiof: avoid integer overflow in constants cppcheck rightfully warned: drivers/spi/spi-sh-msiof.c:792:28: warning: Signed integer overflow for expression '7<<29'. [integerOverflow] sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1); Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://msgid.link/r/20240130094053.10672-1-wsa+renesas@sang-engineering.com Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index cfc3b1ddbd22..6f12e4fb2e2e 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -136,14 +136,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) From f1fea725cc93fcc3c5af9a2af63ffdc40dd2259e Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Wed, 20 Dec 2023 10:11:51 -0500 Subject: [PATCH 571/768] selftests/livepatch: fix and refactor new dmesg message code The livepatching kselftests rely on comparing expected vs. observed dmesg output. After each test, new dmesg entries are determined by the 'comm' utility comparing a saved, pre-test copy of dmesg to post-test dmesg output. Alexander reports that the 'comm --nocheck-order -13' invocation used by the tests can be confused when dmesg entry timestamps vary in magnitude (ie, "[ 98.820331]" vs. "[ 100.031067]"), in which case, additional messages are reported as new. The unexpected entries then spoil the test results. Instead of relying on 'comm' or 'diff' to determine new testing dmesg entries, refactor the code: - pre-test : log a unique canary dmesg entry - test : run tests, log messages - post-test : filter dmesg starting from pre-test message Reported-by: Alexander Gordeev Closes: https://lore.kernel.org/live-patching/ZYAimyPYhxVA9wKg@li-008a6a4c-3549-11b2-a85c-c5cc2836eea2.ibm.com/ Signed-off-by: Joe Lawrence Acked-by: Alexander Gordeev Tested-by: Marcos Paulo de Souza Reviewed-by: Marcos Paulo de Souza Acked-by: Miroslav Benes Signed-off-by: Shuah Khan --- .../testing/selftests/livepatch/functions.sh | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index c8416c54b463..b1fd7362c2fe 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -42,17 +42,6 @@ function die() { exit 1 } -# save existing dmesg so we can detect new content -function save_dmesg() { - SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX) - dmesg > "$SAVED_DMESG" -} - -# cleanup temporary dmesg file from save_dmesg() -function cleanup_dmesg_file() { - rm -f "$SAVED_DMESG" -} - function push_config() { DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') @@ -99,7 +88,6 @@ function set_ftrace_enabled() { function cleanup() { pop_config - cleanup_dmesg_file } # setup_config - save the current config and set a script exit trap that @@ -280,7 +268,15 @@ function set_pre_patch_ret { function start_test { local test="$1" - save_dmesg + # Dump something unique into the dmesg log, then stash the entry + # in LAST_DMESG. The check_result() function will use it to + # find new kernel messages since the test started. + local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)" + log "$last_dmesg_msg" + loop_until 'dmesg | grep -q "$last_dmesg_msg"' || + die "buffer busy? can't find canary dmesg message: $last_dmesg_msg" + LAST_DMESG=$(dmesg | grep "$last_dmesg_msg") + echo -n "TEST: $test ... " log "===== TEST: $test =====" } @@ -291,23 +287,24 @@ function check_result { local expect="$*" local result - # Note: when comparing dmesg output, the kernel log timestamps - # help differentiate repeated testing runs. Remove them with a - # post-comparison sed filter. - - result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \ + # Test results include any new dmesg entry since LAST_DMESG, then: + # - include lines matching keywords + # - exclude lines matching keywords + # - filter out dmesg timestamp prefixes + result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ sed 's/^\[[ 0-9.]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" + elif [[ "$result" == "" ]] ; then + echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n" + die "livepatch kselftest(s) failed" else echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n" die "livepatch kselftest(s) failed" fi - - cleanup_dmesg_file } # check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs From 5820cfee443f8a90ea3eb9f99f57f2049a4a93c3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:00:18 +0000 Subject: [PATCH 572/768] kselftest/seccomp: Use kselftest output functions for benchmark In preparation for trying to output the test results themselves in TAP format rework all the prints in the benchmark to use the kselftest output functions. The uses of system() all produce single line output so we can avoid having to deal with fully managing the child process and continue to use system() by simply printing an empty message before we invoke system(). We also leave one printf() used to complete a line of output in place. Tested-by: Anders Roxell Acked-by: Kees Cook Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- .../selftests/seccomp/seccomp_benchmark.c | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5b5c9d558dee..93168dd2c1e3 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -38,10 +38,10 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) i *= 1000000000ULL; i += finish.tv_nsec - start.tv_nsec; - printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", - finish.tv_sec, finish.tv_nsec, - start.tv_sec, start.tv_nsec, - i, (double)i / 1000000000.0); + ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i, (double)i / 1000000000.0); return i; } @@ -53,7 +53,7 @@ unsigned long long calibrate(void) pid_t pid, ret; int seconds = 15; - printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); + ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); samples = 0; pid = getpid(); @@ -102,14 +102,14 @@ long compare(const char *name_one, const char *name_eval, const char *name_two, { bool good; - printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, - (long long)one, name_eval, (long long)two); + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, + (long long)one, name_eval, (long long)two); if (one > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); return 1; } if (two > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); return 1; } @@ -145,12 +145,15 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); - printf("Running on:\n"); + ksft_print_msg("Running on:\n"); + ksft_print_msg(""); system("uname -a"); - printf("Current BPF sysctl settings:\n"); + ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); if (argc > 1) @@ -158,11 +161,11 @@ int main(int argc, char *argv[]) else samples = calibrate(); - printf("Benchmarking %llu syscalls...\n", samples); + ksft_print_msg("Benchmarking %llu syscalls...\n", samples); /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid native: %llu ns\n", native); + ksft_print_msg("getpid native: %llu ns\n", native); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); @@ -172,33 +175,33 @@ int main(int argc, char *argv[]) assert(ret == 0); bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); + ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); /* Second filter resulting in a bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); + ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); /* Third filter, can no longer be converted to bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); + ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); /* Fourth filter, can not be converted to bitmap because of filter 3 */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); + ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); /* Estimations */ #define ESTIMATE(fmt, var, what) do { \ var = (what); \ - printf("Estimated " fmt ": %llu ns\n", var); \ + ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \ if (var > INT_MAX) \ goto more_samples; \ } while (0) @@ -218,7 +221,7 @@ int main(int argc, char *argv[]) ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, (filter2 - native - entry) / 4); - printf("Expectations:\n"); + ksft_print_msg("Expectations:\n"); ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); bits = compare("native", "≤", "1 filter", native, le, filter1); if (bits) @@ -230,7 +233,7 @@ int main(int argc, char *argv[]) bits = compare("1 bitmapped", "≈", "2 bitmapped", bitmap1 - native, approx, bitmap2 - native); if (bits) { - printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); + ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); goto out; } @@ -242,7 +245,7 @@ int main(int argc, char *argv[]) goto out; more_samples: - printf("Saw unexpected benchmark result. Try running again with more samples?\n"); + ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); out: return 0; } From b54761f6e9773350c0d1fb8e1e5aacaba7769d0f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:00:19 +0000 Subject: [PATCH 573/768] kselftest/seccomp: Report each expectation we assert as a KTAP test The seccomp benchmark test makes a number of checks on the performance it measures and logs them to the output but does so in a custom format which none of the automated test runners understand meaning that the chances that anyone is paying attention are slim. Let's additionally log each result in KTAP format so that automated systems parsing the test output will see each comparison as a test case. The original logs are left in place since they provide the actual numbers for analysis. As part of this rework the flow for the main program so that when we skip tests we still log all the tests we skip, this is because the standard KTAP headers and footers include counts of the number of expected and run tests. Tested-by: Anders Roxell Acked-by: Kees Cook Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- .../selftests/seccomp/seccomp_benchmark.c | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 93168dd2c1e3..97b86980b768 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -98,24 +98,36 @@ bool le(int i_one, int i_two) } long compare(const char *name_one, const char *name_eval, const char *name_two, - unsigned long long one, bool (*eval)(int, int), unsigned long long two) + unsigned long long one, bool (*eval)(int, int), unsigned long long two, + bool skip) { bool good; + if (skip) { + ksft_test_result_skip("%s %s %s\n", name_one, name_eval, + name_two); + return 0; + } + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, (long long)one, name_eval, (long long)two); if (one > INT_MAX) { ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); - return 1; + good = false; + goto out; } if (two > INT_MAX) { ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); - return 1; + good = false; + goto out; } good = eval(one, two); printf("%s\n", good ? "✔️" : "❌"); +out: + ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two); + return good ? 0 : 1; } @@ -142,9 +154,13 @@ int main(int argc, char *argv[]) unsigned long long samples, calc; unsigned long long native, filter1, filter2, bitmap1, bitmap2; unsigned long long entry, per_filter1, per_filter2; + bool skip = false; setbuf(stdout, NULL); + ksft_print_header(); + ksft_set_plan(7); + ksft_print_msg("Running on:\n"); ksft_print_msg(""); system("uname -a"); @@ -202,8 +218,10 @@ int main(int argc, char *argv[]) #define ESTIMATE(fmt, var, what) do { \ var = (what); \ ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \ - if (var > INT_MAX) \ - goto more_samples; \ + if (var > INT_MAX) { \ + skip = true; \ + ret |= 1; \ + } \ } while (0) ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, @@ -222,30 +240,33 @@ int main(int argc, char *argv[]) (filter2 - native - entry) / 4); ksft_print_msg("Expectations:\n"); - ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); - bits = compare("native", "≤", "1 filter", native, le, filter1); + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1, + skip); + bits = compare("native", "≤", "1 filter", native, le, filter1, + skip); if (bits) - goto more_samples; + skip = true; ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", - per_filter1, approx, per_filter2); + per_filter1, approx, per_filter2, skip); bits = compare("1 bitmapped", "≈", "2 bitmapped", - bitmap1 - native, approx, bitmap2 - native); + bitmap1 - native, approx, bitmap2 - native, skip); if (bits) { ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); - goto out; + skip = true; } - ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); - ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, + bitmap1 - native, skip); + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, + bitmap2 - native, skip); ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", - entry + (per_filter1 * 4) + native, approx, filter2); - if (ret == 0) - goto out; + entry + (per_filter1 * 4) + native, approx, filter2, + skip); -more_samples: - ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); -out: - return 0; + if (ret) + ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); + + ksft_finished(); } From 8b1d72395635af45410b66cc4c4ab37a12c4a831 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 20 Jan 2024 15:29:27 +0100 Subject: [PATCH 574/768] parisc: Fix random data corruption from exception handler The current exception handler implementation, which assists when accessing user space memory, may exhibit random data corruption if the compiler decides to use a different register than the specified register %r29 (defined in ASM_EXCEPTIONTABLE_REG) for the error code. If the compiler choose another register, the fault handler will nevertheless store -EFAULT into %r29 and thus trash whatever this register is used for. Looking at the assembly I found that this happens sometimes in emulate_ldd(). To solve the issue, the easiest solution would be if it somehow is possible to tell the fault handler which register is used to hold the error code. Using %0 or %1 in the inline assembly is not posssible as it will show up as e.g. %r29 (with the "%r" prefix), which the GNU assembler can not convert to an integer. This patch takes another, better and more flexible approach: We extend the __ex_table (which is out of the execution path) by one 32-word. In this word we tell the compiler to insert the assembler instruction "or %r0,%r0,%reg", where %reg references the register which the compiler choosed for the error return code. In case of an access failure, the fault handler finds the __ex_table entry and can examine the opcode. The used register is encoded in the lowest 5 bits, and the fault handler can then store -EFAULT into this register. Since we extend the __ex_table to 3 words we can't use the BUILDTIME_TABLE_SORT config option any longer. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/assembly.h | 1 + arch/parisc/include/asm/extable.h | 64 +++++++++++++++++++++++++ arch/parisc/include/asm/special_insns.h | 6 ++- arch/parisc/include/asm/uaccess.h | 48 +++---------------- arch/parisc/kernel/cache.c | 4 +- arch/parisc/kernel/unaligned.c | 44 ++++++++--------- arch/parisc/mm/fault.c | 11 +++-- 8 files changed, 108 insertions(+), 71 deletions(-) create mode 100644 arch/parisc/include/asm/extable.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index d14ccc948a29..5c845e8d59d9 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -25,7 +25,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_KERNEL_UNCOMPRESSED select HAVE_PCI select HAVE_PERF_EVENTS diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e759d..5937d5edaba1 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -576,6 +576,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 000000000000..4ea23e3d79dc --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include +#include + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0e3c..51f40eaf7780 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 4165079898d9..88d0ae5769dd 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include #include +#include #include #include @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 0c015487e5db..5552602fcaef 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -854,7 +854,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fic,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (dcache_stride), "i" (SR_USER)); } @@ -869,7 +869,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fdc,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (icache_stride), "i" (SR_USER)); } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index ce25acfe4889..c520e551a165 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -120,8 +120,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -152,8 +152,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -189,8 +189,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) : "r19", "r20" ); @@ -209,9 +209,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); } @@ -244,8 +244,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -285,8 +285,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -329,10 +329,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -357,11 +357,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%2)\n" "5: stw %R1,8(%%sr1,%2)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 2fe5b44986e0..c39de84e98b0 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { From a22fe1d6dec7e98535b97249fdc95c2be79120bb Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 23 Jan 2024 12:28:41 -0500 Subject: [PATCH 575/768] dmaengine: fix is_slave_direction() return false when DMA_DEV_TO_DEV is_slave_direction() should return true when direction is DMA_DEV_TO_DEV. Fixes: 49920bc66984 ("dmaengine: add new enum dma_transfer_direction") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240123172842.3764529-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3df70d6131c8..752dbde4cec1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -953,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( From 7104ba0f1958adb250319e68a15eff89ec4fd36d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 28 Jan 2024 14:05:54 +0200 Subject: [PATCH 576/768] phy: ti: phy-omap-usb2: Fix NULL pointer dereference for SRP If the external phy working together with phy-omap-usb2 does not implement send_srp(), we may still attempt to call it. This can happen on an idle Ethernet gadget triggering a wakeup for example: configfs-gadget.g1 gadget.0: ECM Suspend configfs-gadget.g1 gadget.0: Port suspended. Triggering wakeup ... Unable to handle kernel NULL pointer dereference at virtual address 00000000 when execute ... PC is at 0x0 LR is at musb_gadget_wakeup+0x1d4/0x254 [musb_hdrc] ... musb_gadget_wakeup [musb_hdrc] from usb_gadget_wakeup+0x1c/0x3c [udc_core] usb_gadget_wakeup [udc_core] from eth_start_xmit+0x3b0/0x3d4 [u_ether] eth_start_xmit [u_ether] from dev_hard_start_xmit+0x94/0x24c dev_hard_start_xmit from sch_direct_xmit+0x104/0x2e4 sch_direct_xmit from __dev_queue_xmit+0x334/0xd88 __dev_queue_xmit from arp_solicit+0xf0/0x268 arp_solicit from neigh_probe+0x54/0x7c neigh_probe from __neigh_event_send+0x22c/0x47c __neigh_event_send from neigh_resolve_output+0x14c/0x1c0 neigh_resolve_output from ip_finish_output2+0x1c8/0x628 ip_finish_output2 from ip_send_skb+0x40/0xd8 ip_send_skb from udp_send_skb+0x124/0x340 udp_send_skb from udp_sendmsg+0x780/0x984 udp_sendmsg from __sys_sendto+0xd8/0x158 __sys_sendto from ret_fast_syscall+0x0/0x58 Let's fix the issue by checking for send_srp() and set_vbus() before calling them. For USB peripheral only cases these both could be NULL. Fixes: 657b306a7bdf ("usb: phy: add a new driver for omap usb2 phy") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20240128120556.8848-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-omap-usb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index dd2913ac0fa2..78e19b128962 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -117,7 +117,7 @@ static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->set_vbus) return -ENODEV; return phy->comparator->set_vbus(phy->comparator, enabled); @@ -127,7 +127,7 @@ static int omap_usb_start_srp(struct usb_otg *otg) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->start_srp) return -ENODEV; return phy->comparator->start_srp(phy->comparator); From bd504bcfec41a503b32054da5472904b404341a4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 15:57:56 +0100 Subject: [PATCH 577/768] dm: limit the number of targets and parameter size area The kvmalloc function fails with a warning if the size is larger than INT_MAX. The warning was triggered by a syscall testing robot. In order to avoid the warning, this commit limits the number of targets to 1048576 and the size of the parameter area to 1073741824. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 2 ++ drivers/md/dm-ioctl.c | 3 ++- drivers/md/dm-table.c | 9 +++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 095b9b49aa82..e6757a30dcca 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -22,6 +22,8 @@ #include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_io; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e65058e0ed06..3b1ad7127cb8 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1941,7 +1941,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern minimum_data_size - sizeof(param_kernel->version))) return -EFAULT; - if (param_kernel->data_size < minimum_data_size) { + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) { DMERR("Invalid data size in the ioctl structure: %u", param_kernel->data_size); return -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 260b5b8f2b0d..41f1d731ae5a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -129,7 +129,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, blk_mode_t mode, unsigned int num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -144,7 +149,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { From 9cf11ce06ea52911245578032761e40a6409cf35 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 15:59:15 +0100 Subject: [PATCH 578/768] dm stats: limit the number of entries The kvmalloc function fails with a warning if the size is larger than INT_MAX. Linus said that there should be limits that prevent this warning from being hit. This commit adds the limits to the dm-stats subsystem in DM core. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-stats.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index bdc14ec99814..1e5d988f44da 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -66,6 +66,9 @@ struct dm_stats_last_position { unsigned int last_rw; }; +#define DM_STAT_MAX_ENTRIES 8388608 +#define DM_STAT_MAX_HISTOGRAM_ENTRIES 134217728 + /* * A typo on the command line could possibly make the kernel run out of memory * and crash. To prevent the crash we account all used memory. We fail if we @@ -285,6 +288,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1)) return -EOVERFLOW; + if (n_entries > DM_STAT_MAX_ENTRIES) + return -EOVERFLOW; + shared_alloc_size = struct_size(s, stat_shared, n_entries); if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries) return -EOVERFLOW; @@ -297,6 +303,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, if (histogram_alloc_size / (n_histogram_entries + 1) != (size_t)n_entries * sizeof(unsigned long long)) return -EOVERFLOW; + if ((n_histogram_entries + 1) * (size_t)n_entries > DM_STAT_MAX_HISTOGRAM_ENTRIES) + return -EOVERFLOW; + if (!check_shared_memory(shared_alloc_size + histogram_alloc_size + num_possible_cpus() * (percpu_alloc_size + histogram_alloc_size))) return -ENOMEM; From 40ef8756fbdd9faec4da5d70352b28f1196132ed Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 16:00:57 +0100 Subject: [PATCH 579/768] dm writecache: allow allocations larger than 2GiB The function kvmalloc_node limits the allocation size to INT_MAX. This limit will be overflowed if dm-writecache attempts to map a device with 1TiB or larger length. This commit changes kvmalloc_array to vmalloc_array to avoid the limit. The commit also changes vmalloc(array_size()) to vmalloc_array(). Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 074cb785eafc..b463c28c39ad 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -299,7 +299,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) long i; wc->memory_map = NULL; - pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); + pages = vmalloc_array(p, sizeof(struct page *)); if (!pages) { r = -ENOMEM; goto err2; @@ -330,7 +330,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) r = -ENOMEM; goto err3; } - kvfree(pages); + vfree(pages); wc->memory_vmapped = true; } @@ -341,7 +341,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) return 0; err3: - kvfree(pages); + vfree(pages); err2: dax_read_unlock(id); err1: @@ -962,7 +962,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc) if (wc->entries) return 0; - wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); + wc->entries = vmalloc_array(wc->n_blocks, sizeof(struct wc_entry)); if (!wc->entries) return -ENOMEM; for (b = 0; b < wc->n_blocks; b++) { From 53ed2ac8fc1de6658aadae5714627ac99b9dddb0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jan 2024 11:34:49 -0800 Subject: [PATCH 580/768] soc: apple: mailbox: error pointers are negative integers In an entirely unrelated discussion where I pointed out a stupid thinko of mine, Rasmus piped up and noted that that obvious mistake already existed elsewhere in the kernel tree. An "error pointer" is the negative error value encoded as a pointer, making the whole "return error or valid pointer" use-case simple and straightforward. We use it all over the kernel. But the key here is that errors are _negative_ error numbers, not the horrid UNIX user-level model of "-1 and the value of 'errno'". The Apple mailbox driver used the positive error values, and thus just returned invalid normal pointers instead of actual errors. Of course, the reason nobody ever noticed is that the errors presumably never actually happen, so this is fixing a conceptual bug rather than an actual one. Reported-by: Rasmus Villemoes Link: https://lore.kernel.org/all/5c30afe0-f9fb-45d5-9333-dd914a1ea93a@prevas.dk/ Signed-off-by: Linus Torvalds --- drivers/soc/apple/mailbox.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c index 780199bf351e..49a0955e82d6 100644 --- a/drivers/soc/apple/mailbox.c +++ b/drivers/soc/apple/mailbox.c @@ -296,14 +296,14 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index) of_node_put(args.np); if (!pdev) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); mbox = platform_get_drvdata(pdev); if (!mbox) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER)) - return ERR_PTR(ENODEV); + return ERR_PTR(-ENODEV); return mbox; } From 5a287d3d2b9de2b3e747132c615599907ba5c3c1 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 19:45:31 +0100 Subject: [PATCH 581/768] lsm: fix default return value of the socket_getpeersec_*() hooks For these hooks the true "neutral" value is -EOPNOTSUPP, which is currently what is returned when no LSM provides this hook and what LSMs return when there is no security context set on the socket. Correct the value in and adjust the dispatch functions in security/security.c to avoid issues when the BPF LSM is enabled. Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Ondrej Mosnacek [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++-- security/security.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 185924c56378..76458b6d53da 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -315,9 +315,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) diff --git a/security/security.c b/security/security.c index 6196ccaba433..3aaad75c9ce8 100644 --- a/security/security.c +++ b/security/security.c @@ -4624,8 +4624,20 @@ EXPORT_SYMBOL(security_sock_rcv_skb); int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { - return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, - optval, optlen, len); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream, + list) { + rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen, + len); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_stream); } /** @@ -4645,8 +4657,19 @@ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { - return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock, - skb, secid); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram, + list) { + rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_dgram); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); From 80441f76ee67002437db61f3b317ed80cce085d2 Mon Sep 17 00:00:00 2001 From: Brenton Simpson Date: Tue, 30 Jan 2024 13:34:16 -0800 Subject: [PATCH 582/768] Input: xpad - add Lenovo Legion Go controllers The Lenovo Legion Go is a handheld gaming system, similar to a Steam Deck. It has a gamepad (including rear paddles), 3 gyroscopes, a trackpad, volume buttons, a power button, and 2 LED ring lights. The Legion Go firmware presents these controls as a USB hub with various devices attached. In its default state, the gamepad is presented as an Xbox controller connected to this hub. (By holding a combination of buttons, it can be changed to use the older DirectInput API.) This patch teaches the existing Xbox controller module `xpad` to bind to the controller in the Legion Go, which enables support for the: - directional pad, - analog sticks (including clicks), - X, Y, A, B, - start and select (or menu and capture), - shoulder buttons, and - rumble. The trackpad, touchscreen, volume controls, and power button are already supported via existing kernel modules. Two of the face buttons, the gyroscopes, rear paddles, and LEDs are not. After this patch lands, the Legion Go will be mostly functional in Linux, out-of-the-box. The various components of the USB hub can be synthesized into a single logical controller (including the additional buttons) in userspace with [Handheld Daemon](https://github.com/hhd-dev/hhd), which makes the Go fully functional. Signed-off-by: Brenton Simpson Link: https://lore.kernel.org/r/20240118183546.418064-1-appsforartists@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index b1244d7df6cc..7c4b2a5cc1b5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -294,6 +294,7 @@ static const struct xpad_device { { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -491,6 +492,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ From a4e61de63e34860c36a71d1a364edba16fb6203b Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Mon, 8 Jan 2024 17:18:33 +0530 Subject: [PATCH 583/768] misc: fastrpc: Mark all sessions as invalid in cb_remove In remoteproc shutdown sequence, rpmsg_remove will get called which would depopulate all the child nodes that have been created during rpmsg_probe. This would result in cb_remove call for all the context banks for the remoteproc. In cb_remove function, session 0 is getting skipped which is not correct as session 0 will never become available again. Add changes to mark session 0 also as invalid. Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Cc: stable Signed-off-by: Ekansh Gupta Link: https://lore.kernel.org/r/20240108114833.20480-1-quic_ekangupt@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 1c6c62a7f7f5..03319a1fa97f 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2191,7 +2191,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev) int i; spin_lock_irqsave(&cctx->lock, flags); - for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) { + for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) { if (cctx->session[i].sid == sess->sid) { cctx->session[i].valid = false; cctx->sesscount--; From ac9762a74c7ca7cbfcb4c65f5871373653a046ac Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Jan 2024 15:24:10 +0000 Subject: [PATCH 584/768] misc: open-dice: Fix spurious lockdep warning When probing the open-dice driver with PROVE_LOCKING=y, lockdep complains that the mutex in 'drvdata->lock' has a non-static key: | INFO: trying to register non-static key. | The code is fine but needs lockdep annotation, or maybe | you didn't initialize this object before use? | turning off the locking correctness validator. Fix the problem by initialising the mutex memory with mutex_init() instead of __MUTEX_INITIALIZER(). Cc: Arnd Bergmann Cc: David Brazdil Cc: Greg Kroah-Hartman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20240126152410.10148-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/open-dice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c index 8aea2d070a40..d279a4f195e2 100644 --- a/drivers/misc/open-dice.c +++ b/drivers/misc/open-dice.c @@ -140,7 +140,6 @@ static int __init open_dice_probe(struct platform_device *pdev) return -ENOMEM; *drvdata = (struct open_dice_drvdata){ - .lock = __MUTEX_INITIALIZER(drvdata->lock), .rmem = rmem, .misc = (struct miscdevice){ .parent = dev, @@ -150,6 +149,7 @@ static int __init open_dice_probe(struct platform_device *pdev) .mode = 0600, }, }; + mutex_init(&drvdata->lock); /* Index overflow check not needed, misc_register() will fail. */ snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); From b40f873a7c80dbafbb6f4a7a569f2dcaf969d283 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 Jan 2024 14:37:03 +0200 Subject: [PATCH 585/768] selftests: net: Add missing matchall classifier One of the test cases in the test_bridge_backup_port.sh selftest relies on a matchall classifier to drop unrelated traffic so that the Tx drop counter on the VXLAN device will only be incremented as a result of traffic generated by the test. However, the configuration option for the matchall classifier is missing from the configuration file which might explain the failures we see in the netdev CI [1]. Fix by adding CONFIG_NET_CLS_MATCHALL to the configuration file. [1] # Backup nexthop ID - invalid IDs # ------------------------------- [...] # TEST: Forwarding out of vx0 [ OK ] # TEST: No forwarding using backup nexthop ID [ OK ] # TEST: Tx drop increased [FAIL] # TEST: IPv6 address family nexthop as backup nexthop [ OK ] # TEST: No forwarding out of swp1 [ OK ] # TEST: Forwarding out of vx0 [ OK ] # TEST: No forwarding using backup nexthop ID [ OK ] # TEST: Tx drop increased [FAIL] [...] Fixes: b408453053fb ("selftests: net: Add bridge backup port and backup nexthop ID test") Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240129123703.1857843-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index d4b38177ed04..3d908b52f22f 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -52,6 +52,7 @@ CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m From f5c3eb4b7251baba5cd72c9e93920e710ac8194a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sat, 27 Jan 2024 18:50:32 +0100 Subject: [PATCH 586/768] bridge: mcast: fix disabled snooping after long uptime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original idea of the delay_time check was to not apply multicast snooping too early when an MLD querier appears. And to instead wait at least for MLD reports to arrive before switching from flooding to group based, MLD snooped forwarding, to avoid temporary packet loss. However in a batman-adv mesh network it was noticed that after 248 days of uptime 32bit MIPS based devices would start to signal that they had stopped applying multicast snooping due to missing queriers - even though they were the elected querier and still sending MLD queries themselves. While time_is_before_jiffies() generally is safe against jiffies wrap-arounds, like the code comments in jiffies.h explain, it won't be able to track a difference larger than ULONG_MAX/2. With a 32bit large jiffies and one jiffies tick every 10ms (CONFIG_HZ=100) on these MIPS devices running OpenWrt this would result in a difference larger than ULONG_MAX/2 after 248 (= 2^32/100/60/60/24/2) days and time_is_before_jiffies() would then start to return false instead of true. Leading to multicast snooping not being applied to multicast packets anymore. Fix this issue by using a proper timer_list object which won't have this ULONG_MAX/2 difference limitation. Fixes: b00589af3b04 ("bridge: disable snooping if there is no querier") Signed-off-by: Linus Lüssing Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240127175033.9640-1-linus.luessing@c0d3.blue Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 20 +++++++++++++++----- net/bridge/br_private.h | 4 ++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d7d021af1029..2d7b73242958 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t) } #endif +static void br_multicast_query_delay_expired(struct timer_list *t) +{ +} + static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) @@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, unsigned long max_delay) { if (!timer_pending(&query->timer)) - query->delay_time = jiffies + max_delay; + mod_timer(&query->delay_timer, jiffies + max_delay); mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } @@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_querier_interval = 255 * HZ; brmctx->multicast_membership_interval = 260 * HZ; - brmctx->ip4_other_query.delay_time = 0; brmctx->ip4_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; - brmctx->ip6_other_query.delay_time = 0; brmctx->ip6_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock); #endif @@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip4_multicast_local_router_expired, 0); timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); + timer_setup(&brmctx->ip4_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) @@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip6_multicast_local_router_expired, 0); timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); + timer_setup(&brmctx->ip6_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif @@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { del_timer_sync(&brmctx->ip4_mc_router_timer); del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_other_query.delay_timer); del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&brmctx->ip6_mc_router_timer); del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_other_query.delay_timer); del_timer_sync(&brmctx->ip6_own_query.timer); #endif } @@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) max_delay = brmctx->multicast_query_response_interval; if (!timer_pending(&brmctx->ip4_other_query.timer)) - brmctx->ip4_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip4_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) if (!timer_pending(&brmctx->ip6_other_query.timer)) - brmctx->ip6_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip6_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b0a92c344722..86ea5e6689b5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -78,7 +78,7 @@ struct bridge_mcast_own_query { /* other querier */ struct bridge_mcast_other_query { struct timer_list timer; - unsigned long delay_time; + struct timer_list delay_timer; }; /* selected querier */ @@ -1159,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, own_querier_enabled = false; } - return time_is_before_jiffies(querier->delay_time) && + return !timer_pending(&querier->delay_timer) && (own_querier_enabled || timer_pending(&querier->timer)); } From 1a89e24f8bfd3e3562d69709c9d9cd185ded869b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jan 2024 21:10:59 +0200 Subject: [PATCH 587/768] devlink: Fix referring to hw_addr attribute during state validation When port function state change is requested, and when the driver does not support it, it refers to the hw address attribute instead of state attribute. Seems like a copy paste error. Fix it by referring to the port function state attribute. Fixes: c0bea69d1ca7 ("devlink: Validate port function request") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129191059.129030-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- net/devlink/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/port.c b/net/devlink/port.c index 62e54e152ecf..78592912f657 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -674,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, return -EOPNOTSUPP; } if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) { - NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE], "Function does not support state setting"); return -EOPNOTSUPP; } From b4a1f4eaf1d798066affc6ad040f76eb1a16e1c9 Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Wed, 31 Jan 2024 17:12:24 +0800 Subject: [PATCH 588/768] USB: serial: option: add Fibocom FM101-GL variant Update the USB serial option driver support for the Fibocom FM101-GL LTE modules as there are actually several different variants. - VID:PID 2cb7:01a3, FM101-GL are laptop M.2 cards (with MBIM interfaces for /Linux/Chrome OS) 0x01a3:mbim,gnss Here are the outputs of usb-devices: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=01a3 Rev=05.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=5ccd5cd4 C: #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Signed-off-by: Puliang Lu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 72390dbf0769..2ae124c49d44 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2269,6 +2269,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ From 7cdd2108903a4e369eb37579830afc12a6877ec2 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:57 +0100 Subject: [PATCH 589/768] HID: bpf: remove double fdget() When the kfunc hid_bpf_attach_prog() is called, we called twice fdget(): one for fetching the type of the bpf program, and one for actually attaching the program to the device. The problem is that between those two calls, we have no guarantees that the prog_fd is still the same file descriptor for the given program. Solve this by calling bpf_prog_get() earlier, and use this to fetch the program type. Reported-by: Dan Carpenter Link: https://lore.kernel.org/bpf/CAO-hwJJ8vh8JD3-P43L-_CLNmPx0hWj44aom0O838vfP4=_1CA@mail.gmail.com/T/#t Cc: Fixes: f5c27da4e3c8 ("HID: initial BPF implementation") Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-1-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 66 +++++++++++++++++++---------- drivers/hid/bpf/hid_bpf_dispatch.h | 4 +- drivers/hid/bpf/hid_bpf_jmp_table.c | 20 ++------- 3 files changed, 49 insertions(+), 41 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index d9ef45fcaeab..5111d1fef0d3 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -241,6 +241,39 @@ int hid_bpf_reconnect(struct hid_device *hdev) return 0; } +static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog, + __u32 flags) +{ + int fd, err, prog_type; + + prog_type = hid_bpf_get_prog_attach_type(prog); + if (prog_type < 0) + return prog_type; + + if (prog_type >= HID_BPF_PROG_TYPE_MAX) + return -EINVAL; + + if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { + err = hid_bpf_allocate_event_data(hdev); + if (err) + return err; + } + + fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags); + if (fd < 0) + return fd; + + if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { + err = hid_bpf_reconnect(hdev); + if (err) { + close_fd(fd); + return err; + } + } + + return fd; +} + /** * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device * @@ -257,18 +290,13 @@ noinline int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) { struct hid_device *hdev; + struct bpf_prog *prog; struct device *dev; - int fd, err, prog_type = hid_bpf_get_prog_attach_type(prog_fd); + int fd; if (!hid_bpf_ops) return -EINVAL; - if (prog_type < 0) - return prog_type; - - if (prog_type >= HID_BPF_PROG_TYPE_MAX) - return -EINVAL; - if ((flags & ~HID_BPF_FLAG_MASK)) return -EINVAL; @@ -278,23 +306,17 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) hdev = to_hid_device(dev); - if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { - err = hid_bpf_allocate_event_data(hdev); - if (err) - return err; - } + /* + * take a ref on the prog itself, it will be released + * on errors or when it'll be detached + */ + prog = bpf_prog_get(prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); - fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, flags); + fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); if (fd < 0) - return fd; - - if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { - err = hid_bpf_reconnect(hdev); - if (err) { - close_fd(fd); - return err; - } - } + bpf_prog_put(prog); return fd; } diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h index 63dfc8605cd2..fbe0639d09f2 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.h +++ b/drivers/hid/bpf/hid_bpf_dispatch.h @@ -12,9 +12,9 @@ struct hid_bpf_ctx_kern { int hid_bpf_preload_skel(void); void hid_bpf_free_links_and_skel(void); -int hid_bpf_get_prog_attach_type(int prog_fd); +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog); int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd, - __u32 flags); + struct bpf_prog *prog, __u32 flags); void __hid_bpf_destroy_device(struct hid_device *hdev); int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type, struct hid_bpf_ctx_kern *ctx_kern); diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index eca34b7372f9..12f7cebddd73 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -333,15 +333,10 @@ static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog) return err; } -int hid_bpf_get_prog_attach_type(int prog_fd) +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog) { - struct bpf_prog *prog = NULL; - int i; int prog_type = HID_BPF_PROG_TYPE_UNDEF; - - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); + int i; for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) { if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) { @@ -350,8 +345,6 @@ int hid_bpf_get_prog_attach_type(int prog_fd) } } - bpf_prog_put(prog); - return prog_type; } @@ -388,19 +381,13 @@ static const struct bpf_link_ops hid_bpf_link_lops = { /* called from syscall */ noinline int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, - int prog_fd, __u32 flags) + int prog_fd, struct bpf_prog *prog, __u32 flags) { struct bpf_link_primer link_primer; struct hid_bpf_link *link; - struct bpf_prog *prog = NULL; struct hid_bpf_prog_entry *prog_entry; int cnt, err = -EINVAL, prog_table_idx = -1; - /* take a ref on the prog itself */ - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - mutex_lock(&hid_bpf_attach_lock); link = kzalloc(sizeof(*link), GFP_USER); @@ -467,7 +454,6 @@ __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, err_unlock: mutex_unlock(&hid_bpf_attach_lock); - bpf_prog_put(prog); kfree(link); return err; From 89be8aa5b0ecb3b729c7bcff64bb2af7921fec63 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:58 +0100 Subject: [PATCH 590/768] HID: bpf: actually free hdev memory after attaching a HID-BPF program Turns out that I got my reference counts wrong and each successful bus_find_device() actually calls get_device(), and we need to manually call put_device(). Ensure each bus_find_device() gets a matching put_device() when releasing the bpf programs and fix all the error paths. Cc: Fixes: f5c27da4e3c8 ("HID: initial BPF implementation") Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-2-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 29 +++++++++++++++++++++++------ drivers/hid/bpf/hid_bpf_jmp_table.c | 20 +++++++++++++++++--- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 5111d1fef0d3..7903c8638e81 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -292,7 +292,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) struct hid_device *hdev; struct bpf_prog *prog; struct device *dev; - int fd; + int err, fd; if (!hid_bpf_ops) return -EINVAL; @@ -311,14 +311,24 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) * on errors or when it'll be detached */ prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out_dev_put; + } fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); - if (fd < 0) - bpf_prog_put(prog); + if (fd < 0) { + err = fd; + goto out_prog_put; + } return fd; + + out_prog_put: + bpf_prog_put(prog); + out_dev_put: + put_device(dev); + return err; } /** @@ -345,8 +355,10 @@ hid_bpf_allocate_context(unsigned int hid_id) hdev = to_hid_device(dev); ctx_kern = kzalloc(sizeof(*ctx_kern), GFP_KERNEL); - if (!ctx_kern) + if (!ctx_kern) { + put_device(dev); return NULL; + } ctx_kern->ctx.hid = hdev; @@ -363,10 +375,15 @@ noinline void hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; + struct hid_device *hid; ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx); + hid = (struct hid_device *)ctx_kern->ctx.hid; /* ignore const */ kfree(ctx_kern); + + /* get_device() is called by bus_find_device() */ + put_device(&hid->dev); } /** diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index 12f7cebddd73..aa8e1c79cdf5 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -196,6 +196,7 @@ static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx) static void hid_bpf_release_progs(struct work_struct *work) { int i, j, n, map_fd = -1; + bool hdev_destroyed; if (!jmp_table.map) return; @@ -220,6 +221,12 @@ static void hid_bpf_release_progs(struct work_struct *work) if (entry->hdev) { hdev = entry->hdev; type = entry->type; + /* + * hdev is still valid, even if we are called after hid_destroy_device(): + * when hid_bpf_attach() gets called, it takes a ref on the dev through + * bus_find_device() + */ + hdev_destroyed = hdev->bpf.destroyed; hid_bpf_populate_hdev(hdev, type); @@ -232,12 +239,19 @@ static void hid_bpf_release_progs(struct work_struct *work) if (test_bit(next->idx, jmp_table.enabled)) continue; - if (next->hdev == hdev && next->type == type) + if (next->hdev == hdev && next->type == type) { + /* + * clear the hdev reference and decrement the device ref + * that was taken during bus_find_device() while calling + * hid_bpf_attach() + */ next->hdev = NULL; + put_device(&hdev->dev); + } } - /* if type was rdesc fixup, reconnect device */ - if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP) + /* if type was rdesc fixup and the device is not gone, reconnect device */ + if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed) hid_bpf_reconnect(hdev); } } From 764ad6b02777d77dca3659ca490f0898aa593670 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:59 +0100 Subject: [PATCH 591/768] HID: bpf: use __bpf_kfunc instead of noinline Follow the docs at Documentation/bpf/kfuncs.rst: - declare the function with `__bpf_kfunc` - disables missing prototype warnings, which allows to remove them from include/linux/hid-bpf.h Removing the prototypes is not an issue because we currently have to redeclare them when writing the BPF program. They will eventually be generated by bpftool directly AFAIU. Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-3-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 18 +++++++++++++----- include/linux/hid_bpf.h | 11 ----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 7903c8638e81..470ae2c29c94 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -143,6 +143,9 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s } EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_get_data - Get the kernel memory pointer associated with the context @ctx * @@ -152,7 +155,7 @@ EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); * * @returns %NULL on error, an %__u8 memory pointer on success */ -noinline __u8 * +__bpf_kfunc __u8 * hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr_buf_size) { struct hid_bpf_ctx_kern *ctx_kern; @@ -167,6 +170,7 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr return ctx_kern->data + offset; } +__bpf_kfunc_end_defs(); /* * The following set contains all functions we agree BPF programs @@ -274,6 +278,9 @@ static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct b return fd; } +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device * @@ -286,7 +293,7 @@ static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct b * is pinned to the BPF file system). */ /* called from syscall */ -noinline int +__bpf_kfunc int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) { struct hid_device *hdev; @@ -338,7 +345,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) * * @returns A pointer to &struct hid_bpf_ctx on success, %NULL on error. */ -noinline struct hid_bpf_ctx * +__bpf_kfunc struct hid_bpf_ctx * hid_bpf_allocate_context(unsigned int hid_id) { struct hid_device *hdev; @@ -371,7 +378,7 @@ hid_bpf_allocate_context(unsigned int hid_id) * @ctx: the HID-BPF context to release * */ -noinline void +__bpf_kfunc void hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; @@ -397,7 +404,7 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx) * * @returns %0 on success, a negative error code otherwise. */ -noinline int +__bpf_kfunc int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, enum hid_report_type rtype, enum hid_class_request reqtype) { @@ -465,6 +472,7 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, kfree(dma_data); return ret; } +__bpf_kfunc_end_defs(); /* our HID-BPF entrypoints */ BTF_SET8_START(hid_bpf_fmodret_ids) diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 840cd254172d..7118ac28d468 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -77,17 +77,6 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); -/* Following functions are kfunc that we export to BPF programs */ -/* available everywhere in HID-BPF */ -__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); - -/* only available in syscall */ -int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); -int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, - enum hid_report_type rtype, enum hid_class_request reqtype); -struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); -void hid_bpf_release_context(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ From 51af8f255bdaca6d501afc0d085b808f67b44d91 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 30 Jan 2024 15:21:51 +0200 Subject: [PATCH 592/768] ahci: Extend ASM1061 43-bit DMA address quirk to other ASM106x parts ASMedia have confirmed that all ASM106x parts currently listed in ahci_pci_tbl[] suffer from the 43-bit DMA address limitation that we ran into on the ASM1061, and therefore, we need to apply the quirk added by commit 20730e9b2778 ("ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers") to the other supported ASM106x parts as well. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-ide/ZbopwKZJAKQRA4Xv@x1-carbon/ Signed-off-by: Lennert Buytenhek [cassel: add link to ASMedia confirmation email] Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index d2460fa985b7..da2e74fce2d9 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -606,13 +606,13 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* ASMedia */ - { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ - { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ - { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ - { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */ + { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */ + { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */ { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ From 913b9d443a0180cf0de3548f1ab3149378998486 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Jan 2024 13:37:25 +0100 Subject: [PATCH 593/768] parisc: BTLB: Fix crash when setting up BTLB at CPU bringup When using hotplug and bringing up a 32-bit CPU, ask the firmware about the BTLB information to set up the static (block) TLB entries. For that write access to the static btlb_info struct is needed, but since it is marked __ro_after_init the kernel segfaults with missing write permissions. Fix the crash by dropping the __ro_after_init annotation. Fixes: e5ef93d02d6c ("parisc: BTLB: Initialize BTLB tables at CPU startup") Signed-off-by: Helge Deller Cc: # v6.6+ --- arch/parisc/kernel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 5552602fcaef..422f3e1e6d9c 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init; struct pdc_cache_info cache_info __ro_after_init; #ifndef CONFIG_PA20 -struct pdc_btlb_info btlb_info __ro_after_init; +struct pdc_btlb_info btlb_info; #endif DEFINE_STATIC_KEY_TRUE(parisc_has_cache); From 2468e8922d2f6da81a6192b73023eff67e3fefdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Relvas?= Date: Wed, 31 Jan 2024 11:34:09 +0000 Subject: [PATCH 594/768] ALSA: hda/realtek: Apply headset jack quirk for non-bass alc287 thinkpads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There currently exists two thinkpad headset jack fixups: ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK ALC285_FIXUP_THINKPAD_HEADSET_JACK The latter is applied to alc285 and alc287 thinkpads which contain bass speakers. However, the former was only being applied to alc285 thinkpads, leaving non-bass alc287 thinkpads with no headset button controls. This patch fixes that by adding ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK to the alc287 chains, allowing the detection of headset buttons. Signed-off-by: José Relvas Cc: Link: https://lore.kernel.org/r/20240131113407.34698-3-josemonsantorelvas@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bfa244028b99..f98520c0bed9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9579,7 +9579,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_TAS2781_I2C] = { .type = HDA_FIXUP_FUNC, @@ -9606,6 +9606,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_THINKPAD_I2S_SPK] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { .type = HDA_FIXUP_FUNC, From 89876175c8c83c35cf0cc8e21b7460dfed7b118a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 20 Jan 2024 17:32:55 +0900 Subject: [PATCH 595/768] kbuild: fix W= flags in the help message W=c and W=e are supported. Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9869f57c3fb3..02e3500c2c17 100644 --- a/Makefile +++ b/Makefile @@ -1662,7 +1662,7 @@ help: @echo ' (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' - @echo ' make W=n [targets] Enable extra build checks, n=1,2,3 where' + @echo ' make W=n [targets] Enable extra build checks, n=1,2,3,c,e where' @echo ' 1: warnings which may be relevant and do not occur too often' @echo ' 2: warnings which occur quite often but may still be relevant' @echo ' 3: more obscure warnings, can most likely be ignored' From cda5f94e88b45c9209599bac15fc44add5a59f60 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 27 Jan 2024 22:28:11 +0900 Subject: [PATCH 596/768] modpost: avoid using the alias attribute Aiden Leong reported modpost fails to build on macOS since commit 16a473f60edc ("modpost: inform compilers that fatal() never returns"): scripts/mod/modpost.c:93:21: error: aliases are not supported on darwin Nathan's research indicates that Darwin seems to support weak aliases at least [1]. Although the situation might be improved in future Clang versions, we can achieve a similar outcome without relying on it. This commit makes fatal() a macro of error() + exit(1) in modpost.h, as compilers recognize that exit() never returns. [1]: https://github.com/llvm/llvm-project/issues/71001 Fixes: 16a473f60edc ("modpost: inform compilers that fatal() never returns") Reported-by: Aiden Leong Closes: https://lore.kernel.org/all/d9ac2960-6644-4a87-b5e4-4bfb6e0364a8@aibsd.com/ Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 12 +----------- scripts/mod/modpost.h | 6 +----- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index acf72112acd8..267b9a0a3abc 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -70,9 +70,7 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) break; case LOG_ERROR: fprintf(stderr, "ERROR: "); - break; - case LOG_FATAL: - fprintf(stderr, "FATAL: "); + error_occurred = true; break; default: /* invalid loglevel, ignore */ break; @@ -83,16 +81,8 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) va_start(arglist, fmt); vfprintf(stderr, fmt, arglist); va_end(arglist); - - if (loglevel == LOG_FATAL) - exit(1); - if (loglevel == LOG_ERROR) - error_occurred = true; } -void __attribute__((alias("modpost_log"))) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - static inline bool strends(const char *str, const char *postfix) { if (strlen(str) < strlen(postfix)) diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 835cababf1b0..ee43c7950636 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -194,15 +194,11 @@ void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym); enum loglevel { LOG_WARN, LOG_ERROR, - LOG_FATAL }; void __attribute__((format(printf, 2, 3))) modpost_log(enum loglevel loglevel, const char *fmt, ...); -void __attribute__((format(printf, 2, 3), noreturn)) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - /* * warn - show the given message, then let modpost continue running, still * allowing modpost to exit successfully. This should be used when @@ -218,4 +214,4 @@ modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); */ #define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args) #define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) -#define fatal(fmt, args...) modpost_log_noret(LOG_FATAL, fmt, ##args) +#define fatal(fmt, args...) do { error(fmt, ##args); exit(1); } while (1) From 82175d1f9430d5a026e2231782d13da0bf57155c Mon Sep 17 00:00:00 2001 From: Dmitry Goncharov Date: Sun, 28 Jan 2024 10:10:39 -0500 Subject: [PATCH 597/768] kbuild: Replace tabs with spaces when followed by conditionals This is needed for the future (post make-4.4.1) versions of gnu make. Starting from https://git.savannah.gnu.org/cgit/make.git/commit/?id=07fcee35f058a876447c8a021f9eb1943f902534 gnu make won't allow conditionals to follow recipe prefix. For example there is a tab followed by ifeq on line 324 in the root Makefile. With the new make this conditional causes the following $ make cpu.o /home/dgoncharov/src/linux-kbuild/Makefile:2063: *** missing 'endif'. Stop. make: *** [Makefile:240: __sub-make] Error 2 This patch replaces tabs followed by conditionals with 8 spaces. See https://savannah.gnu.org/bugs/?64185 and https://savannah.gnu.org/bugs/?64259 for details. Signed-off-by: Dmitry Goncharov Reported-by: Martin Dorey Reviewed-by: Miguel Ojeda Signed-off-by: Masahiro Yamada --- Makefile | 12 ++++++------ arch/m68k/Makefile | 4 ++-- arch/parisc/Makefile | 4 ++-- arch/x86/Makefile | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 02e3500c2c17..087c9c64f611 100644 --- a/Makefile +++ b/Makefile @@ -294,15 +294,15 @@ may-sync-config := 1 single-build := ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) need-config := - endif + endif endif ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) may-sync-config := - endif + endif endif need-compiler := $(may-sync-config) @@ -323,9 +323,9 @@ endif # We cannot build single targets and the others at the same time ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),) single-build := 1 - ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) + ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) mixed-build := 1 - endif + endif endif # For "make -j clean all", "make -j mrproper defconfig all", etc. diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 43e39040d3ac..76ef1a67c361 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -15,10 +15,10 @@ KBUILD_DEFCONFIG := multi_defconfig ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := $(call cc-cross-prefix, \ m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-) - endif + endif endif # diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 920db57b6b4c..7486b3b30594 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -50,12 +50,12 @@ export CROSS32CC # Set default cross compiler for kernel build ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux CROSS_COMPILE := $(call cc-cross-prefix, \ $(foreach a,$(CC_ARCHES), \ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) - endif + endif endif ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a068de12a56..2264db14a25d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y) # temporary until string.h is fixed KBUILD_CFLAGS += -ffreestanding - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) + ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - else + else KBUILD_CFLAGS += -mstack-protector-guard=global - endif - endif + endif + endif else BITS := 64 UTS_MACHINE := x86_64 From 358de8b4f201bc05712484b15f0109b1ae3516a8 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 29 Jan 2024 10:28:19 +0100 Subject: [PATCH 598/768] kbuild: rpm-pkg: simplify installkernel %post The new installkernel application that is now included in systemd-udev package allows installation although destination files are already present in the boot directory of the kernel package, but is failing with the implemented workaround for the old installkernel application from grubby package. For the new installkernel application, as Davide says: <> But we need to keep the old behavior as well, because the old installkernel application from grubby package, does not allow this simplification and we need to be backward compatible to avoid issues with the different packages. Mimic Fedora shipping process and store vmlinuz, config amd System.map in the module directory instead of the boot directory. In this way, we will avoid the commented problem for all the cases, because the new destination files are not going to exist in the boot directory of the kernel package. Replace installkernel tool with kernel-install tool, because the latter is more complete. Besides, after installkernel tool execution, check to complete if the correct package files vmlinuz, System.map and config files are present in /boot directory, and if necessary, copy manually for install operation. In this way, take into account if files were not previously copied from /usr/lib/kernel/install.d/* scripts and if the suitable files for the requested package are present (it could be others if the rpm files were replace with a new pacakge with the same release and a different build). Tested with Fedora 38, Fedora 39, RHEL 9, Oracle Linux 9.3, openSUSE Tumbleweed and openMandrive ROME, using dnf/zypper and rpm tools. cc: stable@vger.kernel.org Co-Developed-by: Davide Cavalca Signed-off-by: Jose Ignacio Tornos Martinez Signed-off-by: Masahiro Yamada --- scripts/package/kernel.spec | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 89298983a169..f58726671fb3 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -55,12 +55,12 @@ patch -p1 < %{SOURCE2} %{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release} %install -mkdir -p %{buildroot}/boot -cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE} +mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE} +cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz %{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install -cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE} -cp .config %{buildroot}/boot/config-%{KERNELRELEASE} +cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE} +cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build %if %{with_devel} %{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}' @@ -70,13 +70,14 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA rm -rf %{buildroot} %post -if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then -cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm -cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE} -/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm +if [ -x /usr/bin/kernel-install ]; then + /usr/bin/kernel-install add %{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/vmlinuz fi +for file in vmlinuz System.map config; do + if ! cmp --silent "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"; then + cp "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}" + fi +done %preun if [ -x /sbin/new-kernel-pkg ]; then @@ -94,7 +95,6 @@ fi %defattr (-, root, root) /lib/modules/%{KERNELRELEASE} %exclude /lib/modules/%{KERNELRELEASE}/build -/boot/* %files headers %defattr (-, root, root) From 1c1914d6e8c6edbf5b45047419ff51abdb1dce96 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 17 Jan 2024 18:11:40 +0000 Subject: [PATCH 599/768] dma-buf: heaps: Don't track CMA dma-buf pages under RssFile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DMA buffers allocated from the CMA dma-buf heap get counted under RssFile for processes that map them and trigger page faults. In addition to the incorrect accounting reported to userspace, reclaim behavior was influenced by the MM_FILEPAGES counter until linux 6.8, but this memory is not reclaimable. [1] Change the CMA dma-buf heap to set VM_PFNMAP on the VMA so MM does not poke at the memory managed by this dma-buf heap, and use vmf_insert_pfn to correct the RSS accounting. The system dma-buf heap does not suffer from this issue since remap_pfn_range is used during the mmap of the buffer, which also sets VM_PFNMAP on the VMA. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/mm/vmscan.c?id=fb46e22a9e3863e08aef8815df9f17d0f4b9aede Fixes: b61614ec318a ("dma-buf: heaps: Add CMA heap to dmabuf heaps") Signed-off-by: T.J. Mercier Acked-by: Christian König Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20240117181141.286383-1-tjmercier@google.com --- drivers/dma-buf/heaps/cma_heap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index ee899f8e6721..4a63567e93ba 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -168,10 +168,7 @@ static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf) if (vmf->pgoff > buffer->pagecount) return VM_FAULT_SIGBUS; - vmf->page = buffer->pages[vmf->pgoff]; - get_page(vmf->page); - - return 0; + return vmf_insert_pfn(vma, vmf->address, page_to_pfn(buffer->pages[vmf->pgoff])); } static const struct vm_operations_struct dma_heap_vm_ops = { @@ -185,6 +182,8 @@ static int cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) return -EINVAL; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); + vma->vm_ops = &dma_heap_vm_ops; vma->vm_private_data = buffer; From bfef491df67022c56aab3b831044f8d259f9441f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 26 Jan 2024 22:30:10 +0900 Subject: [PATCH 600/768] kconfig: initialize sym->curr.tri to 'no' for all symbol types again Geert Uytterhoeven reported that commit 4e244c10eab3 ("kconfig: remove unneeded symbol_empty variable") changed the default value of CONFIG_LOG_CPU_MAX_BUF_SHIFT from 12 to 0. As it turned out, this is an undefined behavior because sym_calc_value() stopped setting the sym->curr.tri field for 'int', 'hex', and 'string' symbols. This commit restores the original behavior, where 'int', 'hex', 'string' symbols are interpreted as false if used in boolean contexts. CONFIG_LOG_CPU_MAX_BUF_SHIFT will default to 12 again, irrespective of CONFIG_BASE_SMALL. Presumably, this is not the intended behavior, as already reported [1], but this is another issue that should be addressed by a separate patch. [1]: https://lore.kernel.org/all/f6856be8-54b7-0fa0-1d17-39632bf29ada@oracle.com/ Fixes: 4e244c10eab3 ("kconfig: remove unneeded symbol_empty variable") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdWm6u1wX7efZQf=2XUAHascps76YQac6rdnQGhc8nop_Q@mail.gmail.com/ Tested-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada --- scripts/kconfig/symbol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 3e808528aaea..e9e9fb8d8674 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -345,6 +345,8 @@ void sym_calc_value(struct symbol *sym) oldval = sym->curr; + newval.tri = no; + switch (sym->type) { case S_INT: newval.val = "0"; @@ -357,7 +359,7 @@ void sym_calc_value(struct symbol *sym) break; case S_BOOLEAN: case S_TRISTATE: - newval = symbol_no.curr; + newval.val = "n"; break; default: sym->curr.val = sym->name; From 1e560864159d002b453da42bd2c13a1805515a20 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Jan 2024 11:02:43 +0100 Subject: [PATCH 601/768] PCI/ASPM: Fix deadlock when enabling ASPM A last minute revert in 6.7-final introduced a potential deadlock when enabling ASPM during probe of Qualcomm PCIe controllers as reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0 #40 Not tainted -------------------------------------------- kworker/u16:5/90 is trying to acquire lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc but task is already holding lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Call trace: print_deadlock_bug+0x25c/0x348 __lock_acquire+0x10a4/0x2064 lock_acquire+0x1e8/0x318 down_read+0x60/0x184 pcie_aspm_pm_state_change+0x58/0xdc pci_set_full_power_state+0xa8/0x114 pci_set_power_state+0xc4/0x120 qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] pci_walk_bus+0x64/0xbc qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] The deadlock can easily be reproduced on machines like the Lenovo ThinkPad X13s by adding a delay to increase the race window during asynchronous probe where another thread can take a write lock. Add a new pci_set_power_state_locked() and associated helper functions that can be called with the PCI bus semaphore held to avoid taking the read lock twice. Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org Fixes: f93e71aea6c6 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Helgaas Cc: # 6.7 --- drivers/pci/bus.c | 49 ++++++++++------ drivers/pci/controller/dwc/pcie-qcom.c | 2 +- drivers/pci/pci.c | 78 +++++++++++++++++--------- drivers/pci/pci.h | 4 +- drivers/pci/pcie/aspm.c | 13 +++-- include/linux/pci.h | 5 ++ 6 files changed, 101 insertions(+), 50 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 9c2137dae429..826b5016a101 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_add_devices); -/** pci_walk_bus - walk devices on/under bus, calling callback. - * @top bus whose devices should be walked - * @cb callback to be called for each device found - * @userdata arbitrary pointer to be passed to callback. - * - * Walk the given bus, including any bridged devices - * on buses under this bus. Call the provided callback - * on each device found. - * - * We check the return of @cb each time. If it returns anything - * other than 0, we break out. - * - */ -void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata) +static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata, bool locked) { struct pci_dev *dev; struct pci_bus *bus; @@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), int retval; bus = top; - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); next = top->devices.next; for (;;) { if (next == &bus->devices) { @@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), if (retval) break; } - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); +} + +/** + * pci_walk_bus - walk devices on/under bus, calling callback. + * @top: bus whose devices should be walked + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * Walk the given bus, including any bridged devices + * on buses under this bus. Call the provided callback + * on each device found. + * + * We check the return of @cb each time. If it returns anything + * other than 0, we break out. + */ +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + __pci_walk_bus(top, cb, userdata, false); } EXPORT_SYMBOL_GPL(pci_walk_bus); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + lockdep_assert_held(&pci_bus_sem); + + __pci_walk_bus(top, cb, userdata, true); +} +EXPORT_SYMBOL_GPL(pci_walk_bus_locked); + struct pci_bus *pci_bus_get(struct pci_bus *bus) { if (bus) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 10f2d0bb86be..2ce2a3bd932b 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) * Downstream devices need to be in D0 state before enabling PCI PM * substates. */ - pci_set_power_state(pdev, PCI_D0); + pci_set_power_state_locked(pdev, PCI_D0); pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d8f11a078924..9ab9b1008d8b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1354,6 +1354,7 @@ end: /** * pci_set_full_power_state - Put a PCI device into D0 and update its state * @dev: PCI device to power up + * @locked: whether pci_bus_sem is held * * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register * to confirm the state change, restore its BARs if they might be lost and @@ -1363,7 +1364,7 @@ end: * to D0, it is more efficient to use pci_power_up() directly instead of this * function. */ -static int pci_set_full_power_state(struct pci_dev *dev) +static int pci_set_full_power_state(struct pci_dev *dev, bool locked) { u16 pmcsr; int ret; @@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev) } if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } @@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) pci_walk_bus(bus, __pci_dev_set_current_state, &state); } +static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked) +{ + if (!bus) + return; + + if (locked) + pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state); + else + pci_walk_bus(bus, __pci_dev_set_current_state, &state); +} + /** * pci_set_low_power_state - Put a PCI device into a low-power state. * @dev: PCI device to handle. * @state: PCI power state (D1, D2, D3hot) to put the device into. + * @locked: whether pci_bus_sem is held * * Use the device's PCI_PM_CTRL register to put it into a low-power state. * @@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) * 0 if device already is in the requested state. * 0 if device's power state has been successfully changed. */ -static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) +static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { u16 pmcsr; @@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) pci_power_name(state)); if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } -/** - * pci_set_power_state - Set the power state of a PCI device - * @dev: PCI device to handle. - * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. - * - * Transition a device to a new power state, using the platform firmware and/or - * the device's PCI PM registers. - * - * RETURN VALUE: - * -EINVAL if the requested state is invalid. - * -EIO if device does not support PCI PM or its PM capabilities register has a - * wrong version, or device doesn't support the requested state. - * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. - * 0 if device already is in the requested state. - * 0 if the transition is to D3 but D3 is not supported. - * 0 if device's power state has been successfully changed. - */ -int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { int error; @@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; if (state == PCI_D0) - return pci_set_full_power_state(dev); + return pci_set_full_power_state(dev, locked); /* * This device is quirked not to be put into D3, so don't put it in @@ -1556,16 +1552,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) * To put the device in D3cold, put it into D3hot in the native * way, then put it into D3cold using platform ops. */ - error = pci_set_low_power_state(dev, PCI_D3hot); + error = pci_set_low_power_state(dev, PCI_D3hot, locked); if (pci_platform_power_transition(dev, PCI_D3cold)) return error; /* Powering off a bridge may power off the whole hierarchy */ if (dev->current_state == PCI_D3cold) - pci_bus_set_current_state(dev->subordinate, PCI_D3cold); + __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked); } else { - error = pci_set_low_power_state(dev, state); + error = pci_set_low_power_state(dev, state, locked); if (pci_platform_power_transition(dev, state)) return error; @@ -1573,8 +1569,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } + +/** + * pci_set_power_state - Set the power state of a PCI device + * @dev: PCI device to handle. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * + * Transition a device to a new power state, using the platform firmware and/or + * the device's PCI PM registers. + * + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. + * 0 if device already is in the requested state. + * 0 if the transition is to D3 but D3 is not supported. + * 0 if device's power state has been successfully changed. + */ +int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +{ + return __pci_set_power_state(dev, state, false); +} EXPORT_SYMBOL(pci_set_power_state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ + lockdep_assert_held(&pci_bus_sem); + + return __pci_set_power_state(dev, state, true); +} +EXPORT_SYMBOL(pci_set_power_state_locked); + #define PCI_EXP_SAVE_REGS 7 static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 2336a8d1edab..e9750b1b19ba 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -571,12 +571,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); -void pcie_aspm_pm_state_change(struct pci_dev *pdev); +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } -static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } +static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } #endif diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 5a0066ecc3c5..bc0bd86695ec 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) up_read(&pci_bus_sem); } -/* @pdev: the root port or switch downstream port */ -void pcie_aspm_pm_state_change(struct pci_dev *pdev) +/* + * @pdev: the root port or switch downstream port + * @locked: whether pci_bus_sem is held + */ +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { struct pcie_link_state *link = pdev->link_state; @@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev) * Devices changed PM state, we should recheck if latency * meets all functions' requirement */ - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); pcie_update_aspm_capable(link->root); pcie_config_aspm_path(link); mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); } void pcie_aspm_powersave_config_link(struct pci_dev *pdev) diff --git a/include/linux/pci.h b/include/linux/pci.h index add9368e6314..7ab0d13672da 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, struct pci_saved_state **state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); @@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return 0; } +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ return 0; } static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) { return 0; } static inline pci_power_t pci_choose_state(struct pci_dev *dev, From 925bd5e08106bd5bfbd1cb8e124c89a0b4003569 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 29 Jan 2024 17:59:33 +0100 Subject: [PATCH 602/768] MAINTAINERS: Add Manivannan Sadhasivam as PCI Endpoint maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCI endpoint subsystem is evolving at a rate I cannot keep up with, therefore I am standing down as a maintainer handing over to Manivannan (currently reviewer for this code) and Krzysztof who are doing an excellent job on the matter - they don't need my help any longer. Link: https://lore.kernel.org/r/20240129165933.33428-1-lpieralisi@kernel.org Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Manivannan Sadhasivam Cc: Krzysztof Wilczyński --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..a40cfcd1c65e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16856,9 +16856,8 @@ F: Documentation/devicetree/bindings/pci/xilinx-versal-cpm.yaml F: drivers/pci/controller/pcie-xilinx-cpm.c PCI ENDPOINT SUBSYSTEM -M: Lorenzo Pieralisi +M: Manivannan Sadhasivam M: Krzysztof Wilczyński -R: Manivannan Sadhasivam R: Kishon Vijay Abraham I L: linux-pci@vger.kernel.org S: Supported From 9189526c46f2ad14df25dbdc30443f79d03dc7bd Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 24 Jan 2024 23:50:31 +0100 Subject: [PATCH 603/768] MAINTAINERS: Update i2c host drivers repository The i2c host patches are now set to be merged into the following repository: git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git Cc: Wolfram Sang Acked-by: Wolfram Sang Signed-off-by: Andi Shyti --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8999497011a2..179929cce68c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10091,7 +10091,7 @@ L: linux-i2c@vger.kernel.org S: Maintained W: https://i2c.wiki.kernel.org/ Q: https://patchwork.ozlabs.org/project/linux-i2c/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git F: Documentation/devicetree/bindings/i2c/ F: drivers/i2c/algos/ F: drivers/i2c/busses/ From c9ec85153fea6873c52ed4f5055c87263f1b54f9 Mon Sep 17 00:00:00 2001 From: Matthias May Date: Tue, 30 Jan 2024 10:12:18 +0000 Subject: [PATCH 604/768] selftests: net: add missing config for GENEVE l2_tos_ttl_inherit.sh verifies the inheritance of tos and ttl for GRETAP, VXLAN and GENEVE. Before testing it checks if the required module is available and if not skips the tests accordingly. Currently only GRETAP and VXLAN are tested because the GENEVE module is missing. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Signed-off-by: Matthias May Link: https://lore.kernel.org/r/20240130101157.196006-1-matthias.may@westermo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3d908b52f22f..77a173635a29 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -19,6 +19,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_VLAN_8021Q=y +CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_IP_GRE=m From fdd0ae72b34e56eb5e896d067c49a78ecb451032 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 31 Jan 2024 11:24:40 -0300 Subject: [PATCH 605/768] perf tools headers: update the asm-generic/unaligned.h copy with the kernel sources To pick up the changes in: 1ab33c03145d0f6c ("asm-generic: make sparse happy with odd-sized put_unaligned_*()") Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/asm-generic/unaligned.h include/asm-generic/unaligned.h Cc: Adrian Hunter Cc: Dmitry Torokhov Cc: Ian Rogers Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/Zbp9I7rmFj1Owhug@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/asm-generic/unaligned.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h index 2fd551915c20..cdd2fd078027 100644 --- a/tools/include/asm-generic/unaligned.h +++ b/tools/include/asm-generic/unaligned.h @@ -105,9 +105,9 @@ static inline u32 get_unaligned_le24(const void *p) static inline void __put_unaligned_be24(const u32 val, u8 *p) { - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be24(const u32 val, void *p) @@ -117,9 +117,9 @@ static inline void put_unaligned_be24(const u32 val, void *p) static inline void __put_unaligned_le24(const u32 val, u8 *p) { - *p++ = val; - *p++ = val >> 8; - *p++ = val >> 16; + *p++ = val & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = (val >> 16) & 0xff; } static inline void put_unaligned_le24(const u32 val, void *p) @@ -129,12 +129,12 @@ static inline void put_unaligned_le24(const u32 val, void *p) static inline void __put_unaligned_be48(const u64 val, u8 *p) { - *p++ = val >> 40; - *p++ = val >> 32; - *p++ = val >> 24; - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 40) & 0xff; + *p++ = (val >> 32) & 0xff; + *p++ = (val >> 24) & 0xff; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be48(const u64 val, void *p) From 2b9c3eb32a699acdd4784d6b93743271b4970899 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sat, 14 Oct 2023 12:20:15 +0200 Subject: [PATCH 606/768] Input: bcm5974 - check endpoint type before starting traffic syzbot has found a type mismatch between a USB pipe and the transfer endpoint, which is triggered by the bcm5974 driver[1]. This driver expects the device to provide input interrupt endpoints and if that is not the case, the driver registration should terminate. Repros are available to reproduce this issue with a certain setup for the dummy_hcd, leading to an interrupt/bulk mismatch which is caught in the USB core after calling usb_submit_urb() with the following message: "BOGUS urb xfer, pipe 1 != type 3" Some other device drivers (like the appletouch driver bcm5974 is mainly based on) provide some checking mechanism to make sure that an IN interrupt endpoint is available. In this particular case the endpoint addresses are provided by a config table, so the checking can be targeted to the provided endpoints. Add some basic checking to guarantee that the endpoints available match the expected type for both the trackpad and button endpoints. This issue was only found for the trackpad endpoint, but the checking has been added to the button endpoint as well for the same reasons. Given that there was never a check for the endpoint type, this bug has been there since the first implementation of the driver (f89bd95c5c94). [1] https://syzkaller.appspot.com/bug?extid=348331f63b034f89b622 Fixes: f89bd95c5c94 ("Input: bcm5974 - add driver for Macbook Air and Pro Penryn touchpads") Signed-off-by: Javier Carrasco Reported-and-tested-by: syzbot+348331f63b034f89b622@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20231007-topic-bcm5974_bulk-v3-1-d0f38b9d2935@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index ca150618d32f..953992b458e9 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -19,6 +19,7 @@ * Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) */ +#include "linux/usb.h" #include #include #include @@ -193,6 +194,8 @@ enum tp_type { /* list of device capability bits */ #define HAS_INTEGRATED_BUTTON 1 +/* maximum number of supported endpoints (currently trackpad and button) */ +#define MAX_ENDPOINTS 2 /* trackpad finger data block size */ #define FSIZE_TYPE1 (14 * sizeof(__le16)) @@ -891,6 +894,18 @@ static int bcm5974_resume(struct usb_interface *iface) return error; } +static bool bcm5974_check_endpoints(struct usb_interface *iface, + const struct bcm5974_config *cfg) +{ + u8 ep_addr[MAX_ENDPOINTS + 1] = {0}; + + ep_addr[0] = cfg->tp_ep; + if (cfg->tp_type == TYPE1) + ep_addr[1] = cfg->bt_ep; + + return usb_check_int_endpoints(iface, ep_addr); +} + static int bcm5974_probe(struct usb_interface *iface, const struct usb_device_id *id) { @@ -903,6 +918,11 @@ static int bcm5974_probe(struct usb_interface *iface, /* find the product index */ cfg = bcm5974_get_config(udev); + if (!bcm5974_check_endpoints(iface, cfg)) { + dev_err(&iface->dev, "Unexpected non-int endpoint\n"); + return -ENODEV; + } + /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct bcm5974), GFP_KERNEL); input_dev = input_allocate_device(); From 66bbea9ed6446b8471d365a22734dc00556c4785 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 31 Jan 2024 14:09:55 +0000 Subject: [PATCH 607/768] ring-buffer: Clean ring_buffer_poll_wait() error return The return type for ring_buffer_poll_wait() is __poll_t. This is behind the scenes an unsigned where we can set event bits. In case of a non-allocated CPU, we do return instead -EINVAL (0xffffffea). Lucky us, this ends up setting few error bits (EPOLLERR | EPOLLHUP | EPOLLNVAL), so user-space at least is aware something went wrong. Nonetheless, this is an incorrect code. Replace that -EINVAL with a proper EPOLLERR to clean that output. As this doesn't change the behaviour, there's no need to treat this change as a bug fix. Link: https://lore.kernel.org/linux-trace-kernel/20240131140955.3322792-1-vdonnefort@google.com Cc: stable@vger.kernel.org Fixes: 6721cb6002262 ("ring-buffer: Do not poll non allocated cpu buffers") Signed-off-by: Vincent Donnefort Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 13aaf5e85b81..fd4bfe3ecf01 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -944,7 +944,7 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -EINVAL; + return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; From d81786f53aec14fd4d56263145a0635afbc64617 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 31 Jan 2024 13:49:19 -0500 Subject: [PATCH 608/768] tracefs: Zero out the tracefs_inode when allocating it eventfs uses the tracefs_inode and assumes that it's already initialized to zero. That is, it doesn't set fields to zero (like ti->private) after getting its tracefs_inode. This causes bugs due to stale values. Just initialize the entire structure to zero on allocation so there isn't any more surprises. This is a partial fix to access to ti->private. The assignment still needs to be made before the dentry is instantiated. Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.315825944@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 6 ++++-- fs/tracefs/internal.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index e1b172c0e091..888e42087847 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) if (!ti) return NULL; - ti->flags = 0; - return &ti->vfs_inode; } @@ -779,7 +777,11 @@ static void init_once(void *foo) { struct tracefs_inode *ti = (struct tracefs_inode *) foo; + /* inode_init_once() calls memset() on the vfs_inode portion */ inode_init_once(&ti->vfs_inode); + + /* Zero out the rest */ + memset_after(ti, 0, vfs_inode); } static int __init tracefs_init(void) diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 91c2bf0b91d9..7d84349ade87 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -11,9 +11,10 @@ enum { }; struct tracefs_inode { + struct inode vfs_inode; + /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ unsigned long flags; void *private; - struct inode vfs_inode; }; /* From 4fa4b010b83fb2f837b5ef79e38072a79e96e4f1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:20 -0500 Subject: [PATCH 609/768] eventfs: Initialize the tracefs inode properly The tracefs-specific fields in the inode were not initialized before the inode was exposed to others through the dentry with 'd_instantiate()'. Move the field initializations up to before the d_instantiate. Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.478449628@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 1c3dd0ad4660..824b1811e342 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -370,6 +370,8 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; + /* Only directories have ti->private set to an ei, not files */ + ti->private = ei; inc_nlink(inode); d_instantiate(dentry, inode); @@ -515,7 +517,6 @@ create_file_dentry(struct eventfs_inode *ei, int idx, static void eventfs_post_create_dir(struct eventfs_inode *ei) { struct eventfs_inode *ei_child; - struct tracefs_inode *ti; lockdep_assert_held(&eventfs_mutex); @@ -525,9 +526,6 @@ static void eventfs_post_create_dir(struct eventfs_inode *ei) srcu_read_lock_held(&eventfs_srcu)) { ei_child->d_parent = ei->dentry; } - - ti = get_tracefs(ei->dentry->d_inode); - ti->private = ei; } /** From 99c001cb617df409dac275a059d6c3f187a2da7a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:21 -0500 Subject: [PATCH 610/768] tracefs: Avoid using the ei->dentry pointer unnecessarily The eventfs_find_events() code tries to walk up the tree to find the event directory that a dentry belongs to, in order to then find the eventfs inode that is associated with that event directory. However, it uses an odd combination of walking the dentry parent, looking up the eventfs inode associated with that, and then looking up the dentry from there. Repeat. But the code shouldn't have back-pointers to dentries in the first place, and it should just walk the dentry parenthood chain directly. Similarly, 'set_top_events_ownership()' looks up the dentry from the eventfs inode, but the only reason it wants a dentry is to look up the superblock in order to look up the root dentry. But it already has the real filesystem inode, which has that same superblock pointer. So just pass in the superblock pointer using the information that's already there, instead of looking up extraneous data that is irrelevant. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.638645365@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 824b1811e342..e9819d719d2a 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -156,33 +156,30 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, return ret; } -static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) +static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) { - struct inode *inode; + struct inode *root; /* Only update if the "events" was on the top level */ if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) return; /* Get the tracefs root inode. */ - inode = d_inode(dentry->d_sb->s_root); - ei->attr.uid = inode->i_uid; - ei->attr.gid = inode->i_gid; + root = d_inode(sb->s_root); + ei->attr.uid = root->i_uid; + ei->attr.gid = root->i_gid; } static void set_top_events_ownership(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); struct eventfs_inode *ei = ti->private; - struct dentry *dentry; /* The top events directory doesn't get automatically updated */ if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) return; - dentry = ei->dentry; - - update_top_events_attr(ei, dentry); + update_top_events_attr(ei, inode->i_sb); if (!(ei->attr.mode & EVENTFS_SAVE_UID)) inode->i_uid = ei->attr.uid; @@ -235,8 +232,10 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) mutex_lock(&eventfs_mutex); do { - /* The parent always has an ei, except for events itself */ - ei = dentry->d_parent->d_fsdata; + // The parent is stable because we do not do renames + dentry = dentry->d_parent; + // ... and directories always have d_fsdata + ei = dentry->d_fsdata; /* * If the ei is being freed, the ownership of the children @@ -246,12 +245,11 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) ei = NULL; break; } - - dentry = ei->dentry; + // Walk upwards until you find the events inode } while (!ei->is_events); mutex_unlock(&eventfs_mutex); - update_top_events_attr(ei, dentry); + update_top_events_attr(ei, dentry->d_sb); return ei; } From db2aad036e77100e04a96c67f65ae7d49fb538fb Mon Sep 17 00:00:00 2001 From: Le Ma Date: Thu, 25 Jan 2024 12:00:34 +0800 Subject: [PATCH 611/768] drm/amdgpu: move the drm client creation behind drm device registration This patch is to eliminate interrupt warning below: "[drm] Fence fallback timer expired on ring sdma0.0". An early vm pt clearing job is sent to SDMA ahead of interrupt enabled. And re-locating the drm client creation following after drm_dev_register looks like a more proper flow. v2: wrap the drm client creation Fixes: 1819200166ce ("drm/amdkfd: Export DMABufs from KFD using GEM handles") Signed-off-by: Le Ma Reviewed-by: Felix Kuehling Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 77e263660288..41db030ddc4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) static const struct drm_client_funcs kfd_client_funcs = { .unregister = drm_client_release, }; + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) +{ + int ret; + + if (!adev->kfd.init_complete) + return 0; + + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", + &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", + ret); + return ret; + } + + drm_client_register(&adev->kfd.client); + + return 0; +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; - int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); - if (ret) { - dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); - return; - } - /* this is going to have a few of the MSBs set that we need to * clear */ @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); - if (adev->kfd.init_complete) - drm_client_register(&adev->kfd.client); - else - drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f262b9d89541..937d0f0b21df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, struct svm_range_bo *svm_bo); + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); #if defined(CONFIG_DEBUG_FS) int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index cc69005f5b46..971acf01bea6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2255,6 +2255,10 @@ retry_init: if (ret) goto err_pci; + ret = amdgpu_amdkfd_drm_client_create(adev); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors From fb366fc7541a1de521ab3df58471746aa793b833 Mon Sep 17 00:00:00 2001 From: Ryan Schaefer Date: Sun, 21 Jan 2024 21:51:44 +0000 Subject: [PATCH 612/768] netfilter: conntrack: correct window scaling with retransmitted SYN commit c7aab4f17021 ("netfilter: nf_conntrack_tcp: re-init for syn packets only") introduces a bug where SYNs in ORIGINAL direction on reused 5-tuple result in incorrect window scale negotiation. This commit merged the SYN re-initialization and simultaneous open or SYN retransmits cases. Merging this block added the logic in tcp_init_sender() that performed window scale negotiation to the retransmitted syn case. Previously. this would only result in updating the sender's scale and flags. After the merge the additional logic results in improperly clearing the scale in ORIGINAL direction before any packets in the REPLY direction are received. This results in packets incorrectly being marked invalid for being out-of-window. This can be reproduced with the following trace: Packet Sequence: > Flags [S], seq 1687765604, win 62727, options [.. wscale 7], length 0 > Flags [S], seq 1944817196, win 62727, options [.. wscale 7], length 0 In order to fix the issue, only evaluate window negotiation for packets in the REPLY direction. This was tested with simultaneous open, fast open, and the above reproduction. Fixes: c7aab4f17021 ("netfilter: nf_conntrack_tcp: re-init for syn packets only") Signed-off-by: Ryan Schaefer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e573be5afde7..ae493599a3ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - u32 end, u32 win) + u32 end, u32 win, + enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. @@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + if (dir == IP_CT_DIR_REPLY && + !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; @@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; @@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; From 776d451648443f9884be4a1b4e38e8faf1c621f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 23 Jan 2024 23:45:32 +0100 Subject: [PATCH 613/768] netfilter: nf_tables: restrict tunnel object to NFPROTO_NETDEV Bail out on using the tunnel dst template from other than netdev family. Add the infrastructure to check for the family in objects. Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 14 +++++++++----- net/netfilter/nft_tunnel.c | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4e1ea18eb5f0..001226c34621 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1351,6 +1351,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute + * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { @@ -1360,6 +1361,7 @@ struct nft_object_type { struct list_head list; u32 type; unsigned int maxattr; + u8 family; struct module *owner; const struct nla_policy *policy; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c537104411e7..fc016befb46f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7551,11 +7551,15 @@ nla_put_failure: return -1; } -static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; + if (objtype == type->type) return type; } @@ -7563,11 +7567,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) } static const struct nft_object_type * -nft_obj_type_get(struct net *net, u32 objtype) +nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (type != NULL && try_module_get(type->owner)) return type; @@ -7660,7 +7664,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (WARN_ON_ONCE(!type)) return -ENOENT; @@ -7674,7 +7678,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&table->use)) return -EMFILE; - type = nft_obj_type_get(net, objtype); + type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 9f21953c7433..f735d79d8be5 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = { static struct nft_object_type nft_tunnel_obj_type __read_mostly = { .type = NFT_OBJECT_TUNNEL, + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_obj_ops, .maxattr = NFTA_TUNNEL_KEY_MAX, .policy = nft_tunnel_key_policy, From 97830f3c3088638ff90b20dfba2eb4d487bf14d7 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Wed, 31 Jan 2024 21:53:46 +0000 Subject: [PATCH 614/768] binder: signal epoll threads of self-work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In (e)poll mode, threads often depend on I/O events to determine when data is ready for consumption. Within binder, a thread may initiate a command via BINDER_WRITE_READ without a read buffer and then make use of epoll_wait() or similar to consume any responses afterwards. It is then crucial that epoll threads are signaled via wakeup when they queue their own work. Otherwise, they risk waiting indefinitely for an event leaving their work unhandled. What is worse, subsequent commands won't trigger a wakeup either as the thread has pending work. Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Cc: Arve Hjønnevåg Cc: Martijn Coenen Cc: Alice Ryhl Cc: Steven Moreland Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20240131215347.1808751-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8dd23b19e997..eca24f41556d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -478,6 +478,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread, { WARN_ON(!list_empty(&thread->waiting_thread_node)); binder_enqueue_work_ilocked(work, &thread->todo); + + /* (e)poll-based threads require an explicit wakeup signal when + * queuing their own work; they rely on these events to consume + * messages without I/O block. Without it, threads risk waiting + * indefinitely without handling the work. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL && + thread->pid == current->pid && !thread->process_todo) + wake_up_interruptible_sync(&thread->wait); + thread->process_todo = true; } From 6e348067ee4bc5905e35faa3a8fafa91c9124bc7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 25 Jan 2024 17:29:46 -0500 Subject: [PATCH 615/768] netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new The annotation says in sctp_new(): "If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8)". However, it does not check SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the conntrack entry(ct). Because of that, if the ct in Router disappears for some reason in [1] with the packet sequence like below: Client > Server: sctp (1) [INIT] [init tag: 3201533963] Server > Client: sctp (1) [INIT ACK] [init tag: 972498433] Client > Server: sctp (1) [COOKIE ECHO] Server > Client: sctp (1) [COOKIE ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057809] Server > Client: sctp (1) [SACK] [cum ack 3075057809] Server > Client: sctp (1) [HB REQ] (the ct in Router disappears somehow) <-------- [1] Client > Server: sctp (1) [HB ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [ABORT] when processing HB ACK packet in Router it calls sctp_new() to initialize the new ct with vtag[REPLY] set to HB_ACK packet's vtag. Later when sending DATA from Client, all the SACKs from Server will get dropped in Router, as the SACK packet's vtag does not match vtag[REPLY] in the ct. The worst thing is the vtag in this ct will never get fixed by the upcoming packets from Server. This patch fixes it by checking SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the ct in sctp_new() as the annotation says. With this fix, it will leave vtag[REPLY] in ct to 0 in the case above, and the next HB REQ/ACK from Server is able to fix the vtag as its value is 0 in nf_conntrack_sctp_packet(). Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6bd533983c1..4cc97f971264 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", From 97f7cf1cd80eeed3b7c808b7c12463295c751001 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 29 Jan 2024 10:57:01 +0100 Subject: [PATCH 616/768] netfilter: ipset: fix performance regression in swap operation The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa9 fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani Reported-by: David Wang <00107082@163.com> Tested-by: David Wang <00107082@163.com> Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 4 +++ net/netfilter/ipset/ip_set_bitmap_gen.h | 14 ++++++++-- net/netfilter/ipset/ip_set_core.c | 37 +++++++++++++++++++------ net/netfilter/ipset/ip_set_hash_gen.h | 15 ++++++++-- net/netfilter/ipset/ip_set_list_set.c | 13 +++++++-- 5 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e8c350a3ade1..e9f4f845d760 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 21f7860e8fa1..cb48a2b9cb9f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -30,6 +30,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 4c133e06be1d..bcaad9c009fe 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,6 +1182,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1193,8 +1201,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1206,8 +1212,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1221,6 +1229,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1228,6 +1238,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1238,10 +1251,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1394,9 +1413,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2409,8 +2425,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cbf80da9a01c..1136510521a8 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {}; #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {}; #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -450,9 +452,6 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); list_for_each_safe(l, lt, &h->ad) { list_del(l); @@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e162636525cf..6c3f28bc59b3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - timer_shutdown_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + timer_shutdown_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void From 259eb32971e9eb24d1777a28d82730659f50fdcb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 11:09:43 +0100 Subject: [PATCH 617/768] netfilter: nf_log: replace BUG_ON by WARN_ON_ONCE when putting logger Module reference is bumped for each user, this should not ever happen. But BUG_ON check should use rcu_access_pointer() instead. If this ever happens, do WARN_ON_ONCE() instead of BUG_ON() and consolidate pointer check under the rcu read side lock section. Fixes: fab4085f4e24 ("netfilter: log: nf_log_packet() as real unified interface") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8cc52d2bd31b..e16f158388bb 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); From 8059918a1377f2f1fff06af4f5a4ed3d5acd6bc4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 13:12:33 +0100 Subject: [PATCH 618/768] netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations - Disallow families other than NFPROTO_{IPV4,IPV6,INET}. - Disallow layer 4 protocol with no ports, since destination port is a mandatory attribute for this object. Fixes: 857b46027d6f ("netfilter: nft_ct: add ct expectations support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 86bb9d7797d9..aac98a3c966e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1250,7 +1250,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num != ctx->family) + return -EINVAL; + + fallthrough; + case NFPROTO_INET: + break; + default: + return -EOPNOTSUPP; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); From 961df3085416ffabea192989941c89ffbf2af2d5 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 15 Jan 2024 13:37:47 -0500 Subject: [PATCH 619/768] drm/amdkfd: Correct partial migration virtual addr Partial migration to system memory should use migrate.addr, not prange->start as virtual address to allocate system memory page. Fixes: a546a2768440 ("drm/amdkfd: Use partial migrations/mapping for GPU/CPU page faults in SVM") Signed-off-by: Philip Yang Reviewed-by: Xiaogang Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index f856901055d3..bdc01ca9609a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -574,7 +574,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - addr = prange->start << PAGE_SHIFT; + addr = migrate->start; src = (uint64_t *)(scratch + npages); dst = scratch; From c49bf4fcfc2f5516f76a706b06fcad5886cc25e1 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Fri, 5 Jan 2024 08:49:06 -0600 Subject: [PATCH 620/768] drm/amdkfd: Use S_ENDPGM_SAVED in trap handler This instruction has no functional difference to S_ENDPGM but allows performance counters to track save events correctly. Signed-off-by: Jay Cornwall Reviewed-by: Laurent Morichetti Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 14 +++++++------- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 2 +- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index df75863393fc..d1caaf0e6a7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -674,7 +674,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { @@ -1091,7 +1091,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb9eef807, 0x876dff6d, 0x0000ffff, 0x87fe7e7e, 0x87ea6a6a, 0xb9faf802, - 0xbe80226c, 0xbf810000, + 0xbe80226c, 0xbf9b0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, @@ -1574,7 +1574,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { @@ -2065,7 +2065,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { @@ -2500,7 +2500,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x876dff6d, 0x0000ffff, 0x87fe7e7e, 0x87ea6a6a, 0xb9faf802, 0xbe80226c, - 0xbf810000, 0xbf9f0000, + 0xbf9b0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, }; @@ -2944,7 +2944,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { 0xb8eef802, 0xbf0d866e, 0xbfa20002, 0xb97af802, 0xbe80486c, 0xb97af802, - 0xbe804a6c, 0xbfb00000, + 0xbe804a6c, 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, @@ -3436,5 +3436,5 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index e0140df0b0ec..71b3dc0c7363 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -1104,7 +1104,7 @@ L_RETURN_WITHOUT_PRIV: s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: - s_endpgm + s_endpgm_saved end function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index e506411ad28a..bb26338204f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -921,7 +921,7 @@ L_RESTORE: /* the END */ /**************************************************************************/ L_END_PGM: - s_endpgm + s_endpgm_saved end From 4119734e06a7f30e7e8eb666692a58b85dca0269 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 26 Jan 2024 15:14:50 -0500 Subject: [PATCH 621/768] drm/amdkfd: Use correct drm device for cgroup permission check On GFX 9.4.3, for a given KFD node, fetch the correct drm device from XCP manager when checking for cgroup permissions. Signed-off-by: Mukul Joshi Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 17fbedbf3651..677281c0793e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1488,10 +1488,15 @@ void kfd_dec_compute_active(struct kfd_node *dev); /* Cgroup Support */ /* Check with device cgroup if @kfd device is accessible */ -static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd) +static inline int kfd_devcgroup_check_permission(struct kfd_node *node) { #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) - struct drm_device *ddev = adev_to_drm(kfd->adev); + struct drm_device *ddev; + + if (node->xcp) + ddev = node->xcp->ddev; + else + ddev = adev_to_drm(node->adev); return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, From 514312c07f6cd2f1ffe5a90d42b6080868a03a26 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 9 Jan 2024 11:31:20 -0500 Subject: [PATCH 622/768] Revert "drm/amd/display: initialize all the dpm level's stutter latency" Revert commit 885c71ad791c ("drm/amd/display: initialize all the dpm level's stutter latency") Because it causes some regression Reviewed-by: Muhammad Ahmed Acked-by: Tom Chung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 64d01a9cd68c..8b0f930be5ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,9 +341,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } - if (dml2->config.bbox_overrides.clks_table.num_states) - p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; - /* Override from passed values, if available */ for (i = 0; i < p->in_states->num_states; i++) { if (dml2->config.bbox_overrides.sr_exit_latency_us) { @@ -400,6 +397,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } /* Copy clocks tables entries, if available */ if (dml2->config.bbox_overrides.clks_table.num_states) { + p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; From 2ff33c759a4247c84ec0b7815f1f223e155ba82a Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Tue, 16 Jan 2024 11:00:00 -0500 Subject: [PATCH 623/768] drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz [why] Originally, PMFW said min FCLK is 300Mhz, but min DCFCLK can be increased to 400Mhz because min FCLK is now 600Mhz so FCLK >= 1.5 * DCFCLK hardware requirement will still be satisfied. Increasing min DCFCLK addresses underflow issues (underflow occurs when phantom pipe is turned on for some Sub-Viewport configs). [how] Increasing DCFCLK by raising the min_dcfclk_mhz Reviewed-by: Chaitanya Dhere Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9f37f717a1f8..b13a6fd7cc83 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2753,7 +2753,7 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk struct _vcs_dpi_voltage_scaling_st entry = {0}; struct clk_limit_table_entry max_clk_data = {0}; - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; From b5abd7f983e14054593dc91d6df2aa5f8cc67652 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 16 Jan 2024 19:54:15 -0500 Subject: [PATCH 624/768] drm/amd/display: fix USB-C flag update after enc10 feature init [why] BIOS's integration info table not following the original order which is phy instance is ext_displaypath's array index. [how] Move them to follow the original order. Reviewed-by: Muhammad Ahmed Acked-by: Tom Chung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c index 501388014855..d761b0df2878 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c @@ -203,12 +203,12 @@ void dcn32_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; enc10->base.features = *enc_features; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.transmitter = init_data->transmitter; diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c index da94e5309fba..81e349d5835b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c @@ -184,8 +184,6 @@ void dcn35_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; @@ -240,6 +238,8 @@ void dcn35_link_encoder_construct( } enc10->base.features.flags.bits.HDMI_6GB_EN = 1; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; if (bp_funcs->get_connector_speed_cap_info) result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios, From 31c2bf25eaf51c2d45f092284a28e97f43b54c15 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 17 Jan 2024 16:46:02 -0500 Subject: [PATCH 625/768] drm/amd/display: Fix DPSTREAM CLK on and off sequence [Why] Secondary DP2 display fails to light up in some instances [How] Clock needs to be on when DPSTREAMCLK*_EN =1. This change moves dtbclk_p enable/disable point to make sure this is the case Reviewed-by: Charlene Liu Reviewed-by: Dmytro Laktyushkin Acked-by: Tom Chung Signed-off-by: Daniel Miess Signed-off-by: Dmytro Laktyushkin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 +- .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5660f15da291..2352428bcea3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1183,9 +1183,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; if (dccg) { - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index e931342fcf4c..4853ecac53f9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2790,18 +2790,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) } if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { - dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); - - phyd32clk = get_phyd32clk_src(link); - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); - dto_params.otg_inst = tg->inst; dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx); dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); + + phyd32clk = get_phyd32clk_src(link); + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); } else { if (dccg->funcs->enable_symclk_se) dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, From 39079fe8e660851abbafa90cd55cbf029210661f Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 18 Jan 2024 15:14:15 -0500 Subject: [PATCH 626/768] drm/amd/display: fix incorrect mpc_combine array size [why] MAX_SURFACES is per stream, while MAX_PLANES is per asic. The mpc_combine is an array that records all the planes per asic. Therefore MAX_PLANES should be used as the array size. Using MAX_SURFACES causes array overflow when there are more than 3 planes. [how] Use the MAX_PLANES for the mpc_combine array size. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Reviewed-by: Nevenko Stupar Reviewed-by: Chaitanya Dhere Acked-by: Tom Chung Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index b13a6fd7cc83..dd781a20692e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1112,7 +1112,7 @@ struct pipe_slice_table { struct pipe_ctx *pri_pipe; struct dc_plane_state *plane; int slice_count; - } mpc_combines[MAX_SURFACES]; + } mpc_combines[MAX_PLANES]; int mpc_combine_count; }; From 191cb4ed33a61c90feed8bda0f0df3a419604fc8 Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Thu, 18 Jan 2024 13:34:40 -0500 Subject: [PATCH 627/768] drm/amd/display: Underflow workaround by increasing SR exit latency [Why] On 14us for exit latency time causes underflow for 8K monitor with HDR on. Increasing the latency to 28us fixes the underflow. [How] Increase the latency to 28us. This workaround should be sufficient before we figure out why SR exit so long. Reviewed-by: Chaitanya Dhere Acked-by: Tom Chung Signed-off-by: Nicholas Susanto Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 32 +++++++++---------- .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 9c660d1facc7..14cec1c7b718 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -437,32 +437,32 @@ static struct wm_table ddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, } @@ -474,32 +474,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 475c4ec43c01..7ea2bd5374d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -164,8 +164,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .sr_exit_z8_time_us = 210.0, .sr_enter_plus_exit_z8_time_us = 320.0, .fclk_change_latency_us = 24.0, From faf51b201bc42adf500945732abb6220c707d6f3 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 11 Jan 2024 14:46:01 -0500 Subject: [PATCH 628/768] drm/amd/display: Fix dcn35 8k30 Underflow/Corruption Issue [why] odm calculation is missing for pipe split policy determination and cause Underflow/Corruption issue. [how] Add the odm calculation. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml2_translation_helper.c | 27 +++++++------------ .../gpu/drm/amd/display/dc/inc/core_types.h | 2 ++ 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 8b0f930be5ae..23a608274096 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -791,35 +791,28 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -/*TODO no support for mpc combine, need rework - should calculate scaling params based on plane+stream*/ -static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, const struct dc_state *context) +static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context) { int i; - struct scaler_data data = { 0 }; + struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); for (i = 0; i < MAX_PIPES; i++) { const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->plane_state == in && !pipe->prev_odm_pipe) { - const struct pipe_ctx *next_pipe = pipe->next_odm_pipe; + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; - data = context->res_ctx.pipe_ctx[i].plane_res.scl_data; - while (next_pipe) { - data.h_active += next_pipe->plane_res.scl_data.h_active; - data.recout.width += next_pipe->plane_res.scl_data.recout.width; - if (in->rotation == ROTATION_ANGLE_0 || in->rotation == ROTATION_ANGLE_180) { - data.viewport.width += next_pipe->plane_res.scl_data.viewport.width; - } else { - data.viewport.height += next_pipe->plane_res.scl_data.viewport.height; - } - next_pipe = next_pipe->next_odm_pipe; - } + resource_build_scaling_params(temp_pipe); break; } } ASSERT(i < MAX_PIPES); - return data; + return temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in) @@ -864,7 +857,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->ScalerEnabled[location] = false; } -static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, const struct dc_state *context) +static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context) { const struct scaler_data scaler_data = get_scaler_data_for_plane(in, context); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index f74ae0d41d3c..3a6bf77a6873 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -469,6 +469,8 @@ struct resource_context { unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; bool is_mpc_3dlut_acquired[MAX_PIPES]; + /* solely used for build scalar data in dml2 */ + struct pipe_ctx temp_pipe; }; struct dce_bw_output { From bb34bc2cd3ee284d7992df24a3f7d24f61a59268 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 5 Jan 2024 14:05:25 +0800 Subject: [PATCH 629/768] drm/amdgpu: Fix the warning info in mode1 reset Fix the warning info below during mode1 reset. [ +0.000004] Call Trace: [ +0.000004] [ +0.000006] ? show_regs+0x6e/0x80 [ +0.000011] ? __flush_work.isra.0+0x2e8/0x390 [ +0.000005] ? __warn+0x91/0x150 [ +0.000009] ? __flush_work.isra.0+0x2e8/0x390 [ +0.000006] ? report_bug+0x19d/0x1b0 [ +0.000013] ? handle_bug+0x46/0x80 [ +0.000012] ? exc_invalid_op+0x1d/0x80 [ +0.000011] ? asm_exc_invalid_op+0x1f/0x30 [ +0.000014] ? __flush_work.isra.0+0x2e8/0x390 [ +0.000007] ? __flush_work.isra.0+0x208/0x390 [ +0.000007] ? _prb_read_valid+0x216/0x290 [ +0.000008] __cancel_work_timer+0x11d/0x1a0 [ +0.000007] ? try_to_grab_pending+0xe8/0x190 [ +0.000012] cancel_work_sync+0x14/0x20 [ +0.000008] amddrm_sched_stop+0x3c/0x1d0 [amd_sched] [ +0.000032] amdgpu_device_gpu_recover+0x29a/0xe90 [amdgpu] This warning info was printed after applying the patch "drm/sched: Convert drm scheduler to use a work queue rather than kthread". The root cause is that amdgpu driver tries to use the uninitialized work_struct in the struct drm_gpu_scheduler v2: - Rename the function to amdgpu_ring_sched_ready and move it to amdgpu_ring.c (Alex) v3: - Fix a few more checks based on Vitaly's patch (Alex) v4: - squash in fix noticed by Bert in https://gitlab.freedesktop.org/drm/amd/-/issues/3139 Fixes: 11b3b9f461c5 ("drm/sched: Check scheduler ready before calling timeout handling") Reviewed-by: Alex Deucher Signed-off-by: Vitaly Prosyak Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 899e31e3a5e8..3a3f3ce09f00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (!(ring && drm_sched_wqueue_ready(&ring->sched))) + if (!amdgpu_ring_sched_ready(ring)) continue; /* stop secheduler and drain ring. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index e485dd3357c6..1afbb2e932c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_wqueue_stop(&ring->sched); } @@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_wqueue_start(&ring->sched); } @@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) ring = adev->rings[val]; - if (!ring || !ring->funcs->preempt_ib || - !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring) || + !ring->funcs->preempt_ib) return -EINVAL; /* the last preemption failed */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 31b28e6f35b2..fdde7488d0ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5021,7 +5021,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; spin_lock(&ring->sched.job_list_lock); @@ -5160,7 +5160,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; /* Clear job fence from fence drv to avoid force_completion @@ -5627,7 +5627,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, job ? &job->base : NULL); @@ -5696,7 +5696,7 @@ skip_hw_reset: for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); @@ -6051,7 +6051,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, NULL); @@ -6179,7 +6179,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 45424ebf9681..5505d646f43a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring) ring->name); ring->sched.ready = !r; + return r; } @@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring) if (ring->is_sw_ring) amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE); } + +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) +{ + if (!ring) + return false; + + if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched)) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bbb53720a018..fe1a61eb6e4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); #endif From 8ef85a0ce24a6d9322dfa2a67477e473c3619b4f Mon Sep 17 00:00:00 2001 From: David McFarland Date: Mon, 29 Jan 2024 18:18:22 -0400 Subject: [PATCH 630/768] drm/amd: Don't init MEC2 firmware when it fails to load The same calls are made directly above, but conditional on the firmware loading and validating successfully. Cc: stable@vger.kernel.org Fixes: 9931b67690cf ("drm/amd: Load GFX10 microcode during early_init") Signed-off-by: David McFarland Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ecb622b7f970..dcdecb18b230 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4027,8 +4027,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = 0; adev->gfx.mec2_fw = NULL; } - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); gfx_v10_0_check_fw_write_wait(adev); out: From 492a1e67ee59312b27c85c275298080fde392190 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 30 Jan 2024 14:06:43 +0530 Subject: [PATCH 631/768] drm/amd/display: Add NULL check for kzalloc in 'amdgpu_dm_atomic_commit_tail()' Add a NULL check for the kzalloc call that allocates memory for dummy_updates in the amdgpu_dm_atomic_commit_tail function. Previously, if kzalloc failed to allocate memory and returned NULL, the code would attempt to use the NULL pointer. The fix is to check if kzalloc returns NULL, and if so, log an error message and skip the rest of the current loop iteration with the continue statement. This prevents the code from attempting to use the NULL pointer. Cc: Julia Lawall Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Alex Hung Cc: Alex Deucher Reported-by: Julia Lawall Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202401300629.ICnCt983-lkp@intel.com/ Fixes: 135fd1b35690 ("drm/amd/display: Reduce stack size") Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6cda5b536362..d292f290cd6e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9187,6 +9187,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) * To fix this, DC should permit updating only stream properties. */ dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC); + if (!dummy_updates) { + DRM_ERROR("Failed to allocate memory for dummy_updates.\n"); + continue; + } for (j = 0; j < status->plane_count; j++) dummy_updates[j].surface = status->plane_states[0]; From 97cba232549b9fe7e491fb60a69cf93075015f29 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jan 2024 21:17:09 +0530 Subject: [PATCH 632/768] drm/amd/display: Fix buffer overflow in 'get_host_router_total_dp_tunnel_bw()' The error message buffer overflow 'dc->links' 12 <= 12 suggests that the code is trying to access an element of the dc->links array that is beyond its bounds. In C, arrays are zero-indexed, so an array with 12 elements has valid indices from 0 to 11. Trying to access dc->links[12] would be an attempt to access the 13th element of a 12-element array, which is a buffer overflow. To fix this, ensure that the loop does not go beyond the last valid index when accessing dc->links[i + 1] by subtracting 1 from the loop condition. This would ensure that i + 1 is always a valid index in the array. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_dpia_bw.c:208 get_host_router_total_dp_tunnel_bw() error: buffer overflow 'dc->links' 12 <= 12 Fixes: 59f1622a5f05 ("drm/amd/display: Add dpia display mode validation logic") Cc: PeiChen Huang Cc: Aric Cyr Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Meenakshikumar Somasundaram Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index dd0d2b206462..5491b707cec8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -196,7 +196,7 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in struct dc_link *link_dpia_primary, *link_dpia_secondary; int total_bw = 0; - for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) { + for (uint8_t i = 0; i < (MAX_PIPES * 2) - 1; ++i) { if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; From 16da399091dca3d1e48109086403587af37cc196 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 30 Jan 2024 12:10:38 +0530 Subject: [PATCH 633/768] drm/amdgpu: Fix missing error code in 'gmc_v6/7/8/9_0_hw_init()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return 0 for success scenairos in 'gmc_v6/7/8/9_0_hw_init()' Fixes the below: drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c:920 gmc_v6_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c:1104 gmc_v7_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c:1224 gmc_v8_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:2347 gmc_v9_0_hw_init() warn: missing error code? 'r' Fixes: fac4ebd79fed ("drm/amdgpu: Fix with right return code '-EIO' in 'amdgpu_gmc_vram_checking()'") Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 42e103d7077d..59d9215e5556 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -915,8 +915,8 @@ static int gmc_v6_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v6_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index efc16e580f1e..45a2f8e031a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1099,8 +1099,8 @@ static int gmc_v7_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v7_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ff4ae73d27ec..4422b27a3cc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1219,8 +1219,8 @@ static int gmc_v8_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v8_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 17b7a25121b0..40a00ea0009f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2341,8 +2341,8 @@ static int gmc_v9_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } /** From 9c29282ecbeeb1b43fced3055c6a5bb244b9390b Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 11 Jan 2024 12:27:07 +0800 Subject: [PATCH 634/768] drm/amdkfd: reserve the BO before validating it Fix a warning. v2: Avoid unmapping attachment repeatedly when ERESTARTSYS. v3: Lock the BO before accessing ttm->sg to avoid race conditions.(Felix) [ 41.708711] WARNING: CPU: 0 PID: 1463 at drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.708989] Call Trace: [ 41.708992] [ 41.708996] ? show_regs+0x6c/0x80 [ 41.709000] ? ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.709008] ? __warn+0x93/0x190 [ 41.709014] ? ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.709024] ? report_bug+0x1f9/0x210 [ 41.709035] ? handle_bug+0x46/0x80 [ 41.709041] ? exc_invalid_op+0x1d/0x80 [ 41.709048] ? asm_exc_invalid_op+0x1f/0x30 [ 41.709057] ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu] [ 41.709185] ? ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.709197] ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu] [ 41.709337] ? srso_alias_return_thunk+0x5/0x7f [ 41.709346] kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu] [ 41.709467] amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80 [amdgpu] [ 41.709586] kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu] [ 41.709710] kfd_ioctl+0x1ec/0x650 [amdgpu] [ 41.709822] ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10 [amdgpu] [ 41.709945] ? srso_alias_return_thunk+0x5/0x7f [ 41.709949] ? tomoyo_file_ioctl+0x20/0x30 [ 41.709959] __x64_sys_ioctl+0x9c/0xd0 [ 41.709967] do_syscall_64+0x3f/0x90 [ 41.709973] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 101b8104307e ("drm/amdkfd: Move dma unmapping after TLB flush") Signed-off-by: Lang Yu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 20 ++++++++++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 937d0f0b21df..27c61c535e29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -303,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f183d7faeeec..231fd927dcfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2085,21 +2085,35 @@ out: return ret; } -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) { struct kfd_mem_attachment *entry; struct amdgpu_vm *vm; + int ret; vm = drm_priv_to_vm(drm_priv); mutex_lock(&mem->lock); + ret = amdgpu_bo_reserve(mem->bo, true); + if (ret) + goto out; + list_for_each_entry(entry, &mem->attachments, list) { - if (entry->bo_va->base.vm == vm) - kfd_mem_dmaunmap_attachment(mem, entry); + if (entry->bo_va->base.vm != vm) + continue; + if (entry->bo_va->base.bo->tbo.ttm && + !entry->bo_va->base.bo->tbo.ttm->sg) + continue; + + kfd_mem_dmaunmap_attachment(mem, entry); } + amdgpu_bo_unreserve(mem->bo); +out: mutex_unlock(&mem->lock); + + return ret; } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ce4c52ec34d8..80e90fdef291 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ - amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); + err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); + if (err) + goto sync_memory_failed; } mutex_unlock(&p->mutex); From de4a733868df3a1b899fd4b05c32e92474cc8f73 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 30 Jan 2024 13:14:39 +0800 Subject: [PATCH 635/768] drm/amdgpu: drm/amdgpu: remove golden setting for gfx 11.5.0 No need to set GC golden settings in driver from gfx 11.5.0 onwards. Signed-off-by: Yifan Zhang Acked-by: Alex Deucher Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d9cf9fd03d30..4f3bfdc75b37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -107,23 +107,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) }; -static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a), - SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f) -}; - #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -304,11 +287,6 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); break; - case IP_VERSION(11, 5, 0): - soc15_program_register_sequence(adev, - golden_settings_gc_11_5_0, - (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0)); - break; default: break; } From 4f56acdee4c69224afde328bb6402a48b93f8221 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 30 Jan 2024 21:01:42 +0800 Subject: [PATCH 636/768] drm/amdgpu: remove asymmetrical irq disabling in vcn 4.0.5 suspend There is no irq enabled in vcn 4.0.5 resume, causing wrong amdgpu_irq_src status. Beside, current set function callbacks are empty with no real effect. Signed-off-by: Yifan Zhang Acked-by: Saleemkhan Jamadar Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 19 ------------------- 2 files changed, 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 169ed400ee7b..8ab01ae919d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -2017,22 +2017,6 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta return ret; } -/** - * vcn_v4_0_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - /** * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state * @@ -2097,7 +2081,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ } static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = { - .set = vcn_v4_0_set_interrupt_state, .process = vcn_v4_0_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 2eda30e78f61..49e4c3c09aca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -269,8 +269,6 @@ static int vcn_v4_0_5_hw_fini(void *handle) vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); } } - - amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); } return 0; @@ -1668,22 +1666,6 @@ static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_s return ret; } -/** - * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - /** * vcn_v4_0_5_process_interrupt - process VCN block interrupt * @@ -1726,7 +1708,6 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp } static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = { - .set = vcn_v4_0_5_set_interrupt_state, .process = vcn_v4_0_5_process_interrupt, }; From 7330256268664ea0a7dd5b07a3fed363093477dd Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Tue, 23 Jan 2024 12:52:03 +0100 Subject: [PATCH 637/768] drm/amdgpu: Reset IH OVERFLOW_CLEAR bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to detect subsequent IH ring buffer overflows as well. Cc: Joshua Ashton Cc: Alex Deucher Cc: Christian König Cc: stable@vger.kernel.org Signed-off-by: Friedrich Vock Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/cz_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/si_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 ++++++ 10 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 6f7c031dd197..f24e34dc33d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf37a..c19681492efa 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b10a..2c02ae69883d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index d9ed7332d805..ad4ad39f128f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -418,6 +418,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 8fb05eae340a..b8da0fc29378 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -418,6 +418,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index e64b33115848..de93614726c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a5750..cada9f300a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 917707bba7f3..450b6e831509 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index d364c6dd152c..bf68e18e3824 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index ddfc6941f9d5..db66e6cccaf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -421,6 +421,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } From ee36a3b345c433a846effcdcfba437c2298eeda5 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 29 Jan 2024 13:58:13 +0000 Subject: [PATCH 638/768] cifs: make sure that channel scaling is done only once Following a successful cifs_tree_connect, we have the code to scale up/down the number of channels in the session. However, it is not protected by a lock today. As a result, this code can be executed by several processes that select the same channel. The core functions handle this well, as they pick chan_lock. However, we've seen cases where smb2_reconnect throws some warnings. To fix that, this change introduces a flags bitmap inside the cifs_ses structure. A new flag type is used to ensure that only one process enters this section at any time. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 3 +++ fs/smb/client/smb2pdu.c | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 16befff4cbb4..9093c507042f 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1032,6 +1032,8 @@ struct cifs_chan { __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; +#define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) + /* * Session structure. One of these for each uid session with a particular host */ @@ -1064,6 +1066,7 @@ struct cifs_ses { enum securityEnum sectype; /* what security flavor was specified? */ bool sign; /* is signing required? */ bool domainAuto:1; + unsigned int flags; __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 86f6f35b7f32..6db54c6ef571 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -399,6 +399,15 @@ skip_sess_setup: goto out; } + spin_lock(&ses->ses_lock); + if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) { + spin_unlock(&ses->ses_lock); + mutex_unlock(&ses->session_mutex); + goto skip_add_channels; + } + ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); + if (!rc && (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { mutex_unlock(&ses->session_mutex); @@ -428,17 +437,22 @@ skip_sess_setup: if (ses->chan_max > ses->chan_count && ses->iface_count && !SERVER_IS_CHAN(server)) { - if (ses->chan_count == 1) + if (ses->chan_count == 1) { cifs_server_dbg(VFS, "supports multichannel now\n"); + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } cifs_try_adding_channels(ses); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); } } else { mutex_unlock(&ses->session_mutex); } + skip_add_channels: + spin_lock(&ses->ses_lock); + ses->flags &= ~CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); if (smb2_command != SMB2_INTERNAL_CMD) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); From e028243003ad6e1417cc8ac19af9ded672b9ec59 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Mon, 29 Jan 2024 11:12:11 -0600 Subject: [PATCH 639/768] MAINTAINERS: Drop unreachable reviewer for Qualcomm ETHQOS ethernet driver Bhupesh's email responds indicating they've changed employers and with no new contact information. Let's drop the line from MAINTAINERS to avoid getting the same response over and over. Signed-off-by: Andrew Halaney Link: https://lore.kernel.org/r/20240129-remove-dwmac-qcom-ethqos-reviewer-v1-1-2645eab61451@redhat.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9435ea0f8cda..6ae30c50e79f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18085,7 +18085,6 @@ F: drivers/net/ethernet/qualcomm/emac/ QUALCOMM ETHQOS ETHERNET DRIVER M: Vinod Koul -R: Bhupesh Sharma L: netdev@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained From 5dee6d6923458e26966717f2a3eae7d09fc10bf6 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 29 Jan 2024 17:10:17 +0800 Subject: [PATCH 640/768] net: ipv4: fix a memleak in ip_setup_cork When inetdev_valid_mtu fails, cork->opt should be freed if it is allocated in ip_setup_cork. Otherwise there could be a memleak. Fixes: 501a90c94510 ("inet: protect against too small mtu values.") Signed-off-by: Zhipeng Lu Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240129091017.2938835-1-alexious@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b06f678b03a1..41537d18eecf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1287,6 +1287,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(!rt)) return -EFAULT; + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + /* * setup for corking. */ @@ -1303,12 +1309,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->addr = ipc->addr; } - cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); - - if (!inetdev_valid_mtu(cork->fragsize)) - return -ENETUNREACH; - cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; From 585b40e25dc9ff3d2b03d1495150540849009e5b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 29 Jan 2024 23:49:48 +0100 Subject: [PATCH 641/768] net: dsa: mv88e6xxx: Fix failed probe due to unsupported C45 reads Not all mv88e6xxx device support C45 read/write operations. Those which do not return -EOPNOTSUPP. However, when phylib scans the bus, it considers this fatal, and the probe of the MDIO bus fails, which in term causes the mv88e6xxx probe as a whole to fail. When there is no device on the bus for a given address, the pull up resistor on the data line results in the read returning 0xffff. The phylib core code understands this when scanning for devices on the bus. C45 allows multiple devices to be supported at one address, so phylib will perform a few reads at each address, so although thought not the most efficient solution, it is a way to avoid fatal errors. Make use of this as a minimal fix for stable to fix the probing problems. Follow up patches will rework how C45 operates to make it similar to C22 which considers -ENODEV as a none-fatal, and swap mv88e6xxx to using this. Cc: stable@vger.kernel.org Fixes: 743a19e38d02 ("net: dsa: mv88e6xxx: Separate C22 and C45 transactions") Reported-by: Tim Menninger Signed-off-by: Andrew Lunn Link: https://lore.kernel.org/r/20240129224948.1531452-1-andrew@lunn.ch Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 383b3c4d6f59..614cabb5c1b0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3659,7 +3659,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad, int err; if (!chip->info->ops->phy_read_c45) - return -EOPNOTSUPP; + return 0xffff; mv88e6xxx_reg_lock(chip); err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val); From f9ddefb6c0de771038b041eaad5cc15e61fe283f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 29 Jan 2024 07:39:45 +0100 Subject: [PATCH 642/768] nvme-auth: open-code single-use macros No point in having macros just for a single function nvme_auth_submit(). Open-code them into the caller. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 2a12ee878783..3dce480d932e 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -48,11 +48,6 @@ struct nvme_dhchap_queue_context { static struct workqueue_struct *nvme_auth_wq; -#define nvme_auth_flags_from_qid(qid) \ - (qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED -#define nvme_auth_queue_from_qid(ctrl, qid) \ - (qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q - static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl) { return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues + @@ -63,10 +58,15 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { struct nvme_command cmd = {}; - blk_mq_req_flags_t flags = nvme_auth_flags_from_qid(qid); - struct request_queue *q = nvme_auth_queue_from_qid(ctrl, qid); + blk_mq_req_flags_t flags = 0; + struct request_queue *q = ctrl->fabrics_q; int ret; + if (qid != 0) { + flags |= BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED; + q = ctrl->connect_q; + } + cmd.auth_common.opcode = nvme_fabrics_command; cmd.auth_common.secp = NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER; cmd.auth_common.spsp0 = 0x01; From bd2687f2e5940f6ee14bfae787b3c1f5f0574907 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 29 Jan 2024 07:39:46 +0100 Subject: [PATCH 643/768] nvme: change __nvme_submit_sync_cmd() calling conventions Combine the two arguments 'flags' and 'at_head' from __nvme_submit_sync_cmd() into a single 'flags' argument and use function-specific values to indicate what should be set within the function. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 7 +++---- drivers/nvme/host/core.c | 19 ++++++++++++------- drivers/nvme/host/fabrics.c | 18 +++++++++++------- drivers/nvme/host/nvme.h | 17 +++++++++++++++-- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 3dce480d932e..bf69be8966f4 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -58,12 +58,12 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { struct nvme_command cmd = {}; - blk_mq_req_flags_t flags = 0; + nvme_submit_flags_t flags = 0; struct request_queue *q = ctrl->fabrics_q; int ret; if (qid != 0) { - flags |= BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED; + flags |= NVME_SUBMIT_NOWAIT | NVME_SUBMIT_RESERVED; q = ctrl->connect_q; } @@ -80,8 +80,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, } ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len, - qid == 0 ? NVME_QID_ANY : qid, - 0, flags); + qid == 0 ? NVME_QID_ANY : qid, flags); if (ret > 0) dev_warn(ctrl->device, "qid %d auth_send failed with status %d\n", qid, ret); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a42c347d08e8..aed099a45791 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1051,15 +1051,20 @@ EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU); */ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - int qid, int at_head, blk_mq_req_flags_t flags) + int qid, nvme_submit_flags_t flags) { struct request *req; int ret; + blk_mq_req_flags_t blk_flags = 0; + if (flags & NVME_SUBMIT_NOWAIT) + blk_flags |= BLK_MQ_REQ_NOWAIT; + if (flags & NVME_SUBMIT_RESERVED) + blk_flags |= BLK_MQ_REQ_RESERVED; if (qid == NVME_QID_ANY) - req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), blk_flags); else - req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), blk_flags, qid - 1); if (IS_ERR(req)) @@ -1072,7 +1077,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - ret = nvme_execute_rq(req, at_head); + ret = nvme_execute_rq(req, flags & NVME_SUBMIT_AT_HEAD); if (result && ret >= 0) *result = nvme_req(req)->result; out: @@ -1085,7 +1090,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buffer, unsigned bufflen) { return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); @@ -1560,7 +1565,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, c.features.dword11 = cpu_to_le32(dword11); ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, - buffer, buflen, NVME_QID_ANY, 0, 0); + buffer, buflen, NVME_QID_ANY, 0); if (ret >= 0 && result) *result = le32_to_cpu(res.u32); return ret; @@ -2172,7 +2177,7 @@ static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t l cmd.common.cdw11 = cpu_to_le32(len); return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, - NVME_QID_ANY, 1, 0); + NVME_QID_ANY, NVME_SUBMIT_AT_HEAD); } static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 373ed08e6b92..3499acbf6a82 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -180,7 +180,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -226,7 +226,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -271,7 +271,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) cmd.prop_set.value = cpu_to_le64(val); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (unlikely(ret)) dev_err(ctrl->device, "Property Set error: %d, offset %#x\n", @@ -450,8 +450,10 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, - data, sizeof(*data), NVME_QID_ANY, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + data, sizeof(*data), NVME_QID_ANY, + NVME_SUBMIT_AT_HEAD | + NVME_SUBMIT_NOWAIT | + NVME_SUBMIT_RESERVED); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); @@ -525,8 +527,10 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, - data, sizeof(*data), qid, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + data, sizeof(*data), qid, + NVME_SUBMIT_AT_HEAD | + NVME_SUBMIT_RESERVED | + NVME_SUBMIT_NOWAIT); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1700063bc24d..7d315b670e53 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -837,12 +837,25 @@ static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); } +/* + * Flags for __nvme_submit_sync_cmd() + */ +typedef __u32 __bitwise nvme_submit_flags_t; + +enum { + /* Insert request at the head of the queue */ + NVME_SUBMIT_AT_HEAD = (__force nvme_submit_flags_t)(1 << 0), + /* Set BLK_MQ_REQ_NOWAIT when allocating request */ + NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1), + /* Set BLK_MQ_REQ_RESERVED when allocating request */ + NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2), +}; + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - int qid, int at_head, - blk_mq_req_flags_t flags); + int qid, nvme_submit_flags_t flags); int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result); From 48dae46676d1acb632a3e1e14d02879d57618b5d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 29 Jan 2024 07:39:48 +0100 Subject: [PATCH 644/768] nvme: enable retries for authentication commands Authentication commands might trigger a lengthy computation on the controller or even a callout to an external entity. In these cases the controller might return a status without the DNR bit set, indicating that the command should be retried. This patch enables retries for authentication commands by setting NVME_SUBMIT_RETRY for __nvme_submit_sync_cmd(). Reported-by: Martin George Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 2 +- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/nvme.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index bf69be8966f4..a264b3ae078b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -58,7 +58,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { struct nvme_command cmd = {}; - nvme_submit_flags_t flags = 0; + nvme_submit_flags_t flags = NVME_SUBMIT_RETRY; struct request_queue *q = ctrl->fabrics_q; int ret; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index aed099a45791..8b48b7f7871a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1070,6 +1070,8 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, if (IS_ERR(req)) return PTR_ERR(req); nvme_init_request(req, cmd); + if (flags & NVME_SUBMIT_RETRY) + req->cmd_flags &= ~REQ_FAILFAST_DRIVER; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7d315b670e53..945bf15dccec 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -849,6 +849,8 @@ enum { NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1), /* Set BLK_MQ_REQ_RESERVED when allocating request */ NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2), + /* Retry command when NVME_SC_DNR is not set in the result */ + NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3), }; int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, From 0945b43b4ef8833d73daf5d057d07b64a23b4220 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 31 Jan 2024 15:01:38 -0800 Subject: [PATCH 645/768] nvme-common: add module description Add MODULE_DESCRIPTION() in order to remove warnings & get clean build:- WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/common/nvme-auth.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/common/nvme-keyring.o Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/auth.c | 1 + drivers/nvme/common/keyring.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index a23ab5c968b9..a3455f1d67fa 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -471,4 +471,5 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key) } EXPORT_SYMBOL_GPL(nvme_auth_generate_key); +MODULE_DESCRIPTION("NVMe Authentication framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index a5c0431c101c..6f7e7a8fa5ae 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -181,5 +181,6 @@ static void __exit nvme_keyring_exit(void) MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Hannes Reinecke "); +MODULE_DESCRIPTION("NVMe Keyring implementation"); module_init(nvme_keyring_init); module_exit(nvme_keyring_exit); From 4b6821940eeb238a0cc9af322e9ebe8e12613f6a Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:11 -0700 Subject: [PATCH 646/768] nvme: return string as char *, not unsigned char * The functions in drivers/nvme/host/constants.c returning human-readable status and opcode strings currently use type "const unsigned char *". Typically string constants use type "const char *", so remove "unsigned" from the return types. This is a purely cosmetic change to clarify that the functions return text strings instead of an array of bytes, for example. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/constants.c | 8 ++++---- drivers/nvme/host/nvme.h | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 20f46c230885..8791283ec6ad 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -171,7 +171,7 @@ static const char * const nvme_statuses[] = { [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command", }; -const unsigned char *nvme_get_error_status_str(u16 status) +const char *nvme_get_error_status_str(u16 status) { status &= 0x7ff; if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status]) @@ -179,7 +179,7 @@ const unsigned char *nvme_get_error_status_str(u16 status) return "Unknown"; } -const unsigned char *nvme_get_opcode_str(u8 opcode) +const char *nvme_get_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode]) return nvme_ops[opcode]; @@ -187,7 +187,7 @@ const unsigned char *nvme_get_opcode_str(u8 opcode) } EXPORT_SYMBOL_GPL(nvme_get_opcode_str); -const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +const char *nvme_get_admin_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode]) return nvme_admin_ops[opcode]; @@ -195,7 +195,7 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode) } EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str); -const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) { +const char *nvme_get_fabrics_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode]) return nvme_fabrics_ops[opcode]; return "Unknown"; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 945bf15dccec..acdf0e5e7e9a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1140,31 +1140,31 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) } #ifdef CONFIG_NVME_VERBOSE_ERRORS -const unsigned char *nvme_get_error_status_str(u16 status); -const unsigned char *nvme_get_opcode_str(u8 opcode); -const unsigned char *nvme_get_admin_opcode_str(u8 opcode); -const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode); +const char *nvme_get_error_status_str(u16 status); +const char *nvme_get_opcode_str(u8 opcode); +const char *nvme_get_admin_opcode_str(u8 opcode); +const char *nvme_get_fabrics_opcode_str(u8 opcode); #else /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const unsigned char *nvme_get_error_status_str(u16 status) +static inline const char *nvme_get_error_status_str(u16 status) { return "I/O Error"; } -static inline const unsigned char *nvme_get_opcode_str(u8 opcode) +static inline const char *nvme_get_opcode_str(u8 opcode) { return "I/O Cmd"; } -static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +static inline const char *nvme_get_admin_opcode_str(u8 opcode) { return "Admin Cmd"; } -static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) +static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) { return "Fabrics Cmd"; } #endif /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) +static inline const char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) { if (opcode == nvme_fabrics_command) return nvme_get_fabrics_opcode_str(fctype); From 6f9a71c61183d6849d5aeab085b210c10398af9a Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:12 -0700 Subject: [PATCH 647/768] nvme: remove redundant status mask In nvme_get_error_status_str(), the status code is already masked with 0x7ff at the beginning of the function. Don't bother masking it again when indexing nvme_statuses. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/constants.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 8791283ec6ad..6f2ebb5fcdb0 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -175,7 +175,7 @@ const char *nvme_get_error_status_str(u16 status) { status &= 0x7ff; if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status]) - return nvme_statuses[status & 0x7ff]; + return nvme_statuses[status]; return "Unknown"; } From f9e9115d0c014dec3278d68823eaff159f98f4d6 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:13 -0700 Subject: [PATCH 648/768] nvme: take const cmd pointer in read-only helpers nvme_is_fabrics() and nvme_is_write() only read struct nvme_command, so take it by const pointer. This allows callers to pass a const pointer and communicates that these functions don't modify the command. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 68eff8c86ce3..bc605ec4a3fd 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1812,7 +1812,7 @@ struct nvme_command { }; }; -static inline bool nvme_is_fabrics(struct nvme_command *cmd) +static inline bool nvme_is_fabrics(const struct nvme_command *cmd) { return cmd->common.opcode == nvme_fabrics_command; } @@ -1831,7 +1831,7 @@ struct nvme_error_slot { __u8 resv2[24]; }; -static inline bool nvme_is_write(struct nvme_command *cmd) +static inline bool nvme_is_write(const struct nvme_command *cmd) { /* * What a mess... From d8f5df1fcea54923b74558035b8de8fb2da3e816 Mon Sep 17 00:00:00 2001 From: Mohammad Nassiri Date: Tue, 30 Jan 2024 03:51:52 +0000 Subject: [PATCH 649/768] selftests/net: Argument value mismatch when calling verify_counters() The end_server() function only operates in the server thread and always takes an accept socket instead of a listen socket as its input argument. To align with this, invert the boolean values used when calling verify_counters() within the end_server() function. As a result of this typo, the test didn't correctly check for the non-symmetrical scenario, where i.e. peer-A uses a key <100:200> to send data, but peer-B uses another key <105:205> to send its data. So, in simple words, different keys for TX and RX. Fixes: 3c3ead555648 ("selftests/net: Add TCP-AO key-management test") Signed-off-by: Mohammad Nassiri Link: https://lore.kernel.org/all/934627c5-eebb-4626-be23-cfb134c01d1a@arista.com/ [amended 'Fixes' tag, added the issue description and carried-over to lkml] Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-1-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/key-management.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index c48b4970ca17..f6a9395e3cd7 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -843,7 +843,7 @@ static void end_server(const char *tst_name, int sk, synchronize_threads(); /* 4: verified => closed */ close(sk); - verify_counters(tst_name, true, false, begin, &end); + verify_counters(tst_name, false, true, begin, &end); synchronize_threads(); /* 5: counters */ } From 384aa16d3776a9ca5c0c1f1e7af4030fe176a993 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 30 Jan 2024 03:51:53 +0000 Subject: [PATCH 650/768] selftests/net: Rectify key counters checks As the names of (struct test_key) members didn't reflect whether the key was used for TX or RX, the verification for the counters was done incorrectly for asymmetrical selftests. Rename these with _tx appendix and fix checks in verify_counters(). While at it, as the checks are now correct, introduce skip_counters_checks, which is intended for tests where it's expected that a key that was set with setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, ...) might had no chance of getting used on the wire. Fixes the following failures, exposed by the previous commit: > not ok 51 server: Check current != rnext keys set before connect(): Counter pkt_good was expected to increase 0 => 0 for key 132:5 > not ok 52 server: Check current != rnext keys set before connect(): Counter pkt_good was not expected to increase 0 => 21 for key 137:10 > > not ok 63 server: Check current flapping back on peer's RnextKey request: Counter pkt_good was expected to increase 0 => 0 for key 132:5 > not ok 64 server: Check current flapping back on peer's RnextKey request: Counter pkt_good was not expected to increase 0 => 40 for key 137:10 Cc: Mohammad Nassiri Fixes: 3c3ead555648 ("selftests/net: Add TCP-AO key-management test") Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-2-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- .../selftests/net/tcp_ao/key-management.c | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index f6a9395e3cd7..24e62120b792 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -417,9 +417,9 @@ struct test_key { matches_vrf : 1, is_current : 1, is_rnext : 1, - used_on_handshake : 1, - used_after_accept : 1, - used_on_client : 1; + used_on_server_tx : 1, + used_on_client_tx : 1, + skip_counters_checks : 1; }; struct key_collection { @@ -609,16 +609,14 @@ static int key_collection_socket(bool server, unsigned int port) addr = &this_ip_dest; sndid = key->client_keyid; rcvid = key->server_keyid; - set_current = key->is_current; - set_rnext = key->is_rnext; + key->used_on_client_tx = set_current = key->is_current; + key->used_on_server_tx = set_rnext = key->is_rnext; } if (test_add_key_cr(sk, key->password, key->len, *addr, vrf, sndid, rcvid, key->maclen, key->alg, set_current, set_rnext)) test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); - if (set_current || set_rnext) - key->used_on_handshake = 1; #ifdef DEBUG test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", server ? "server" : "client", i, collection.nr_keys, @@ -640,22 +638,22 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; uint8_t sndid, rcvid; - bool was_used; + bool rx_cnt_expected; + if (key->skip_counters_checks) + continue; if (server) { sndid = key->server_keyid; rcvid = key->client_keyid; - if (is_listen_sk) - was_used = key->used_on_handshake; - else - was_used = key->used_after_accept; + rx_cnt_expected = key->used_on_client_tx; } else { sndid = key->client_keyid; rcvid = key->server_keyid; - was_used = key->used_on_client; + rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used, + test_tcp_ao_key_counters_cmp(tst_name, a, b, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, sndid, rcvid); } test_tcp_ao_counters_free(a); @@ -916,9 +914,8 @@ static int run_client(const char *tst_name, unsigned int port, current_index = nr_keys - 1; if (rnext_index < 0) rnext_index = nr_keys - 1; - collection.keys[current_index].used_on_handshake = 1; - collection.keys[rnext_index].used_after_accept = 1; - collection.keys[rnext_index].used_on_client = 1; + collection.keys[current_index].used_on_client_tx = 1; + collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { @@ -1059,7 +1056,16 @@ static void check_current_back(const char *tst_name, unsigned int port, test_error("Can't change the current key"); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); - collection.keys[rotate_to_index].used_after_accept = 1; + /* There is a race here: between setting the current_key with + * setsockopt(TCP_AO_INFO) and starting to send some data - there + * might have been a segment received with the desired + * RNext_key set. In turn that would mean that the first outgoing + * segment will have the desired current_key (flipped back). + * Which is what the user/test wants. As it's racy, skip checking + * the counters, yet check what are the resulting current/rnext + * keys on both sides. + */ + collection.keys[rotate_to_index].skip_counters_checks = 1; end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); } @@ -1089,7 +1095,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, } verify_current_rnext(tst_name, sk, -1, collection.keys[i].server_keyid); - collection.keys[i].used_on_client = 1; + collection.keys[i].used_on_server_tx = 1; synchronize_threads(); /* verify current/rnext */ } end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); From 6caf3adcc877b3470e98133d52b360ebe5f7a6a3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 30 Jan 2024 03:51:54 +0000 Subject: [PATCH 651/768] selftests/net: Repair RST passive reset selftest Currently, the test is racy and seems to not pass anymore. In order to rectify it, aim on TCP_TW_RST. Doesn't seem way too good with this sleep() part, but it seems as a reasonable compromise for the test. There is a plan in-line comment on how-to improve it, going to do it on the top, at this moment I want it to run on netdev/patchwork selftests dashboard. It also slightly changes tcp_ao-lib in order to get SO_ERROR propagated to test_client_verify() return value. Fixes: c6df7b2361d7 ("selftests/net: Add TCP-AO RST test") Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-3-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/lib/sock.c | 12 +- tools/testing/selftests/net/tcp_ao/rst.c | 138 ++++++++++++------ 2 files changed, 98 insertions(+), 52 deletions(-) diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index c75d82885a2e..15aeb0963058 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -62,7 +62,9 @@ int test_wait_fd(int sk, time_t sec, bool write) return -ETIMEDOUT; } - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret) + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen)) + return -errno; + if (ret) return -ret; return 0; } @@ -584,9 +586,11 @@ int test_client_verify(int sk, const size_t msg_len, const size_t nr, { size_t buf_sz = msg_len * nr; char *buf = alloca(buf_sz); + ssize_t ret; randomize_buffer(buf, buf_sz); - if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz) - return -1; - return 0; + ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index ac06009a7f5f..7df8b8700e39 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -1,10 +1,33 @@ // SPDX-License-Identifier: GPL-2.0 -/* Author: Dmitry Safonov */ +/* + * The test checks that both active and passive reset have correct TCP-AO + * signature. An "active" reset (abort) here is procured from closing + * listen() socket with non-accepted connections in the queue: + * inet_csk_listen_stop() => inet_child_forget() => + * => tcp_disconnect() => tcp_send_active_reset() + * + * The passive reset is quite hard to get on established TCP connections. + * It could be procured from non-established states, but the synchronization + * part from userspace in order to reliably get RST seems uneasy. + * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state. + * + * It's important to test both passive and active RST as they go through + * different code-paths: + * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb() + * - tcp_v*_send_reset() create their reply skbs and send them with + * ip_send_unicast_reply() + * + * In both cases TCP-AO signatures have to be correct, which is verified by + * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters. + * + * Author: Dmitry Safonov + */ #include #include "../../../../include/linux/kernel.h" #include "aolib.h" const size_t quota = 1000; +const size_t packet_sz = 100; /* * Backlog == 0 means 1 connection in queue, see: * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") @@ -59,26 +82,6 @@ static void close_forced(int sk) close(sk); } -static int test_wait_for_exception(int sk, time_t sec) -{ - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; - fd_set efds; - int ret; - - FD_ZERO(&efds); - FD_SET(sk, &efds); - - if (sec) - ptv = &tv; - - errno = 0; - ret = select(sk + 1, NULL, NULL, &efds, ptv); - if (ret < 0) - return -errno; - return ret ? sk : 0; -} - static void test_server_active_rst(unsigned int port) { struct tcp_ao_counters cnt1, cnt2; @@ -155,17 +158,16 @@ static void test_server_passive_rst(unsigned int port) test_fail("server returned %zd", bytes); } - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ + synchronize_threads(); /* 4: close the server, creating twsk */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - - synchronize_threads(); /* 4: terminate server + send more on client */ - bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC); close(sk); + + synchronize_threads(); /* 5: restore the socket, send more data */ test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); - synchronize_threads(); /* 5: verified => closed */ - close(sk); + synchronize_threads(); /* 6: server exits */ } static void *server_fn(void *arg) @@ -284,7 +286,7 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +325,6 @@ static void test_client_passive_rst(unsigned int port) struct tcp_sock_state img; sockaddr_af saddr; int sk, err; - socklen_t slen = sizeof(err); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -337,18 +338,51 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ test_enable_repair(sk); test_sock_checkpoint(sk, &img, &saddr); test_ao_checkpoint(sk, &ao_img); - test_kill_sk(sk); + test_disable_repair(sk); - img.out.seq += quota; + synchronize_threads(); /* 4: close the server, creating twsk */ + + /* + * The "corruption" in SEQ has to be small enough to fit into TCP + * window, see tcp_timewait_state_process() for out-of-window + * segments. + */ + img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */ + + /* + * FIXME: This is kind-of ugly and dirty, but it works. + * + * At this moment, the server has close'ed(sk). + * The passive RST that is being targeted here is new data after + * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST + * + * What is needed here is: + * (1) wait for FIN from the server + * (2) make sure that the ACK from the client went out + * (3) make sure that the ACK was received and processed by the server + * + * Otherwise, the data that will be sent from "repaired" socket + * post SEQ corruption may get to the server before it's in + * TCP_FIN_WAIT2. + * + * (1) is easy with select()/poll() + * (2) is possible by polling tcpi_state from TCP_INFO + * (3) is quite complex: as server's socket was already closed, + * probably the way to do it would be tcp-diag. + */ + sleep(TEST_RETRANSMIT_SEC); + + synchronize_threads(); /* 5: restore the socket, send more data */ + test_kill_sk(sk); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -366,25 +400,33 @@ static void test_client_passive_rst(unsigned int port) test_disable_repair(sk); test_sock_state_free(&img); - synchronize_threads(); /* 4: terminate server + send more on client */ - if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC)) - test_ok("client connection broken post-seq-adjust"); + /* + * This is how "passive reset" is acquired in this test from TCP_TW_RST: + * + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 + */ + err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + /* Make sure that the connection was reset, not timeouted */ + if (err && err == -ECONNRESET) + test_ok("client sock was passively reset post-seq-adjust"); + else if (err) + test_fail("client sock was not reset post-seq-adjust: %d", err); else - test_fail("client connection still works post-seq-adjust"); - - test_wait_for_exception(sk, TEST_TIMEOUT_SEC); - - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen)) - test_error("getsockopt()"); - if (err != ECONNRESET && err != EPIPE) - test_fail("client connection was not reset: %d", err); - else - test_ok("client connection was reset"); + test_fail("client sock is yet connected post-seq-adjust"); if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - synchronize_threads(); /* 5: verified => closed */ + synchronize_threads(); /* 6: server exits */ close(sk); test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); } @@ -410,6 +452,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(15, server_fn, client_fn); + test_init(14, server_fn, client_fn); return 0; } From 7d23e836b00eec96610141549f59e5902dc55fe8 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:14 -0700 Subject: [PATCH 652/768] nvme: split out fabrics version of nvme_opcode_str() nvme_opcode_str() currently supports admin, IO, and fabrics commands. However, fabrics commands aren't allowed for the pci transport. Currently the pci caller passes 0 as the fctype, which means any fabrics command would be displayed as "Property Set". Move fabrics command support into a function nvme_fabrics_opcode_str() and remove the fctype argument to nvme_opcode_str(). This way, a fabrics command will display as "Unknown" for pci. Convert the rdma and tcp transports to use nvme_fabrics_opcode_str(). Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 13 ++++++++++--- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 7 +++---- drivers/nvme/host/tcp.c | 6 +++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index acdf0e5e7e9a..b70b333a0874 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1164,11 +1164,18 @@ static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) } #endif /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) +static inline const char *nvme_opcode_str(int qid, u8 opcode) { - if (opcode == nvme_fabrics_command) - return nvme_get_fabrics_opcode_str(fctype); return qid ? nvme_get_opcode_str(opcode) : nvme_get_admin_opcode_str(opcode); } + +static inline const char *nvme_fabrics_opcode_str( + int qid, const struct nvme_command *cmd) +{ + if (nvme_is_fabrics(cmd)) + return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype); + + return nvme_opcode_str(qid, cmd->common.opcode); +} #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 25eb72779541..e6267a6aa380 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1349,7 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) dev_warn(dev->ctrl.device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", req->tag, nvme_cid(req), opcode, - nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid); + nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; goto disable; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3f393ee20281..6adf2c19a712 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1951,14 +1951,13 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct nvme_rdma_ctrl *ctrl = queue->ctrl; - u8 opcode = req->req.cmd->common.opcode; - u8 fctype = req->req.cmd->fabrics.fctype; + struct nvme_command *cmd = req->req.cmd; int qid = nvme_rdma_queue_idx(queue); dev_warn(ctrl->ctrl.device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n", - rq->tag, nvme_cid(rq), opcode, - nvme_opcode_str(qid, opcode, fctype), qid); + rq->tag, nvme_cid(rq), cmd->common.opcode, + nvme_fabrics_opcode_str(qid, cmd), qid); if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4393cf244025..9f8dea2e2c7d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2428,13 +2428,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); - u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; + struct nvme_command *cmd = &pdu->cmd; int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", - rq->tag, nvme_cid(rq), pdu->hdr.type, opc, - nvme_opcode_str(qid, opc, fctype), qid); + rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode, + nvme_fabrics_opcode_str(qid, cmd), qid); if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* From 0d150beff26e636aa07ab889133a0015eaee4227 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:15 -0700 Subject: [PATCH 653/768] nvme-fc: log human-readable opcode on timeout The fc transport logs the opcode and fctype on command timeout. This is sufficient information to identify the command issued, but not very human-readable. Use the nvme_fabrics_opcode_str() helper to also log the name of the command, as rdma and tcp already do. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e2308119f8f0..63a2e2839a78 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2574,6 +2574,7 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; + u16 qnum = op->queue->qnum; struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; @@ -2582,10 +2583,11 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) * will detect the aborted io and will fail the connection. */ dev_info(ctrl->ctrl.device, - "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: " + "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d (%s) w10/11: " "x%08x/x%08x\n", - ctrl->cnum, op->queue->qnum, sqe->common.opcode, - sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11); + ctrl->cnum, qnum, sqe->common.opcode, sqe->fabrics.fctype, + nvme_fabrics_opcode_str(qnum, sqe), + sqe->common.cdw10, sqe->common.cdw11); if (__nvme_fc_abort_op(ctrl, op)) nvme_fc_error_recovery(ctrl, "io timeout abort failed"); From 4d322dce82a1d44f8c83f0f54f95dd1b8dcf46c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jan 2024 18:42:35 +0000 Subject: [PATCH 654/768] af_unix: fix lockdep positive in sk_diag_dump_icons() syzbot reported a lockdep splat [1]. Blamed commit hinted about the possible lockdep violation, and code used unix_state_lock_nested() in an attempt to silence lockdep. It is not sufficient, because unix_state_lock_nested() is already used from unix_state_double_lock(). We need to use a separate subclass. This patch adds a distinct enumeration to make things more explicit. Also use swap() in unix_state_double_lock() as a clean up. v2: add a missing inline keyword to unix_state_lock_nested() [1] WARNING: possible circular locking dependency detected 6.8.0-rc1-syzkaller-00356-g8a696a29c690 #0 Not tainted syz-executor.1/2542 is trying to acquire lock: ffff88808b5df9e8 (rlock-AF_UNIX){+.+.}-{2:2}, at: skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 but task is already holding lock: ffff88808b5dfe70 (&u->lock/1){+.+.}-{2:2}, at: unix_dgram_sendmsg+0xfc7/0x2200 net/unix/af_unix.c:2089 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&u->lock/1){+.+.}-{2:2}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 _raw_spin_lock_nested+0x31/0x40 kernel/locking/spinlock.c:378 sk_diag_dump_icons net/unix/diag.c:87 [inline] sk_diag_fill+0x6ea/0xfe0 net/unix/diag.c:157 sk_diag_dump net/unix/diag.c:196 [inline] unix_diag_dump+0x3e9/0x630 net/unix/diag.c:220 netlink_dump+0x5c1/0xcd0 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5d7/0x780 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] unix_diag_handler_dump+0x1c3/0x8f0 net/unix/diag.c:319 sock_diag_rcv_msg+0xe3/0x400 netlink_rcv_skb+0x1df/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7e6/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa37/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_write_iter+0x39a/0x520 net/socket.c:1160 call_write_iter include/linux/fs.h:2085 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xa74/0xca0 fs/read_write.c:590 ksys_write+0x1a0/0x2c0 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b -> #0 (rlock-AF_UNIX){+.+.}-{2:2}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x1909/0x5ab0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 unix_dgram_sendmsg+0x15d9/0x2200 net/unix/af_unix.c:2112 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x592/0x890 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmmsg+0x3b2/0x730 net/socket.c:2724 __do_sys_sendmmsg net/socket.c:2753 [inline] __se_sys_sendmmsg net/socket.c:2750 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2750 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&u->lock/1); lock(rlock-AF_UNIX); lock(&u->lock/1); lock(rlock-AF_UNIX); *** DEADLOCK *** 1 lock held by syz-executor.1/2542: #0: ffff88808b5dfe70 (&u->lock/1){+.+.}-{2:2}, at: unix_dgram_sendmsg+0xfc7/0x2200 net/unix/af_unix.c:2089 stack backtrace: CPU: 1 PID: 2542 Comm: syz-executor.1 Not tainted 6.8.0-rc1-syzkaller-00356-g8a696a29c690 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 check_noncircular+0x366/0x490 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x1909/0x5ab0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 unix_dgram_sendmsg+0x15d9/0x2200 net/unix/af_unix.c:2112 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x592/0x890 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmmsg+0x3b2/0x730 net/socket.c:2724 __do_sys_sendmmsg net/socket.c:2753 [inline] __se_sys_sendmmsg net/socket.c:2750 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2750 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f26d887cda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f26d95a60c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f26d89abf80 RCX: 00007f26d887cda9 RDX: 000000000000003e RSI: 00000000200bd000 RDI: 0000000000000004 RBP: 00007f26d88c947a R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000008c0 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f26d89abf80 R15: 00007ffcfe081a68 Fixes: 2aac7a2cb0d9 ("unix_diag: Pending connections IDs NLA") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240130184235.1620738-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 20 ++++++++++++++------ net/unix/af_unix.c | 14 ++++++-------- net/unix/diag.c | 2 +- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 49c4640027d8..afd40dce40f3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -46,12 +46,6 @@ struct scm_stat { #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) - /* The AF_UNIX socket */ struct unix_sock { /* WARNING: sk has to be the first member */ @@ -77,6 +71,20 @@ struct unix_sock { #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) #define unix_peer(sk) (unix_sk(sk)->peer) +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +enum unix_socket_lock_class { + U_LOCK_NORMAL, + U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ + U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ +}; + +static inline void unix_state_lock_nested(struct sock *sk, + enum unix_socket_lock_class subclass) +{ + spin_lock_nested(&unix_sk(sk)->lock, subclass); +} + #define peer_wait peer_wq.wait long unix_inq_len(struct sock *sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ac1f2bc18fc9..30b178ebba60 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1344,13 +1344,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + if (sk1 > sk2) + swap(sk1, sk2); + + unix_state_lock(sk1); + unix_state_lock_nested(sk2, U_LOCK_SECOND); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1591,7 +1589,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock_nested(sk, U_LOCK_SECOND); if (sk->sk_state != st) { unix_state_unlock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index bec09a3a1d44..be19827eca36 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock_nested(req, U_LOCK_DIAG); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req); From d9407ff11809c6812bb84fe7be9c1367d758e5c8 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:30 -0800 Subject: [PATCH 655/768] pds_core: Prevent health thread from running during reset/remove The PCIe reset handlers can run at the same time as the health thread. This can cause the health thread to stomp on the PCIe reset. Fix this by preventing the health thread from running while a PCIe reset is happening. As part of this use timer_shutdown_sync() during reset and remove to make sure the timer doesn't ever get rearmed. Fixes: ffa55858330f ("pds_core: implement pci reset handlers") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-2-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 3080898d7b95..5172a5ad8ec6 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -293,7 +293,7 @@ err_out_stop: err_out_teardown: pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING); err_out_unmap_bars: - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); mutex_destroy(&pdsc->config_lock); @@ -420,7 +420,7 @@ static void pdsc_remove(struct pci_dev *pdev) */ pdsc_sriov_configure(pdev, 0); - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); @@ -445,10 +445,24 @@ static void pdsc_remove(struct pci_dev *pdev) devlink_free(dl); } +static void pdsc_stop_health_thread(struct pdsc *pdsc) +{ + timer_shutdown_sync(&pdsc->wdtimer); + if (pdsc->health_work.func) + cancel_work_sync(&pdsc->health_work); +} + +static void pdsc_restart_health_thread(struct pdsc *pdsc) +{ + timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); + mod_timer(&pdsc->wdtimer, jiffies + 1); +} + void pdsc_reset_prepare(struct pci_dev *pdev) { struct pdsc *pdsc = pci_get_drvdata(pdev); + pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); pci_free_irq_vectors(pdev); @@ -486,6 +500,7 @@ void pdsc_reset_done(struct pci_dev *pdev) } pdsc_fw_up(pdsc); + pdsc_restart_health_thread(pdsc); } static const struct pci_error_handlers pdsc_err_handler = { From d321067e2cfa4d5e45401a00912ca9da8d1af631 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:31 -0800 Subject: [PATCH 656/768] pds_core: Cancel AQ work on teardown There is a small window where pdsc_work_thread() calls pdsc_process_adminq() and pdsc_process_adminq() passes the PDSC_S_STOPPING_DRIVER check and starts to process adminq/notifyq work and then the driver starts a fw_down cycle. This could cause some undefined behavior if the notifyqcq/adminqcq are free'd while pdsc_process_adminq() is running. Use cancel_work_sync() on the adminqcq's work struct to make sure any pending work items are cancelled and any in progress work items are completed. Also, make sure to not call cancel_work_sync() if the work item has not be initialized. Without this, traces will happen in cases where a reset fails and teardown is called again or if reset fails and the driver is removed. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-3-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 0d2091e9eb28..b582729331eb 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -464,6 +464,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) if (!pdsc->pdev->is_virtfn) pdsc_devcmd_reset(pdsc); + if (pdsc->adminqcq.work.func) + cancel_work_sync(&pdsc->adminqcq.work); pdsc_qcq_free(pdsc, &pdsc->notifyqcq); pdsc_qcq_free(pdsc, &pdsc->adminqcq); From 951705151e50f9022bc96ec8b3fd5697380b1df6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:32 -0800 Subject: [PATCH 657/768] pds_core: Use struct pdsc for the pdsc_adminq_isr private data The initial design for the adminq interrupt was done based on client drivers having their own adminq and adminq interrupt. So, each client driver's adminq isr would use their specific adminqcq for the private data struct. For the time being the design has changed to only use a single adminq for all clients. So, instead use the struct pdsc for the private data to simplify things a bit. This also has the benefit of not dereferencing the adminqcq to access the pdsc struct when the PDSC_S_STOPPING_DRIVER bit is set and the adminqcq has actually been cleared/freed. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-4-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 5 +++-- drivers/net/ethernet/amd/pds_core/core.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 5beadabc2136..68be5ea251fc 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -135,8 +135,8 @@ void pdsc_work_thread(struct work_struct *work) irqreturn_t pdsc_adminq_isr(int irq, void *data) { - struct pdsc_qcq *qcq = data; - struct pdsc *pdsc = qcq->pdsc; + struct pdsc *pdsc = data; + struct pdsc_qcq *qcq; /* Don't process AdminQ when shutting down */ if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { @@ -145,6 +145,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) return IRQ_HANDLED; } + qcq = &pdsc->adminqcq; queue_work(pdsc->wq, &qcq->work); pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index b582729331eb..0356e56a6e99 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -125,7 +125,7 @@ static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq) snprintf(name, sizeof(name), "%s-%d-%s", PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name); - index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, qcq); + index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, pdsc); if (index < 0) return index; qcq->intx = index; From 7e82a8745b951b1e794cc780d46f3fbee5e93447 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:33 -0800 Subject: [PATCH 658/768] pds_core: Prevent race issues involving the adminq There are multiple paths that can result in using the pdsc's adminq. [1] pdsc_adminq_isr and the resulting work from queue_work(), i.e. pdsc_work_thread()->pdsc_process_adminq() [2] pdsc_adminq_post() When the device goes through reset via PCIe reset and/or a fw_down/fw_up cycle due to bad PCIe state or bad device state the adminq is destroyed and recreated. A NULL pointer dereference can happen if [1] or [2] happens after the adminq is already destroyed. In order to fix this, add some further state checks and implement reference counting for adminq uses. Reference counting was used because multiple threads can attempt to access the adminq at the same time via [1] or [2]. Additionally, multiple clients (i.e. pds-vfio-pci) can be using [2] at the same time. The adminq_refcnt is initialized to 1 when the adminq has been allocated and is ready to use. Users/clients of the adminq (i.e. [1] and [2]) will increment the refcnt when they are using the adminq. When the driver goes into a fw_down cycle it will set the PDSC_S_FW_DEAD bit and then wait for the adminq_refcnt to hit 1. Setting the PDSC_S_FW_DEAD before waiting will prevent any further adminq_refcnt increments. Waiting for the adminq_refcnt to hit 1 allows for any current users of the adminq to finish before the driver frees the adminq. Once the adminq_refcnt hits 1 the driver clears the refcnt to signify that the adminq is deleted and cannot be used. On the fw_up cycle the driver will once again initialize the adminq_refcnt to 1 allowing the adminq to be used again. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-5-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 31 +++++++++++++++++----- drivers/net/ethernet/amd/pds_core/core.c | 21 +++++++++++++++ drivers/net/ethernet/amd/pds_core/core.h | 1 + 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 68be5ea251fc..5edff33d56f3 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -63,6 +63,15 @@ static int pdsc_process_notifyq(struct pdsc_qcq *qcq) return nq_work; } +static bool pdsc_adminq_inc_if_up(struct pdsc *pdsc) +{ + if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER) || + pdsc->state & BIT_ULL(PDSC_S_FW_DEAD)) + return false; + + return refcount_inc_not_zero(&pdsc->adminq_refcnt); +} + void pdsc_process_adminq(struct pdsc_qcq *qcq) { union pds_core_adminq_comp *comp; @@ -75,9 +84,9 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) int aq_work = 0; int credits; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return; } @@ -124,6 +133,7 @@ credits: pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx], credits, PDS_CORE_INTR_CRED_REARM); + refcount_dec(&pdsc->adminq_refcnt); } void pdsc_work_thread(struct work_struct *work) @@ -138,9 +148,9 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) struct pdsc *pdsc = data; struct pdsc_qcq *qcq; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return IRQ_HANDLED; } @@ -148,6 +158,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) qcq = &pdsc->adminqcq; queue_work(pdsc->wq, &qcq->work); pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); + refcount_dec(&pdsc->adminq_refcnt); return IRQ_HANDLED; } @@ -231,6 +242,12 @@ int pdsc_adminq_post(struct pdsc *pdsc, int err = 0; int index; + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_dbg(pdsc->dev, "%s: preventing adminq cmd %u\n", + __func__, cmd->opcode); + return -ENXIO; + } + wc.qcq = &pdsc->adminqcq; index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); if (index < 0) { @@ -286,6 +303,8 @@ err_out: queue_work(pdsc->wq, &pdsc->health_work); } + refcount_dec(&pdsc->adminq_refcnt); + return err; } EXPORT_SYMBOL_GPL(pdsc_adminq_post); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 0356e56a6e99..f44333bd1256 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -450,6 +450,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) pdsc_debugfs_add_viftype(pdsc); } + refcount_set(&pdsc->adminq_refcnt, 1); clear_bit(PDSC_S_FW_DEAD, &pdsc->state); return 0; @@ -514,6 +515,24 @@ void pdsc_stop(struct pdsc *pdsc) PDS_CORE_INTR_MASK_SET); } +static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc) +{ + /* The driver initializes the adminq_refcnt to 1 when the adminq is + * allocated and ready for use. Other users/requesters will increment + * the refcnt while in use. If the refcnt is down to 1 then the adminq + * is not in use and the refcnt can be cleared and adminq freed. Before + * calling this function the driver will set PDSC_S_FW_DEAD, which + * prevent subsequent attempts to use the adminq and increment the + * refcnt to fail. This guarantees that this function will eventually + * exit. + */ + while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) { + dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n", + __func__); + cpu_relax(); + } +} + void pdsc_fw_down(struct pdsc *pdsc) { union pds_core_notifyq_comp reset_event = { @@ -529,6 +548,8 @@ void pdsc_fw_down(struct pdsc *pdsc) if (pdsc->pdev->is_virtfn) return; + pdsc_adminq_wait_and_dec_once_unused(pdsc); + /* Notify clients of fw_down */ if (pdsc->fw_reporter) devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index e35d3e7006bf..cbd5716f46e6 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -184,6 +184,7 @@ struct pdsc { struct mutex devcmd_lock; /* lock for dev_cmd operations */ struct mutex config_lock; /* lock for configuration operations */ spinlock_t adminq_lock; /* lock for adminq operations */ + refcount_t adminq_refcnt; struct pds_core_dev_info_regs __iomem *info_regs; struct pds_core_dev_cmd_regs __iomem *cmd_regs; struct pds_core_intr __iomem *intr_ctrl; From e96094c1d11cce4deb5da3c0500d49041ab845b8 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:34 -0800 Subject: [PATCH 659/768] pds_core: Clear BARs on reset During reset the BARs might be accessed when they are unmapped. This can cause unexpected issues, so fix it by clearing the cached BAR values so they are not accessed until they are re-mapped. Also, make sure any places that can access the BARs when they are NULL are prevented. Fixes: 49ce92fbee0b ("pds_core: add FW update feature to devlink") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-6-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 24 +++++++++++++++------ drivers/net/ethernet/amd/pds_core/core.c | 8 ++++++- drivers/net/ethernet/amd/pds_core/dev.c | 9 +++++++- drivers/net/ethernet/amd/pds_core/devlink.c | 3 ++- drivers/net/ethernet/amd/pds_core/fw.c | 3 +++ drivers/net/ethernet/amd/pds_core/main.c | 5 +++++ 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 5edff33d56f3..ea773cfa0af6 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -191,10 +191,16 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, /* Check that the FW is running */ if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); - dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", - __func__, fw_status); + dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_info(pdsc->dev, "%s: post failed - BARs not setup\n", + __func__); + } ret = -ENXIO; goto err_out_unlock; @@ -266,10 +272,16 @@ int pdsc_adminq_post(struct pdsc *pdsc, break; if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); - dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", - __func__, fw_status); + dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_dbg(pdsc->dev, "%s: post wait failed - BARs not setup\n", + __func__); + } err = -ENXIO; break; } diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index f44333bd1256..65c8a7072e35 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -600,7 +600,13 @@ err_out: static void pdsc_check_pci_health(struct pdsc *pdsc) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + u8 fw_status; + + /* some sort of teardown already in progress */ + if (!pdsc->info_regs) + return; + + fw_status = ioread8(&pdsc->info_regs->fw_status); /* is PCI broken? */ if (fw_status != PDS_RC_BAD_PCI) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 31940b857e0e..62a38e0a8454 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -57,6 +57,9 @@ int pdsc_err_to_errno(enum pds_core_status_code code) bool pdsc_is_fw_running(struct pdsc *pdsc) { + if (!pdsc->info_regs) + return false; + pdsc->fw_status = ioread8(&pdsc->info_regs->fw_status); pdsc->last_fw_time = jiffies; pdsc->last_hb = ioread32(&pdsc->info_regs->fw_heartbeat); @@ -182,13 +185,17 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, { int err; + if (!pdsc->cmd_regs) + return -ENXIO; + memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd)); pdsc_devcmd_dbell(pdsc); err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds); - memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq) queue_work(pdsc->wq, &pdsc->health_work); + else + memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); return err; } diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index e9948ea5bbcd..54864f27c87a 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -111,7 +111,8 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, mutex_lock(&pdsc->devcmd_lock); err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2); - memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); + if (!err) + memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); mutex_unlock(&pdsc->devcmd_lock); if (err && err != -EIO) return err; diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c index 90a811f3878a..fa626719e68d 100644 --- a/drivers/net/ethernet/amd/pds_core/fw.c +++ b/drivers/net/ethernet/amd/pds_core/fw.c @@ -107,6 +107,9 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw, dev_info(pdsc->dev, "Installing firmware\n"); + if (!pdsc->cmd_regs) + return -ENXIO; + dl = priv_to_devlink(pdsc); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 5172a5ad8ec6..05fdeb235e5f 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -37,6 +37,11 @@ static void pdsc_unmap_bars(struct pdsc *pdsc) struct pdsc_dev_bar *bars = pdsc->bars; unsigned int i; + pdsc->info_regs = NULL; + pdsc->cmd_regs = NULL; + pdsc->intr_status = NULL; + pdsc->intr_ctrl = NULL; + for (i = 0; i < PDS_CORE_BARS_MAX; i++) { if (bars[i].vaddr) pci_iounmap(pdsc->pdev, bars[i].vaddr); From bc90fbe0c3182157d2be100a2f6c2edbb1820677 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:35 -0800 Subject: [PATCH 660/768] pds_core: Rework teardown/setup flow to be more common Currently the teardown/setup flow for driver probe/remove is quite a bit different from the reset flows in pdsc_fw_down()/pdsc_fw_up(). One key piece that's missing are the calls to pci_alloc_irq_vectors() and pci_free_irq_vectors(). The pcie reset case is calling pci_free_irq_vectors() on reset_prepare, but not calling the corresponding pci_alloc_irq_vectors() on reset_done. This is causing unexpected/unwanted interrupt behavior due to the adminq interrupt being accidentally put into legacy interrupt mode. Also, the pci_alloc_irq_vectors()/pci_free_irq_vectors() functions are being called directly in probe/remove respectively. Fix this inconsistency by making the following changes: 1. Always call pdsc_dev_init() in pdsc_setup(), which calls pci_alloc_irq_vectors() and get rid of the now unused pds_dev_reinit(). 2. Always free/clear the pdsc->intr_info in pdsc_teardown() since this structure will get re-alloced in pdsc_setup(). 3. Move the calls of pci_free_irq_vectors() to pdsc_teardown() since pci_alloc_irq_vectors() will always be called in pdsc_setup()->pdsc_dev_init() for both the probe/remove and reset flows. 4. Make sure to only create the debugfs "identity" entry when it doesn't already exist, which it will in the reset case because it's already been created in the initial call to pdsc_dev_init(). Fixes: ffa55858330f ("pds_core: implement pci reset handlers") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240129234035.69802-7-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 13 +++++-------- drivers/net/ethernet/amd/pds_core/core.h | 1 - drivers/net/ethernet/amd/pds_core/debugfs.c | 4 ++++ drivers/net/ethernet/amd/pds_core/dev.c | 7 ------- drivers/net/ethernet/amd/pds_core/main.c | 2 -- 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 65c8a7072e35..7658a7286767 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -404,10 +404,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) int numdescs; int err; - if (init) - err = pdsc_dev_init(pdsc); - else - err = pdsc_dev_reinit(pdsc); + err = pdsc_dev_init(pdsc); if (err) return err; @@ -479,10 +476,9 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) for (i = 0; i < pdsc->nintrs; i++) pdsc_intr_free(pdsc, i); - if (removing) { - kfree(pdsc->intr_info); - pdsc->intr_info = NULL; - } + kfree(pdsc->intr_info); + pdsc->intr_info = NULL; + pdsc->nintrs = 0; } if (pdsc->kern_dbpage) { @@ -490,6 +486,7 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) pdsc->kern_dbpage = NULL; } + pci_free_irq_vectors(pdsc->pdev); set_bit(PDSC_S_FW_DEAD, &pdsc->state); } diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index cbd5716f46e6..110c4b826b22 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -281,7 +281,6 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, union pds_core_dev_comp *comp, int max_seconds); int pdsc_devcmd_init(struct pdsc *pdsc); int pdsc_devcmd_reset(struct pdsc *pdsc); -int pdsc_dev_reinit(struct pdsc *pdsc); int pdsc_dev_init(struct pdsc *pdsc); void pdsc_reset_prepare(struct pci_dev *pdev); diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index 8ec392299b7d..4e8579ca1c8c 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -64,6 +64,10 @@ DEFINE_SHOW_ATTRIBUTE(identity); void pdsc_debugfs_add_ident(struct pdsc *pdsc) { + /* This file will already exist in the reset flow */ + if (debugfs_lookup("identity", pdsc->dentry)) + return; + debugfs_create_file("identity", 0400, pdsc->dentry, pdsc, &identity_fops); } diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 62a38e0a8454..e65a1632df50 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -316,13 +316,6 @@ static int pdsc_identify(struct pdsc *pdsc) return 0; } -int pdsc_dev_reinit(struct pdsc *pdsc) -{ - pdsc_init_devinfo(pdsc); - - return pdsc_identify(pdsc); -} - int pdsc_dev_init(struct pdsc *pdsc) { unsigned int nintrs; diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 05fdeb235e5f..cdbf053b5376 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -438,7 +438,6 @@ static void pdsc_remove(struct pci_dev *pdev) mutex_destroy(&pdsc->config_lock); mutex_destroy(&pdsc->devcmd_lock); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); } @@ -470,7 +469,6 @@ void pdsc_reset_prepare(struct pci_dev *pdev) pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); pci_disable_device(pdev); From f7c25d8e17dd759d97ca093faf92eeb7da7b3890 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:16 +0100 Subject: [PATCH 661/768] selftests: net: add missing config for pmtu.sh tests The mentioned test uses a few Kconfig still missing the net config, add them. Before: # Error: Specified qdisc kind is unknown. # Error: Specified qdisc kind is unknown. # Error: Qdisc not classful. # We have an error talking to the kernel # Error: Qdisc not classful. # We have an error talking to the kernel # policy_routing not supported # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions [SKIP] After: # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions [ OK ] Fixes: ec730c3e1f0e ("selftest: net: Test IPv4 PMTU exceptions with DSCP and ECN") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/8d27bf6762a5c7b3acc457d6e6872c533040f9c1.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 77a173635a29..98c6bd2228c6 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -49,8 +49,10 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_MATCHALL=m @@ -62,6 +64,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NET_SCH_PRIO=m CONFIG_NFT_COMPAT=m CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m From e4e4b6d568d2549583cbda3f8ce567e586cb05da Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:17 +0100 Subject: [PATCH 662/768] selftests: net: fix available tunnels detection The pmtu.sh test tries to detect the tunnel protocols available in the running kernel and properly skip the unsupported cases. In a few more complex setup, such detection is unsuccessful, as the script currently ignores some intermediate error code at setup time. Before: # which: no nettest in (/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin) # TEST: vti6: PMTU exceptions (ESP-in-UDP) [FAIL] # PMTU exception wasn't created after creating tunnel exceeding link layer MTU # ./pmtu.sh: line 931: kill: (7543) - No such process # ./pmtu.sh: line 931: kill: (7544) - No such process After: # xfrm4 not supported # TEST: vti4: PMTU exceptions [SKIP] Fixes: ece1278a9b81 ("selftests: net: add ESP-in-UDP PMTU test") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/cab10e75fda618e6fff8c595b632f47db58b9309.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index f10879788f61..31892b366913 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -707,23 +707,23 @@ setup_xfrm6() { } setup_xfrm4udp() { - setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udp() { - setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_xfrm4udprouted() { - setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udprouted() { - setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_routing_old() { From bc0970d5ac1d1317e212bdf55533935ecb6ae95c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:18 +0100 Subject: [PATCH 663/768] selftests: net: don't access /dev/stdout in pmtu.sh When running the pmtu.sh via the kselftest infra, accessing /dev/stdout gives unexpected results: # dd: failed to open '/dev/stdout': Device or resource busy # TEST: IPv4, bridged vxlan4: PMTU exceptions [FAIL] Let dd use directly the standard output to fix the above: # TEST: IPv4, bridged vxlan4: PMTU exceptions - nexthop objects [ OK ] Fixes: 136a1b434bbb ("selftests: net: test vxlan pmtu exceptions with tcp") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/23d7592c5d77d75cff9b34f15c227f92e911c2ae.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 31892b366913..3f118e3f1c66 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1339,7 +1339,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { sleep 1 - dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} From f9c15a678db3acbe769635e3c49f979e2f88a514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 24 Jan 2024 09:18:30 -0800 Subject: [PATCH 664/768] drm/xe: Fix crash in trace_dma_fence_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit trace_dma_fence_init() uses dma_fence_ops functions like get_driver_name() and get_timeline_name() to generate trace information but the Xe KMD implementation of those functions makes use of xe_hw_fence_ctx that was being set after dma_fence_init(). So here just inverting the order to fix the crash. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: José Roberto de Souza Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240124171830.95774-1-jose.souza@intel.com (cherry picked from commit c6878e47431c72168da08dfbc1496c09b2d3c246) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_hw_fence.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index a6094c81f2ad..a5de3e7b0bd6 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -217,13 +217,13 @@ struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, if (!fence) return ERR_PTR(-ENOMEM); - dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, - ctx->dma_fence_ctx, ctx->next_seqno++); - fence->ctx = ctx; fence->seqno_map = seqno_map; INIT_LIST_HEAD(&fence->irq_link); + dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, + ctx->dma_fence_ctx, ctx->next_seqno++); + trace_xe_hw_fence_create(fence); return fence; From 6d2096239af11f1c9fa03e8fc74400ce048078b0 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 26 Jan 2024 14:06:14 -0800 Subject: [PATCH 665/768] drm/xe: Grab mem_access when disabling C6 on skip_guc_pc platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If skip_guc_pc is set for a platform, C6 is disabled directly without acquiring a mem_access reference, triggering an assertion inside xe_gt_idle_disable_c6. Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set") Cc: Rodrigo Vivi Cc: Vinay Belgaumkar Signed-off-by: Matt Roper Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240126220613.865939-2-matthew.d.roper@intel.com (cherry picked from commit 9f5971bdf78e0937206556534247243ad56cd735) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_pc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f71085228cb3..d91702592520 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -963,7 +963,9 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); if (xe->info.skip_guc_pc) { + xe_device_mem_access_get(xe); xe_gt_idle_disable_c6(pc_to_gt(pc)); + xe_device_mem_access_put(xe); return; } From efeff7b38ef62fc65069bd2200d151a9d5d38907 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jan 2024 15:44:13 -0800 Subject: [PATCH 666/768] drm/xe: Only allow 1 ufence per exec / bind IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way exec ufences are coded only 1 ufence per IOCTL will be signaled. It is possible to fix this but for current use cases 1 ufence per IOCTL is sufficient. Enforce a limit of 1 ufence per IOCTL (both exec and bind to be uniform). v2: - Add fixes tag (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Mika Kahola Cc: Thomas Hellström Signed-off-by: Matthew Brost Reviewed-by: Brian Welty Link: https://patchwork.freedesktop.org/patch/msgid/20240124234413.1640825-1-matthew.brost@intel.com (cherry picked from commit d1df9bfbf68c65418f30917f406b6d5bd597714e) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec.c | 10 +++++++++- drivers/gpu/drm/xe/xe_sync.h | 5 +++++ drivers/gpu/drm/xe/xe_vm.c | 10 +++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index b853feed9ccc..17f26952e665 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -111,7 +111,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs = 0; + u32 i, num_syncs = 0, num_ufence = 0; struct xe_sched_job *job; struct dma_fence *rebind_fence; struct xe_vm *vm; @@ -157,6 +157,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; + + if (xe_sync_is_ufence(&syncs[i])) + num_ufence++; + } + + if (XE_IOCTL_DBG(xe, num_ufence > 1)) { + err = -EINVAL; + goto err_syncs; } if (xe_exec_queue_is_parallel(q)) { diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index d284afbe917c..f43cdcaca6c5 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -33,4 +33,9 @@ struct dma_fence * xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, struct xe_exec_queue *q, struct xe_vm *vm); +static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) +{ + return !!sync->ufence; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 53833ab81424..32ae51945439 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2851,7 +2851,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_gpuva_ops **ops = NULL; struct xe_vm *vm; struct xe_exec_queue *q = NULL; - u32 num_syncs; + u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; LIST_HEAD(ops_list); @@ -2988,6 +2988,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); if (err) goto free_syncs; + + if (xe_sync_is_ufence(&syncs[num_syncs])) + num_ufence++; + } + + if (XE_IOCTL_DBG(xe, num_ufence > 1)) { + err = -EINVAL; + goto free_syncs; } if (!args->num_binds) { From 3ecf036b04b9dc72ca5bd62359748e14568fcf3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:46 +0100 Subject: [PATCH 667/768] drm/xe: Annotate mcr_[un]lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions acquire and release the gt::mcr_lock. Annotate accordingly. Fix the corresponding sparse warning. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Fixes: fb1d55efdfcb ("drm/xe: Cleanup OPEN_BRACE style issues") Cc: Francois Dugast Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 97fd7a7e4e877676a2ab1a687ba958b70931abcc) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_mcr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 77925b35cf8d..8546cd3cc50d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -480,7 +480,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, * to synchronize with external clients (e.g., firmware), so a semaphore * register will also need to be taken. */ -static void mcr_lock(struct xe_gt *gt) +static void mcr_lock(struct xe_gt *gt) __acquires(>->mcr_lock) { struct xe_device *xe = gt_to_xe(gt); int ret = 0; @@ -500,7 +500,7 @@ static void mcr_lock(struct xe_gt *gt) drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); } -static void mcr_unlock(struct xe_gt *gt) +static void mcr_unlock(struct xe_gt *gt) __releases(>->mcr_lock) { /* Release hardware semaphore - this is done by writing 1 to the register */ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) From ef87557928d1ab3a1487520962f55cd7163e621b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:47 +0100 Subject: [PATCH 668/768] drm/xe: Don't use __user error pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error pointer macros are not aware of __user pointers and as a consequence sparse warns. Have the copy_mask() function return an integer instead of a __user pointer. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-5-thomas.hellstrom@linux.intel.com (cherry picked from commit 78366eed6853aa6a5deccb2eb182f9334d2bd208) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_query.c | 50 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 9b35673b286c..7e924faeeea0 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -459,21 +459,21 @@ static size_t calc_topo_query_size(struct xe_device *xe) sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); } -static void __user *copy_mask(void __user *ptr, - struct drm_xe_query_topology_mask *topo, - void *mask, size_t mask_size) +static int copy_mask(void __user **ptr, + struct drm_xe_query_topology_mask *topo, + void *mask, size_t mask_size) { topo->num_bytes = mask_size; - if (copy_to_user(ptr, topo, sizeof(*topo))) - return ERR_PTR(-EFAULT); - ptr += sizeof(topo); + if (copy_to_user(*ptr, topo, sizeof(*topo))) + return -EFAULT; + *ptr += sizeof(topo); - if (copy_to_user(ptr, mask, mask_size)) - return ERR_PTR(-EFAULT); - ptr += mask_size; + if (copy_to_user(*ptr, mask, mask_size)) + return -EFAULT; + *ptr += mask_size; - return ptr; + return 0; } static int query_gt_topology(struct xe_device *xe, @@ -493,28 +493,28 @@ static int query_gt_topology(struct xe_device *xe, } for_each_gt(gt, xe, id) { + int err; + topo.gt_id = id; topo.type = DRM_XE_TOPO_DSS_GEOMETRY; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.g_dss_mask, - sizeof(gt->fuse_topo.g_dss_mask)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask, + sizeof(gt->fuse_topo.g_dss_mask)); + if (err) + return err; topo.type = DRM_XE_TOPO_DSS_COMPUTE; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.c_dss_mask, - sizeof(gt->fuse_topo.c_dss_mask)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.c_dss_mask, + sizeof(gt->fuse_topo.c_dss_mask)); + if (err) + return err; topo.type = DRM_XE_TOPO_EU_PER_DSS; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.eu_mask_per_dss, - sizeof(gt->fuse_topo.eu_mask_per_dss)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, + gt->fuse_topo.eu_mask_per_dss, + sizeof(gt->fuse_topo.eu_mask_per_dss)); + if (err) + return err; } return 0; From 89642db3b28849c23f42baadc88b40435ba6c5c6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 23 Jan 2024 13:26:38 -0800 Subject: [PATCH 669/768] drm/xe: Use LRC prefix rather than CTX prefix in lrc desc defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sparc build fails [1] due to CTX_VALID being redefined. Fix this by using a better naming convention of LRC_VALID as this define is used in setting bits in the lrc descriptor. To be uniform, change other define with LRC prefix too. [1] https://lore.kernel.org/all/20240123111235.3097079-1-geert@linux-m68k.org/ v2: - s/LEGACY_64B_CONTEXT/LRC_LEGACY_64B_CONTEXT (Lucas) Fixes: 0bc519d20ffa ("drm/xe: Remove GEN[0-9]*_ prefixes") Cc: Thomas Hellström Cc: Lucas De Marchi Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240123212638.1605626-1-matthew.brost@intel.com (cherry picked from commit 152ca51d8db03f08a71c25e999812e263839fdce) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_lrc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b7fa3831b684..0ec5ad2539f1 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -21,10 +21,10 @@ #include "xe_map.h" #include "xe_vm.h" -#define CTX_VALID (1 << 0) -#define CTX_PRIVILEGE (1 << 8) -#define CTX_ADDRESSING_MODE_SHIFT 3 -#define LEGACY_64B_CONTEXT 3 +#define LRC_VALID (1 << 0) +#define LRC_PRIVILEGE (1 << 8) +#define LRC_ADDRESSING_MODE_SHIFT 3 +#define LRC_LEGACY_64B_CONTEXT 3 #define ENGINE_CLASS_SHIFT 61 #define ENGINE_INSTANCE_SHIFT 48 @@ -762,15 +762,15 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, (q->usm.acc_notify << ACC_NOTIFY_S) | q->usm.acc_trigger); - lrc->desc = CTX_VALID; - lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT; + lrc->desc = LRC_VALID; + lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; /* TODO: Priority */ /* While this appears to have something about privileged batches or * some such, it really just means PPGTT mode. */ if (vm) - lrc->desc |= CTX_PRIVILEGE; + lrc->desc |= LRC_PRIVILEGE; if (GRAPHICS_VERx100(xe) < 1250) { lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; From ed2bdf3b264d627e1c2f26272660e1d7c2115000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 31 Jan 2024 10:16:28 +0100 Subject: [PATCH 670/768] drm/xe/vm: Subclass userptr vmas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The construct allocating only parts of the vma structure when the userptr part is not needed is very fragile. A developer could add additional fields below the userptr part, and the code could easily attempt to access the userptr part even if its not persent. So introduce xe_userptr_vma which subclasses struct xe_vma the proper way, and accordingly modify a couple of interfaces. This should also help if adding userptr helpers to drm_gpuvm. v2: - Fix documentation of to_userptr_vma() (Matthew Brost) - Fix allocation and freeing of vmas to clearer distinguish between the types. Closes: https://lore.kernel.org/intel-xe/0c4cc1a7-f409-4597-b110-81f9e45d1ffe@embeddedor.com/T/#u Fixes: a4cc60a55fd9 ("drm/xe: Only alloc userptr part of xe_vma for userptrs") Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Lucas De Marchi Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240131091628.12318-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 5bd24e78829ad569fa1c3ce9a05b59bb97b91f3d) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 11 +- drivers/gpu/drm/xe/xe_pt.c | 32 +++--- drivers/gpu/drm/xe/xe_vm.c | 155 ++++++++++++++++----------- drivers/gpu/drm/xe/xe_vm.h | 16 ++- drivers/gpu/drm/xe/xe_vm_types.h | 16 +-- 5 files changed, 137 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 59a70d2e0a7a..9c2fe1697d6e 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -165,7 +165,8 @@ retry_userptr: goto unlock_vm; } - if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) { + if (!xe_vma_is_userptr(vma) || + !xe_vma_userptr_check_repin(to_userptr_vma(vma))) { downgrade_write(&vm->lock); write_locked = false; } @@ -181,11 +182,13 @@ retry_userptr: /* TODO: Validate fault */ if (xe_vma_is_userptr(vma) && write_locked) { + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&vma->userptr.invalidate_link); + list_del_init(&uvma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); - ret = xe_vma_userptr_pin_pages(vma); + ret = xe_vma_userptr_pin_pages(uvma); if (ret) goto unlock_vm; @@ -220,7 +223,7 @@ retry_userptr: dma_fence_put(fence); if (xe_vma_is_userptr(vma)) - ret = xe_vma_userptr_check_repin(vma); + ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); vma->usm.tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index de1030a47588..e45b37c3f0c2 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -618,8 +618,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (!xe_vma_is_null(vma)) { if (xe_vma_is_userptr(vma)) - xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), - &curs); + xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, + xe_vma_size(vma), &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); @@ -906,17 +906,17 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe, #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT -static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) +static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) { - u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2; + u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; static u32 count; if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(vma); + struct xe_vm *vm = xe_vma_vm(&uvma->vma); - vma->userptr.divisor = divisor << 1; + uvma->userptr.divisor = divisor << 1; spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&vma->userptr.invalidate_link, + list_move_tail(&uvma->userptr.invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); return true; @@ -927,7 +927,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) #else -static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) +static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) { return false; } @@ -1000,9 +1000,9 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_pt_migrate_pt_update *userptr_update = container_of(pt_update, typeof(*userptr_update), base); - struct xe_vma *vma = pt_update->vma; - unsigned long notifier_seq = vma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(vma); + struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); + unsigned long notifier_seq = uvma->userptr.notifier_seq; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); int err = xe_pt_vm_dependencies(pt_update->job, &vm->rftree[pt_update->tile_id], pt_update->start, @@ -1023,7 +1023,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) */ do { down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&vma->userptr.notifier, + if (!mmu_interval_read_retry(&uvma->userptr.notifier, notifier_seq)) break; @@ -1032,11 +1032,11 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) if (userptr_update->bind) return -EAGAIN; - notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); } while (true); /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) { + if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { up_read(&vm->userptr.notifier_lock); return -EAGAIN; } @@ -1297,7 +1297,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue vma->tile_present |= BIT(tile->id); if (bind_pt_update.locked) { - vma->userptr.initial_bind = true; + to_userptr_vma(vma)->userptr.initial_bind = true; up_read(&vm->userptr.notifier_lock); xe_bo_put_commit(&deferred); } @@ -1642,7 +1642,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu if (!vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&vma->userptr.invalidate_link); + list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } up_read(&vm->userptr.notifier_lock); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 32ae51945439..30db264d34a3 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -46,7 +46,7 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) /** * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @vma: The userptr vma + * @uvma: The userptr vma * * Check if the userptr vma has been invalidated since last successful * repin. The check is advisory only and can the function can be called @@ -56,15 +56,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) * * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. */ -int xe_vma_userptr_check_repin(struct xe_vma *vma) +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { - return mmu_interval_check_retry(&vma->userptr.notifier, - vma->userptr.notifier_seq) ? + return mmu_interval_check_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq) ? -EAGAIN : 0; } -int xe_vma_userptr_pin_pages(struct xe_vma *vma) +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct xe_device *xe = vm->xe; const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT; @@ -80,30 +82,30 @@ retry: if (vma->gpuva.flags & XE_VMA_DESTROYED) return 0; - notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); - if (notifier_seq == vma->userptr.notifier_seq) + notifier_seq = mmu_interval_read_begin(&userptr->notifier); + if (notifier_seq == userptr->notifier_seq) return 0; pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return -ENOMEM; - if (vma->userptr.sg) { + if (userptr->sg) { dma_unmap_sgtable(xe->drm.dev, - vma->userptr.sg, + userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, 0); - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; } pinned = ret = 0; if (in_kthread) { - if (!mmget_not_zero(vma->userptr.notifier.mm)) { + if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto mm_closed; } - kthread_use_mm(vma->userptr.notifier.mm); + kthread_use_mm(userptr->notifier.mm); } while (pinned < num_pages) { @@ -123,32 +125,32 @@ retry: } if (in_kthread) { - kthread_unuse_mm(vma->userptr.notifier.mm); - mmput(vma->userptr.notifier.mm); + kthread_unuse_mm(userptr->notifier.mm); + mmput(userptr->notifier.mm); } mm_closed: if (ret) goto out; - ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages, + ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages, pinned, 0, (u64)pinned << PAGE_SHIFT, xe_sg_segment_size(xe->drm.dev), GFP_KERNEL); if (ret) { - vma->userptr.sg = NULL; + userptr->sg = NULL; goto out; } - vma->userptr.sg = &vma->userptr.sgt; + userptr->sg = &userptr->sgt; - ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg, + ret = dma_map_sgtable(xe->drm.dev, userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); if (ret) { - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; goto out; } @@ -167,8 +169,8 @@ out: kvfree(pages); if (!(ret < 0)) { - vma->userptr.notifier_seq = notifier_seq; - if (xe_vma_userptr_check_repin(vma) == -EAGAIN) + userptr->notifier_seq = notifier_seq; + if (xe_vma_userptr_check_repin(uvma) == -EAGAIN) goto retry; } @@ -635,7 +637,9 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq) { - struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier); + struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); + struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; @@ -651,7 +655,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); /* No need to stop gpu access if the userptr is not yet bound. */ - if (!vma->userptr.initial_bind) { + if (!userptr->initial_bind) { up_write(&vm->userptr.notifier_lock); return true; } @@ -663,7 +667,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, if (!xe_vm_in_fault_mode(vm) && !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&vma->userptr.invalidate_link, + list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } @@ -703,7 +707,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { int xe_vm_userptr_pin(struct xe_vm *vm) { - struct xe_vma *vma, *next; + struct xe_userptr_vma *uvma, *next; int err = 0; LIST_HEAD(tmp_evict); @@ -711,22 +715,23 @@ int xe_vm_userptr_pin(struct xe_vm *vm) /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, + list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) { - list_del_init(&vma->userptr.invalidate_link); - list_move_tail(&vma->combined_links.userptr, + list_del_init(&uvma->userptr.invalidate_link); + list_move_tail(&uvma->userptr.repin_link, &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); /* Pin and move to temporary list */ - list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, - combined_links.userptr) { - err = xe_vma_userptr_pin_pages(vma); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + err = xe_vma_userptr_pin_pages(uvma); if (err < 0) return err; - list_move_tail(&vma->combined_links.userptr, &vm->rebind_list); + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list); } return 0; @@ -782,6 +787,14 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) return fence; } +static void xe_vma_free(struct xe_vma *vma) +{ + if (xe_vma_is_userptr(vma)) + kfree(to_userptr_vma(vma)); + else + kfree(vma); +} + #define VMA_CREATE_FLAG_READ_ONLY BIT(0) #define VMA_CREATE_FLAG_IS_NULL BIT(1) @@ -800,14 +813,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); - if (!bo && !is_null) /* userptr */ + /* + * Allocate and ensure that the xe_vma_is_userptr() return + * matches what was allocated. + */ + if (!bo && !is_null) { + struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); + + if (!uvma) + return ERR_PTR(-ENOMEM); + + vma = &uvma->vma; + } else { vma = kzalloc(sizeof(*vma), GFP_KERNEL); - else - vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr), - GFP_KERNEL); - if (!vma) { - vma = ERR_PTR(-ENOMEM); - return vma; + if (!vma) + return ERR_PTR(-ENOMEM); + + if (is_null) + vma->gpuva.flags |= DRM_GPUVA_SPARSE; + if (bo) + vma->gpuva.gem.obj = &bo->ttm.base; } INIT_LIST_HEAD(&vma->combined_links.rebind); @@ -818,8 +843,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.va.range = end - start + 1; if (read_only) vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -836,35 +859,35 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); if (IS_ERR(vm_bo)) { - kfree(vma); + xe_vma_free(vma); return ERR_CAST(vm_bo); } drm_gpuvm_bo_extobj_add(vm_bo); drm_gem_object_get(&bo->ttm.base); - vma->gpuva.gem.obj = &bo->ttm.base; vma->gpuva.gem.offset = bo_offset_or_userptr; drm_gpuva_link(&vma->gpuva, vm_bo); drm_gpuvm_bo_put(vm_bo); } else /* userptr or null */ { if (!is_null) { + struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; u64 size = end - start + 1; int err; - INIT_LIST_HEAD(&vma->userptr.invalidate_link); + INIT_LIST_HEAD(&userptr->invalidate_link); + INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; - err = mmu_interval_notifier_insert(&vma->userptr.notifier, + err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, xe_vma_userptr(vma), size, &vma_userptr_notifier_ops); if (err) { - kfree(vma); - vma = ERR_PTR(err); - return vma; + xe_vma_free(vma); + return ERR_PTR(err); } - vma->userptr.notifier_seq = LONG_MAX; + userptr->notifier_seq = LONG_MAX; } xe_vm_get(vm); @@ -880,13 +903,15 @@ static void xe_vma_destroy_late(struct xe_vma *vma) bool read_only = xe_vma_read_only(vma); if (xe_vma_is_userptr(vma)) { - if (vma->userptr.sg) { + struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + + if (userptr->sg) { dma_unmap_sgtable(xe->drm.dev, - vma->userptr.sg, + userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, 0); - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; } /* @@ -894,7 +919,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * the notifer until we're sure the GPU is not accessing * them anymore */ - mmu_interval_notifier_remove(&vma->userptr.notifier); + mmu_interval_notifier_remove(&userptr->notifier); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); @@ -902,7 +927,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) xe_bo_put(xe_vma_bo(vma)); } - kfree(vma); + xe_vma_free(vma); } static void vma_destroy_work_func(struct work_struct *w) @@ -933,7 +958,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); - list_del(&vma->userptr.invalidate_link); + list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); @@ -2150,7 +2175,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, drm_exec_fini(&exec); if (xe_vma_is_userptr(vma)) { - err = xe_vma_userptr_pin_pages(vma); + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); if (err) { prep_vma_destroy(vm, vma, false); xe_vma_destroy_unlocked(vma); @@ -2507,7 +2532,7 @@ retry_userptr: if (err == -EAGAIN && xe_vma_is_userptr(vma)) { lockdep_assert_held_write(&vm->lock); - err = xe_vma_userptr_pin_pages(vma); + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); if (!err) goto retry_userptr; @@ -3138,8 +3163,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { WARN_ON_ONCE(!mmu_interval_check_retry - (&vma->userptr.notifier, - vma->userptr.notifier_seq)); + (&to_userptr_vma(vma)->userptr.notifier, + to_userptr_vma(vma)->userptr.notifier_seq)); WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), DMA_RESV_USAGE_BOOKKEEP)); @@ -3200,11 +3225,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) if (is_null) { addr = 0; } else if (is_userptr) { + struct sg_table *sg = to_userptr_vma(vma)->userptr.sg; struct xe_res_cursor cur; - if (vma->userptr.sg) { - xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, - &cur); + if (sg) { + xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur); addr = xe_res_dma(&cur); } else { addr = 0; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index cf2f96e8c1ab..9654a0612fc2 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -160,6 +160,18 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); } +/** + * to_userptr_vma() - Return a pointer to an embedding userptr vma + * @vma: Pointer to the embedded struct xe_vma + * + * Return: Pointer to the embedding userptr vma + */ +static inline struct xe_userptr_vma *to_userptr_vma(struct xe_vma *vma) +{ + xe_assert(xe_vma_vm(vma)->xe, xe_vma_is_userptr(vma)); + return container_of(vma, struct xe_userptr_vma, vma); +} + u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile); int xe_vm_create_ioctl(struct drm_device *dev, void *data, @@ -224,9 +236,9 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -int xe_vma_userptr_pin_pages(struct xe_vma *vma); +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); -int xe_vma_userptr_check_repin(struct xe_vma *vma); +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 63e8a50b88e9..1fec66ae2eb2 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -37,6 +37,8 @@ struct xe_vm; struct xe_userptr { /** @invalidate_link: Link for the vm::userptr.invalidated list */ struct list_head invalidate_link; + /** @userptr: link into VM repin list if userptr. */ + struct list_head repin_link; /** * @notifier: MMU notifier for user pointer (invalidation call back) */ @@ -68,8 +70,6 @@ struct xe_vma { * resv. */ union { - /** @userptr: link into VM repin list if userptr. */ - struct list_head userptr; /** @rebind: link into VM if this VMA needs rebinding. */ struct list_head rebind; /** @destroy: link to contested list when VM is being closed. */ @@ -105,11 +105,15 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; +}; - /** - * @userptr: user pointer state, only allocated for VMAs that are - * user pointers - */ +/** + * struct xe_userptr_vma - A userptr vma subclass + * @vma: The vma. + * @userptr: Additional userptr information. + */ +struct xe_userptr_vma { + struct xe_vma vma; struct xe_userptr userptr; }; From c9cfed29f5fe13f97e46c3879517d8c41ae251d6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 30 Jan 2024 18:54:24 -0800 Subject: [PATCH 671/768] drm/xe: Make all GuC ABI shift values unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All GuC ABI definitions are unsigned and not defining as unsigned is causing build errors [1]. [1] https://lore.kernel.org/all/20240123111235.3097079-1-geert@linux-m68k.org/ Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Lucas De Marchi Cc: Michal Wajdeczko Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240131025424.2087936-1-matthew.brost@intel.com (cherry picked from commit d83d8ae275c6bf87506b71b8a1acd98452137dc5) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/abi/guc_actions_abi.h | 4 ++-- drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h | 4 ++-- .../drm/xe/abi/guc_communication_ctb_abi.h | 8 ++++---- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 6 +++--- drivers/gpu/drm/xe/abi/guc_messages_abi.h | 20 +++++++++---------- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 3062e0e0d467..79ba98a169f9 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -50,8 +50,8 @@ #define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) #define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) -#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffffu << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffffu << 0) #define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn #define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index 811add10c30d..c165e26c0976 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -242,8 +242,8 @@ struct slpc_shared_data { (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) #define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) -#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xffu << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xffu << 0) #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn #endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h index 3b83f907ece4..0b1146d0c997 100644 --- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -82,11 +82,11 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); #define GUC_CTB_HDR_LEN 1u #define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN #define GUC_CTB_MSG_MAX_LEN 256u -#define GUC_CTB_MSG_0_FENCE (0xffff << 16) -#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_MSG_0_FENCE (0xffffu << 16) +#define GUC_CTB_MSG_0_FORMAT (0xfu << 12) #define GUC_CTB_FORMAT_HXG 0u -#define GUC_CTB_MSG_0_RESERVED (0xf << 8) -#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) +#define GUC_CTB_MSG_0_RESERVED (0xfu << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xffu << 0) /** * DOC: CTB HXG Message diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 47094b9b044c..0400bc0fccdc 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -31,9 +31,9 @@ */ #define GUC_KLV_LEN_MIN 1u -#define GUC_KLV_0_KEY (0xffff << 16) -#define GUC_KLV_0_LEN (0xffff << 0) -#define GUC_KLV_n_VALUE (0xffffffff << 0) +#define GUC_KLV_0_KEY (0xffffu << 16) +#define GUC_KLV_0_LEN (0xffffu << 0) +#define GUC_KLV_n_VALUE (0xffffffffu << 0) /** * DOC: GuC Self Config KLVs diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h index 3d199016cf88..29e414c82d56 100644 --- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -40,18 +40,18 @@ */ #define GUC_HXG_MSG_MIN_LEN 1u -#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_MSG_0_ORIGIN (0x1u << 31) #define GUC_HXG_ORIGIN_HOST 0u #define GUC_HXG_ORIGIN_GUC 1u -#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_MSG_0_TYPE (0x7u << 28) #define GUC_HXG_TYPE_REQUEST 0u #define GUC_HXG_TYPE_EVENT 1u #define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u #define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u #define GUC_HXG_TYPE_RESPONSE_FAILURE 6u #define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u -#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) -#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) +#define GUC_HXG_MSG_0_AUX (0xfffffffu << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffffu << 0) /** * DOC: HXG Request @@ -85,8 +85,8 @@ */ #define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) -#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfffu << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffffu << 0) #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD /** @@ -117,8 +117,8 @@ */ #define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) -#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfffu << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffffu << 0) #define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD /** @@ -188,8 +188,8 @@ */ #define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) -#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfffu << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffffu << 0) /** * DOC: HXG Response From 5f16ee27cd5abd5166e28b2311ac693c204063ff Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Sat, 27 Jan 2024 22:20:40 +0530 Subject: [PATCH 672/768] drm/hwmon: Fix abi doc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes warnings in xe, i915 hwmon docs: Warning: /sys/devices/.../hwmon/hwmon/curr1_crit is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:35 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:52 Warning: /sys/devices/.../hwmon/hwmon/energy1_input is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:54 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:65 Warning: /sys/devices/.../hwmon/hwmon/in0_input is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:46 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:0 Warning: /sys/devices/.../hwmon/hwmon/power1_crit is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:22 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:39 Warning: /sys/devices/.../hwmon/hwmon/power1_max is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:0 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:8 Warning: /sys/devices/.../hwmon/hwmon/power1_max_interval is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:62 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:30 Warning: /sys/devices/.../hwmon/hwmon/power1_rated_max is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:14 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:22 Use a path containing the driver name to differentiate the documentation of each entry. Fixes: fb1b70607f73 ("drm/xe/hwmon: Expose power attributes") Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Fixes: fbcdc9d3bf58 ("drm/xe/hwmon: Expose input voltage attribute") Fixes: 71d0a32524f9 ("drm/xe/hwmon: Expose hwmon energy attribute") Fixes: 4446fcf220ce ("drm/xe/hwmon: Expose power1_max_interval") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20240125113345.291118ff@canb.auug.org.au/ Signed-off-by: Badal Nilawar Reviewed-by: Ashutosh Dixit Reviewed-by: Lucas De Marchi Acked-by: Rodrigo Vivi Acked-by: Jani Nikula Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240127165040.2348009-1-badal.nilawar@intel.com (cherry picked from commit 20485e3a810c480cef60caf53988619f61127e7b) Signed-off-by: Thomas Hellström --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 14 +++++++------- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index 8d7d8f05f6cd..92fe7c5c5ac1 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -1,4 +1,4 @@ -What: /sys/devices/.../hwmon/hwmon/in0_input +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/in0_input Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -6,7 +6,7 @@ Description: RO. Current Voltage in millivolt. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_max +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_max Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -20,7 +20,7 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_rated_max +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_rated_max Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -28,7 +28,7 @@ Description: RO. Card default power limit (default TDP setting). Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_max_interval +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_max_interval Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -37,7 +37,7 @@ Description: RW. Sustained power limit interval (Tau in PL1/Tau) in Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_crit +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_crit Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -50,7 +50,7 @@ Description: RW. Card reactive critical (I1) power limit in microwatts. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/curr1_crit +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/curr1_crit Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -63,7 +63,7 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/energy1_input +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/energy1_input Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 8c321bc9dc04..023fd82de3f7 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -1,4 +1,4 @@ -What: /sys/devices/.../hwmon/hwmon/power1_max +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_max Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -12,7 +12,7 @@ Description: RW. Card reactive sustained (PL1) power limit in microwatts. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_rated_max +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_rated_max Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -20,7 +20,7 @@ Description: RO. Card default power limit (default TDP setting). Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_crit +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_crit Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -33,7 +33,7 @@ Description: RW. Card reactive critical (I1) power limit in microwatts. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/curr1_crit +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/curr1_crit Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -44,7 +44,7 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes. the operating frequency if the power averaged over a window exceeds this limit. -What: /sys/devices/.../hwmon/hwmon/in0_input +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/in0_input Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -52,7 +52,7 @@ Description: RO. Current Voltage in millivolt. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/energy1_input +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/energy1_input Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -60,7 +60,7 @@ Description: RO. Energy input of device in microjoules. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_max_interval +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_max_interval Date: October 2023 KernelVersion: 6.6 Contact: intel-xe@lists.freedesktop.org From 5f9ab17394f831cb7986ec50900fa37507a127f1 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 673/768] firewire: core: correct documentation of fw_csr_string() kernel API Against its current description, the kernel API can accepts all types of directory entries. This commit corrects the documentation. Cc: stable@vger.kernel.org Fixes: 3c2c58cb33b3 ("firewire: core: fw_csr_string addendum") Link: https://lore.kernel.org/r/20240130100409.30128-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-device.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 0547253d16fe..e4cb5106fb7d 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -118,10 +118,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ From 47dc55181dcbee69fc84c902e7fb060213b9b8a5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 674/768] firewire: core: search descriptor leaf just after vendor directory entry in root directory It appears that Sony DVMC-DA1 has a quirk that the descriptor leaf entry locates just after the vendor directory entry in root directory. This is not conformant to the legacy layout of configuration ROM described in Configuration ROM for AV/C Devices 1.0 (1394 Trading Association, Dec 2000, TA Document 1999027). This commit changes current implementation to parse configuration ROM for device attributes so that the descriptor leaf entry can be detected for the vendor name. $ config-rom-pretty-printer < Sony-DVMC-DA1.img ROM header and bus information block ----------------------------------------------------------------- 1024 041ee7fb bus_info_length 4, crc_length 30, crc 59387 1028 31333934 bus_name "1394" 1032 e0644000 irmc 1, cmc 1, isc 1, bmc 0, cyc_clk_acc 100, max_rec 4 (32) 1036 08004603 company_id 080046 | 1040 0014193c device_id 12886219068 | EUI-64 576537731003586876 root directory ----------------------------------------------------------------- 1044 0006b681 directory_length 6, crc 46721 1048 03080046 vendor 1052 0c0083c0 node capabilities: per IEEE 1394 1056 8d00000a --> eui-64 leaf at 1096 1060 d1000003 --> unit directory at 1072 1064 c3000005 --> vendor directory at 1084 1068 8100000a --> descriptor leaf at 1108 unit directory at 1072 ----------------------------------------------------------------- 1072 0002cdbf directory_length 2, crc 52671 1076 1200a02d specifier id 1080 13010000 version vendor directory at 1084 ----------------------------------------------------------------- 1084 00020cfe directory_length 2, crc 3326 1088 17fa0000 model 1092 81000008 --> descriptor leaf at 1124 eui-64 leaf at 1096 ----------------------------------------------------------------- 1096 0002c66e leaf_length 2, crc 50798 1100 08004603 company_id 080046 | 1104 0014193c device_id 12886219068 | EUI-64 576537731003586876 descriptor leaf at 1108 ----------------------------------------------------------------- 1108 00039e26 leaf_length 3, crc 40486 1112 00000000 textual descriptor 1116 00000000 minimal ASCII 1120 536f6e79 "Sony" descriptor leaf at 1124 ----------------------------------------------------------------- 1124 0005001d leaf_length 5, crc 29 1128 00000000 textual descriptor 1132 00000000 minimal ASCII 1136 44564d43 "DVMC" 1140 2d444131 "-DA1" 1144 00000000 Suggested-by: Adam Goldman Tested-by: Adam Goldman Link: https://lore.kernel.org/r/20240130100409.30128-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-device.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index e4cb5106fb7d..7d3346b3a2bf 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -367,8 +367,17 @@ static ssize_t show_text_leaf(struct device *dev, for (i = 0; i < ARRAY_SIZE(directories) && !!directories[i]; ++i) { int result = fw_csr_string(directories[i], attr->key, buf, bufsize); // Detected. - if (result >= 0) + if (result >= 0) { ret = result; + } else if (i == 0 && attr->key == CSR_VENDOR) { + // Sony DVMC-DA1 has configuration ROM such that the descriptor leaf entry + // in the root directory follows to the directory entry for vendor ID + // instead of the immediate value for vendor ID. + result = fw_csr_string(directories[i], CSR_DIRECTORY | attr->key, buf, + bufsize); + if (result >= 0) + ret = result; + } } if (ret >= 0) { From 83b3836bf83f09beea5f592b126cfdd1bc921e48 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Jan 2024 12:12:53 -0400 Subject: [PATCH 675/768] iommu: Allow ops->default_domain to work when !CONFIG_IOMMU_DMA The ops->default_domain flow used a 0 req_type to select the default domain and this was enforced by iommu_group_alloc_default_domain(). When !CONFIG_IOMMU_DMA started forcing the old ARM32 drivers into IDENTITY it also overroad the 0 req_type of the ops->default_domain drivers to IDENTITY which ends up causing failures during device probe. Make iommu_group_alloc_default_domain() accept a req_type that matches the ops->default_domain and have iommu_group_alloc_default_domain() generate a req_type that matches the default_domain. This way the req_type always describes what kind of domain should be attached and ops->default_domain overrides all other mechanisms to choose the default domain. Fixes: 2ad56efa80db ("powerpc/iommu: Setup a default domain and remove set_platform_dma_ops") Fixes: 0f6a90436a57 ("iommu: Do not use IOMMU_DOMAIN_DMA if CONFIG_IOMMU_DMA is not enabled") Reported-by: Ovidiu Panait Closes: https://lore.kernel.org/linux-iommu/20240123165829.630276-1-ovidiu.panait@windriver.com/ Reported-by: Shivaprasad G Bhat Closes: https://lore.kernel.org/linux-iommu/170618452753.3805.4425669653666211728.stgit@ltcd48-lp2.aus.stglab.ibm.com/ Tested-by: Ovidiu Panait Tested-by: Shivaprasad G Bhat Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-755bd21c4a64+525b8-iommu_def_dom_fix_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 68e648b55767..d14413916f93 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1799,7 +1799,7 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) * domain. Do not use in new drivers. */ if (ops->default_domain) { - if (req_type) + if (req_type != ops->default_domain->type) return ERR_PTR(-EINVAL); return ops->default_domain; } @@ -1871,10 +1871,18 @@ static int iommu_get_def_domain_type(struct iommu_group *group, const struct iommu_ops *ops = dev_iommu_ops(dev); int type; - if (!ops->def_domain_type) - return cur_type; - - type = ops->def_domain_type(dev); + if (ops->default_domain) { + /* + * Drivers that declare a global static default_domain will + * always choose that. + */ + type = ops->default_domain->type; + } else { + if (ops->def_domain_type) + type = ops->def_domain_type(dev); + else + return cur_type; + } if (!type || cur_type == type) return cur_type; if (!cur_type) From fae6e669cdc52fdbb843e7fb1b8419642b6b8cba Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Jan 2024 12:14:54 -0400 Subject: [PATCH 676/768] drm/tegra: Do not assume that a NULL domain means no DMA IOMMU Previously with tegra-smmu, even with CONFIG_IOMMU_DMA, the default domain could have been left as NULL. The NULL domain is specially recognized by host1x_client_iommu_attach() as meaning it is not the DMA domain and should be replaced with the special shared domain. This happened prior to the below commit because tegra-smmu was using the NULL domain to mean IDENTITY. Now that the domain is properly labled the test in DRM doesn't see NULL. Check for IDENTITY as well to enable the special domains. Fixes: c8cc2655cc6c ("iommu/tegra-smmu: Implement an IDENTITY domain") Reported-by: diogo.ivo@tecnico.ulisboa.pt Closes: https://lore.kernel.org/all/bbmhcoghrprmbdibnjum6lefix2eoquxrde7wyqeulm4xabmlm@b6jy32saugqh/ Tested-by: diogo.ivo@tecnico.ulisboa.pt Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-3049f92c4812+16691-host1x_def_dom_fix_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/gpu/drm/tegra/drm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ff36171c8fb7..a73cff7a3070 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -960,7 +960,8 @@ int host1x_client_iommu_attach(struct host1x_client *client) * not the shared IOMMU domain, don't try to attach it to a different * domain. This allows using the IOMMU-backed DMA API. */ - if (domain && domain != tegra->domain) + if (domain && domain->type != IOMMU_DOMAIN_IDENTITY && + domain != tegra->domain) return 0; if (tegra->domain) { From 3657e4cb5a8abd9edf6c944e022fe9ef06989960 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:20 +0000 Subject: [PATCH 677/768] ASoC: wm_adsp: Fix firmware file search order Check for the cases of system-specific bin file without a wmfw before falling back to looking for a generic wmfw. All system-specific options should be tried before falling back to loading a generic wmfw/bin. With the original code, the presence of a fallback generic wmfw on the filesystem would prevent using a system-specific tuning with a ROM firmware. Signed-off-by: Richard Fitzgerald Fixes: 0e7d82cbea8b ("ASoC: wm_adsp: Add support for loading bin files without wmfw") Link: https://msgid.link/r/20240129162737.497-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 44 ++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index c01e31175015..bd60ceebb6a9 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -823,6 +823,23 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } } + /* Check system-specific bin without wmfw before falling back to generic */ + if (dsp->wmfw_optional && system_name) { + if (asoc_component_prefix) + wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, + cirrus_dir, system_name, + asoc_component_prefix, "bin"); + + if (!*coeff_firmware) + wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, + cirrus_dir, system_name, + NULL, "bin"); + + if (*coeff_firmware) + return 0; + } + + /* Check legacy location */ if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, "", NULL, NULL, "wmfw")) { wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, @@ -830,38 +847,15 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, return 0; } + /* Fall back to generic wmfw and optional matching bin */ ret = wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, cirrus_dir, NULL, NULL, "wmfw"); - if (!ret) { + if (!ret || dsp->wmfw_optional) { wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, NULL, NULL, "bin"); return 0; } - if (dsp->wmfw_optional) { - if (system_name) { - if (asoc_component_prefix) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - cirrus_dir, system_name, - asoc_component_prefix, "bin"); - - if (!*coeff_firmware) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - cirrus_dir, system_name, - NULL, "bin"); - } - - if (!*coeff_firmware) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - "", NULL, NULL, "bin"); - - if (!*coeff_firmware) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - cirrus_dir, NULL, NULL, "bin"); - - return 0; - } - adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); From daf3f0f99cde93a066240462b7a87cdfeedc04c0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:21 +0000 Subject: [PATCH 678/768] ASoC: wm_adsp: Don't overwrite fwf_name with the default There's no need to overwrite fwf_name with a kstrdup() of the cs_dsp part name. It is trivial to select either fwf_name or cs_dsp.part as the string to use when building the filename in wm_adsp_request_firmware_file(). This leaves fwf_name entirely owned by the codec driver. It also avoids problems with freeing the pointer. With the original code fwf_name was either a pointer owned by the codec driver, or a kstrdup() created by wm_adsp. This meant wm_adsp must free it if it set it, but not if the codec driver set it. The code was handling this by using devm_kstrdup(). But there is no absolute requirement that wm_adsp_common_init() must be called from probe(), so this was a pseudo-memory leak - each new call to wm_adsp_common_init() would allocate another block of memory but these would only be freed if the owning codec driver was removed. Signed-off-by: Richard Fitzgerald Link: https://msgid.link/r/20240129162737.497-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index bd60ceebb6a9..36ea0dcdc7ab 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -739,19 +739,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, const char *filetype) { struct cs_dsp *cs_dsp = &dsp->cs_dsp; + const char *fwf; char *s, c; int ret = 0; + if (dsp->fwf_name) + fwf = dsp->fwf_name; + else + fwf = dsp->cs_dsp.name; + if (system_name && asoc_component_prefix) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix, filetype); else if (system_name) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name, + *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf, wm_adsp_fw[dsp->fw].file, filetype); if (*filename == NULL) @@ -857,29 +863,18 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", - cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, - system_name, asoc_component_prefix); + cirrus_dir, dsp->part, + dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, + wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); return -ENOENT; } static int wm_adsp_common_init(struct wm_adsp *dsp) { - char *p; - INIT_LIST_HEAD(&dsp->compr_list); INIT_LIST_HEAD(&dsp->buffer_list); - if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); - if (!p) - return -ENOMEM; - - dsp->fwf_name = p; - for (; *p != 0; ++p) - *p = tolower(*p); - } - return 0; } From ae861c466ee57e15a29d97629e1c564e3f714a4f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:22 +0000 Subject: [PATCH 679/768] ASoC: cs35l56: cs35l56_component_remove() must clear cs35l56->component The cs35l56->component pointer is used by the suspend-resume handling to know whether the driver is fully instantiated. This is to prevent it queuing dsp_work which would result in calling wm_adsp when the driver is not an instantiated ASoC component. So this pointer must be cleared by cs35l56_component_remove(). Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-4-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 45b4de3eff94..09944db4db30 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -809,6 +809,8 @@ static void cs35l56_component_remove(struct snd_soc_component *component) struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); cancel_work_sync(&cs35l56->dsp_work); + + cs35l56->component = NULL; } static int cs35l56_set_bias_level(struct snd_soc_component *component, From cd38ccbecdace1469b4e0cfb3ddeec72a3fad226 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:23 +0000 Subject: [PATCH 680/768] ASoC: cs35l56: cs35l56_component_remove() must clean up wm_adsp cs35l56_component_remove() must call wm_adsp_power_down() and wm_adsp2_component_remove(). Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-5-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 09944db4db30..491da77112c3 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -810,6 +810,11 @@ static void cs35l56_component_remove(struct snd_soc_component *component) cancel_work_sync(&cs35l56->dsp_work); + if (cs35l56->dsp.cs_dsp.booted) + wm_adsp_power_down(&cs35l56->dsp); + + wm_adsp2_component_remove(&cs35l56->dsp, component); + cs35l56->component = NULL; } From 07687cd0539f8185b6ba0c0afba8473517116d6a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:24 +0000 Subject: [PATCH 681/768] ASoC: cs35l56: Don't add the same register patch multiple times Move the call to cs35l56_set_patch() earlier in cs35l56_init() so that it only adds the register patch on first-time initialization. The call was after the post_soft_reset label, so every time this function was run to re-initialize the hardware after a reset it would call regmap_register_patch() and add the same reg_sequence again. Signed-off-by: Richard Fitzgerald Fixes: 898673b905b9 ("ASoC: cs35l56: Move shared data into a common data structure") Link: https://msgid.link/r/20240129162737.497-6-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 491da77112c3..ea5d2b2eb82a 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1159,6 +1159,10 @@ int cs35l56_init(struct cs35l56_private *cs35l56) if (ret < 0) return ret; + ret = cs35l56_set_patch(&cs35l56->base); + if (ret) + return ret; + /* Populate the DSP information with the revision and security state */ cs35l56->dsp.part = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, "cs35l56%s-%02x", cs35l56->base.secured ? "s" : "", cs35l56->base.rev); @@ -1197,10 +1201,6 @@ post_soft_reset: if (ret) return ret; - ret = cs35l56_set_patch(&cs35l56->base); - if (ret) - return ret; - /* Registers could be dirty after soft reset or SoundWire enumeration */ regcache_sync(cs35l56->base.regmap); From 3739cc0733ba7eeafc08d4d4208d1f3c2451eabd Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:25 +0000 Subject: [PATCH 682/768] ASoC: cs35l56: Remove buggy checks from cs35l56_is_fw_reload_needed() Remove the check of fw_patched from cs35l56_is_fw_reload_needed(). Also remove the redundant check for control of the reset GPIO. The fw_patched flag is set when cs35l56_dsp_work() has completed its steps to download firmware and power-up wm_adsp. There was a check in cs35l56_is_fw_reload_needed() to make a quick exit of 'false' if !fw_patched. The original idea was that the system might be suspended before the driver has ever made any attempt to download firmware, and in that case the driver doesn't need to return to a patched state because it was never in a patched state. This check of fw_patched is buggy because it prevented ever recovering from a failed patch. If a previous attempt to patch and reboot the silicon had failed it would leave fw_patched==false. This would mean the driver never attempted another download even though the fault may have been cleared (by a hard reset, for example). It is also a redundant check because the calling code already makes a quick exit if cs35l56_component_probe() has not been called, which deals with the original intent of this check but in a safer way. The check for reset GPIO is redundant: if the silicon was hard-reset the FIRMWARE_MISSING flag will be 1. But this check created an expectation that the suspend/resume code toggles reset. This can't easily be protected against accidental code breakage. The only reason for the check was to skip runtime-resuming the driver to read the PROTECTION_STATUS register when it already knows it reset the silicon. But in that case the driver will have to be runtime-resumed to do the firmware download. So it created an assumption for no benefit. Signed-off-by: Richard Fitzgerald Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Link: https://msgid.link/r/20240129162737.497-7-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 953ba066bab1..0cd572de73a9 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -400,17 +400,6 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base) unsigned int val; int ret; - /* Nothing to re-patch if we haven't done any patching yet. */ - if (!cs35l56_base->fw_patched) - return false; - - /* - * If we have control of RESET we will have asserted it so the firmware - * will need re-patching. - */ - if (cs35l56_base->reset_gpio) - return true; - /* * In secure mode FIRMWARE_MISSING is cleared by the BIOS loader so * can't be used here to test for memory retention. From 72a77d7631c6e392677c0134343cf5edcd3a4572 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:26 +0000 Subject: [PATCH 683/768] ASoC: cs35l56: Fix to ensure ASP1 registers match cache Add a dummy SUPPLY widget connected to the ASP that forces the chip registers to match the regmap cache when the ASP is powered-up. On a SoundWire system the ASP is free for use as a chip-to-chip interconnect. This can be either for the firmware on multiple CS35L56 to share reference audio; or as a bridge to another device. If it is a firmware interconnect it is owned by the firmware and the Linux driver should avoid writing the registers. However. If it is a bridge then Linux may take over and handle it as a normal codec-to-codec link. CS35L56 is designed for SDCA and a generic SDCA driver would know nothing about these chip-specific registers. So if the ASP is being used on a SoundWire system the firmware sets up the ASP registers. This means that we can't assume the default state of the ASP registers. But we don't know the initial state that the firmware set them to until after the firmware has been downloaded and booted, which can take several seconds when downloading multiple amps. To avoid blocking probe() for several seconds waiting for the firmware, the silicon defaults are assumed. This allows the machine driver to setup the ASP configuration during probe() without being blocked. If the ASP is hooked up and used, the SUPPLY widget ensures that the chip registers match what was configured in the regmap cache. If the machine driver does not hook up the ASP, it is assumed that it won't call any functions to configure the ASP DAI. Therefore the regmap cache will be clean for these registers so a regcache_sync() will not overwrite the chip registers. If the DAI is not hooked up, the dummy SUPPLY widget will not be invoked so it will never force-overwrite the chip registers. Backport note: This won't apply cleanly to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-8-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + sound/soc/codecs/cs35l56-shared.c | 41 +++++++++++++++++++++++++++++++ sound/soc/codecs/cs35l56.c | 21 ++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 8c18e8b6d27d..4db36c893d9d 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -272,6 +272,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); +int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base); int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 0cd572de73a9..35789ffc63af 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -195,6 +195,47 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) } } +/* + * The firmware boot sequence can overwrite the ASP1 config registers so that + * they don't match regmap's view of their values. Rewrite the values from the + * regmap cache into the hardware registers. + */ +int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base) +{ + struct reg_sequence asp1_regs[] = { + { .reg = CS35L56_ASP1_ENABLES1 }, + { .reg = CS35L56_ASP1_CONTROL1 }, + { .reg = CS35L56_ASP1_CONTROL2 }, + { .reg = CS35L56_ASP1_CONTROL3 }, + { .reg = CS35L56_ASP1_FRAME_CONTROL1 }, + { .reg = CS35L56_ASP1_FRAME_CONTROL5 }, + { .reg = CS35L56_ASP1_DATA_CONTROL1 }, + { .reg = CS35L56_ASP1_DATA_CONTROL5 }, + }; + int i, ret; + + /* Read values from regmap cache into a write sequence */ + for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) { + ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def); + if (ret) + goto err; + } + + /* Write the values cache-bypassed so that they will be written to silicon */ + ret = regmap_multi_reg_write_bypassed(cs35l56_base->regmap, asp1_regs, + ARRAY_SIZE(asp1_regs)); + if (ret) + goto err; + + return 0; + +err: + dev_err(cs35l56_base->dev, "Failed to sync ASP1 registers: %d\n", ret); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(cs35l56_force_sync_asp1_registers_from_cache, SND_SOC_CS35L56_SHARED); + int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command) { unsigned int val; diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index ea5d2b2eb82a..41aa79848b15 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -148,6 +148,21 @@ static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx4_enum, static const struct snd_kcontrol_new sdw1_tx4_mux = SOC_DAPM_ENUM("SDW1TX4 SRC", cs35l56_sdw1tx4_enum); +static int cs35l56_asp1_cfg_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + /* Override register values set by firmware boot */ + return cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); + default: + return 0; + } +} + static int cs35l56_play_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -184,6 +199,9 @@ static const struct snd_soc_dapm_widget cs35l56_dapm_widgets[] = { SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_B", 0, 0), SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_AMP", 0, 0), + SND_SOC_DAPM_SUPPLY("ASP1 CFG", SND_SOC_NOPM, 0, 0, cs35l56_asp1_cfg_event, + SND_SOC_DAPM_PRE_PMU), + SND_SOC_DAPM_SUPPLY("PLAY", SND_SOC_NOPM, 0, 0, cs35l56_play_event, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), @@ -251,6 +269,9 @@ static const struct snd_soc_dapm_route cs35l56_audio_map[] = { { "AMP", NULL, "VDD_B" }, { "AMP", NULL, "VDD_AMP" }, + { "ASP1 Playback", NULL, "ASP1 CFG" }, + { "ASP1 Capture", NULL, "ASP1 CFG" }, + { "ASP1 Playback", NULL, "PLAY" }, { "SDW1 Playback", NULL, "PLAY" }, From 782e6c538be43a17e34f552ab49e8c713cac7883 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:27 +0000 Subject: [PATCH 684/768] ASoC: cs35l56: Fix default SDW TX mixer registers Patch the SDW TX mixer registers to silicon defaults. CS35L56 is designed for SDCA and a generic SDCA driver would know nothing about these chip-specific registers. So the firmware sets up the SDW TX mixer registers to whatever audio is relevant on a specific system. This means that the driver cannot assume the initial values of these registers. But Linux has ALSA controls to configure routing, so the registers can be patched to silicon default and the ALSA controls used to select what audio to feed back to the host capture path. Backport note: This won't apply to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-9-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 35789ffc63af..a812abf90836 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -12,6 +12,15 @@ #include "cs35l56.h" static const struct reg_sequence cs35l56_patch[] = { + /* + * Firmware can change these to non-defaults to satisfy SDCA. + * Ensure that they are at known defaults. + */ + { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, + { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, + { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, + { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, + /* These are not reset by a soft-reset, so patch to defaults. */ { CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 }, { CS35L56_MAIN_RENDER_USER_VOLUME, 0x00000000 }, From 856ce8982169acb31a25c5f2ecd2570ab8a6af46 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:28 +0000 Subject: [PATCH 685/768] ALSA: hda: cs35l56: Initialize all ASP1 registers Add ASP1_FRAME_CONTROL1, ASP1_FRAME_CONTROL5 and the ASP1_TX?_INPUT registers to the sequence used to initialize the ASP configuration. Write this sequence to the cache and directly to the registers to ensure that they match. A system-specific firmware can patch these registers to values that are not the silicon default, so that the CS35L56 boots already in the configuration used by Windows or by "driverless" Windows setups such as factory tuning. These may not match how Linux is configuring the HDA codec. And anyway on Linux the ALSA controls are used to configure routing options. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-10-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index b61e1de8c4bf..f22bcb104a4e 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -30,14 +30,23 @@ * ASP1_RX_WL = 24 bits per sample * ASP1_TX_WL = 24 bits per sample * ASP1_RXn_EN 1..3 and ASP1_TXn_EN 1..4 disabled + * + * Override any Windows-specific mixer settings applied by the firmware. */ static const struct reg_sequence cs35l56_hda_dai_config[] = { { CS35L56_ASP1_CONTROL1, 0x00000021 }, { CS35L56_ASP1_CONTROL2, 0x20200200 }, { CS35L56_ASP1_CONTROL3, 0x00000003 }, + { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 }, + { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 }, { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 }, { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 }, { CS35L56_ASP1_ENABLES1, 0x00000000 }, + { CS35L56_ASP1TX1_INPUT, 0x00000018 }, + { CS35L56_ASP1TX2_INPUT, 0x00000019 }, + { CS35L56_ASP1TX3_INPUT, 0x00000020 }, + { CS35L56_ASP1TX4_INPUT, 0x00000028 }, + }; static void cs35l56_hda_play(struct cs35l56_hda *cs35l56) @@ -133,6 +142,10 @@ static int cs35l56_hda_runtime_resume(struct device *dev) } } + ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); + if (ret) + goto err; + return 0; err: @@ -976,6 +989,9 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id) regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config, ARRAY_SIZE(cs35l56_hda_dai_config)); + ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); + if (ret) + goto err; /* * By default only enable one ASP1TXn, where n=amplifier index, From 07f7d6e7a124d3e4de36771e2a4926d0e31c2258 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:29 +0000 Subject: [PATCH 686/768] ASoC: cs35l56: Fix for initializing ASP1 mixer registers Defer initializing the state of the ASP1 mixer registers until the firmware has been downloaded and rebooted. On a SoundWire system the ASP is free for use as a chip-to-chip interconnect. This can be either for the firmware on multiple CS35L56 to share reference audio; or as a bridge to another device. If it is a firmware interconnect it is owned by the firmware and the Linux driver should avoid writing the registers. However, if it is a bridge then Linux may take over and handle it as a normal codec-to-codec link. Even if the ASP is used as a firmware-firmware interconnect it is useful to have ALSA controls for the ASP mixer. They are at least useful for debugging. CS35L56 is designed for SDCA and a generic SDCA driver would know nothing about these chip-specific registers. So if the ASP is being used on a SoundWire system the firmware sets up the ASP mixer registers. This means that we can't assume the default state of these registers. But we don't know the initial state that the firmware set them to until after the firmware has been downloaded and booted, which can take several seconds when downloading multiple amps. DAPM normally reads the initial state of mux registers during probe() but this would mean blocking probe() for several seconds until the firmware has initialized them. To avoid this, the mixer muxes are set SND_SOC_NOPM to prevent DAPM trying to read the register state. Custom get/set callbacks are implemented for ALSA control access, and these can safely block waiting for the firmware download. After the firmware download has completed, the state of the mux registers is known so a work job is queued to call snd_soc_dapm_mux_update_power() on each of the mux widgets. Backport note: This won't apply cleanly to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-11-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 7 +- sound/soc/codecs/cs35l56.c | 172 +++++++++++++++++++++++++++--- sound/soc/codecs/cs35l56.h | 1 + 3 files changed, 163 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index a812abf90836..9a70db0fa418 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -43,10 +43,9 @@ static const struct reg_default cs35l56_reg_defaults[] = { { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 }, { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 }, { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 }, - { CS35L56_ASP1TX1_INPUT, 0x00000018 }, - { CS35L56_ASP1TX2_INPUT, 0x00000019 }, - { CS35L56_ASP1TX3_INPUT, 0x00000020 }, - { CS35L56_ASP1TX4_INPUT, 0x00000028 }, + + /* no defaults for ASP1TX mixer */ + { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 41aa79848b15..1b51650a19ff 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -59,6 +59,135 @@ static int cs35l56_dspwait_put_volsw(struct snd_kcontrol *kcontrol, return snd_soc_put_volsw(kcontrol, ucontrol); } +static const unsigned short cs35l56_asp1_mixer_regs[] = { + CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX2_INPUT, + CS35L56_ASP1TX3_INPUT, CS35L56_ASP1TX4_INPUT, +}; + +static const char * const cs35l56_asp1_mux_control_names[] = { + "ASP1 TX1 Source", "ASP1 TX2 Source", "ASP1 TX3 Source", "ASP1 TX4 Source" +}; + +static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + int index = e->shift_l; + unsigned int addr, val; + int ret; + + /* Wait for mux to be initialized */ + cs35l56_wait_dsp_ready(cs35l56); + flush_work(&cs35l56->mux_init_work); + + addr = cs35l56_asp1_mixer_regs[index]; + ret = regmap_read(cs35l56->base.regmap, addr, &val); + if (ret) + return ret; + + val &= CS35L56_ASP_TXn_SRC_MASK; + ucontrol->value.enumerated.item[0] = snd_soc_enum_val_to_item(e, val); + + return 0; +} + +static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + int item = ucontrol->value.enumerated.item[0]; + int index = e->shift_l; + unsigned int addr, val; + bool changed; + int ret; + + /* Wait for mux to be initialized */ + cs35l56_wait_dsp_ready(cs35l56); + flush_work(&cs35l56->mux_init_work); + + addr = cs35l56_asp1_mixer_regs[index]; + val = snd_soc_enum_item_to_val(e, item); + + ret = regmap_update_bits_check(cs35l56->base.regmap, addr, + CS35L56_ASP_TXn_SRC_MASK, val, &changed); + if (!ret) + return ret; + + if (changed) + snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); + + return changed; +} + +static void cs35l56_mark_asp1_mixer_widgets_dirty(struct cs35l56_private *cs35l56) +{ + struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cs35l56->component); + const char *prefix = cs35l56->component->name_prefix; + char full_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + const char *name; + struct snd_kcontrol *kcontrol; + struct soc_enum *e; + unsigned int val[4]; + int i, item, ret; + + /* + * Resume so we can read the registers from silicon if the regmap + * cache has not yet been populated. + */ + ret = pm_runtime_resume_and_get(cs35l56->base.dev); + if (ret < 0) + return; + + ret = regmap_bulk_read(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, + val, ARRAY_SIZE(val)); + + pm_runtime_mark_last_busy(cs35l56->base.dev); + pm_runtime_put_autosuspend(cs35l56->base.dev); + + if (ret) { + dev_err(cs35l56->base.dev, "Failed to read ASP1 mixer regs: %d\n", ret); + return; + } + + snd_soc_card_mutex_lock(dapm->card); + WARN_ON(!dapm->card->instantiated); + + for (i = 0; i < ARRAY_SIZE(cs35l56_asp1_mux_control_names); ++i) { + name = cs35l56_asp1_mux_control_names[i]; + + if (prefix) { + snprintf(full_name, sizeof(full_name), "%s %s", prefix, name); + name = full_name; + } + + kcontrol = snd_soc_card_get_kcontrol(dapm->card, name); + if (!kcontrol) { + dev_warn(cs35l56->base.dev, "Could not find control %s\n", name); + continue; + } + + e = (struct soc_enum *)kcontrol->private_value; + item = snd_soc_enum_val_to_item(e, val[i] & CS35L56_ASP_TXn_SRC_MASK); + snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); + } + + snd_soc_card_mutex_unlock(dapm->card); +} + +static void cs35l56_mux_init_work(struct work_struct *work) +{ + struct cs35l56_private *cs35l56 = container_of(work, + struct cs35l56_private, + mux_init_work); + + cs35l56_mark_asp1_mixer_widgets_dirty(cs35l56); +} + static DECLARE_TLV_DB_SCALE(vol_tlv, -10000, 25, 0); static const struct snd_kcontrol_new cs35l56_controls[] = { @@ -77,40 +206,44 @@ static const struct snd_kcontrol_new cs35l56_controls[] = { }; static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx1_enum, - CS35L56_ASP1TX1_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 0, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx1_mux = - SOC_DAPM_ENUM("ASP1TX1 SRC", cs35l56_asp1tx1_enum); + SOC_DAPM_ENUM_EXT("ASP1TX1 SRC", cs35l56_asp1tx1_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx2_enum, - CS35L56_ASP1TX2_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 1, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx2_mux = - SOC_DAPM_ENUM("ASP1TX2 SRC", cs35l56_asp1tx2_enum); + SOC_DAPM_ENUM_EXT("ASP1TX2 SRC", cs35l56_asp1tx2_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx3_enum, - CS35L56_ASP1TX3_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 2, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx3_mux = - SOC_DAPM_ENUM("ASP1TX3 SRC", cs35l56_asp1tx3_enum); + SOC_DAPM_ENUM_EXT("ASP1TX3 SRC", cs35l56_asp1tx3_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx4_enum, - CS35L56_ASP1TX4_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 3, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx4_mux = - SOC_DAPM_ENUM("ASP1TX4 SRC", cs35l56_asp1tx4_enum); + SOC_DAPM_ENUM_EXT("ASP1TX4 SRC", cs35l56_asp1tx4_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx1_enum, CS35L56_SWIRE_DP3_CH1_INPUT, @@ -785,6 +918,15 @@ static void cs35l56_dsp_work(struct work_struct *work) else cs35l56_patch(cs35l56); + + /* + * Set starting value of ASP1 mux widgets. Updating a mux takes + * the DAPM mutex. Post this to a separate job so that DAPM + * power-up can wait for dsp_work to complete without deadlocking + * on the DAPM mutex. + */ + queue_work(cs35l56->dsp_wq, &cs35l56->mux_init_work); + pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_put_autosuspend(cs35l56->base.dev); } @@ -830,6 +972,7 @@ static void cs35l56_component_remove(struct snd_soc_component *component) struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); cancel_work_sync(&cs35l56->dsp_work); + cancel_work_sync(&cs35l56->mux_init_work); if (cs35l56->dsp.cs_dsp.booted) wm_adsp_power_down(&cs35l56->dsp); @@ -897,8 +1040,10 @@ int cs35l56_system_suspend(struct device *dev) dev_dbg(dev, "system_suspend\n"); - if (cs35l56->component) + if (cs35l56->component) { flush_work(&cs35l56->dsp_work); + cancel_work_sync(&cs35l56->mux_init_work); + } /* * The interrupt line is normally shared, but after we start suspending @@ -1049,6 +1194,7 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) return -ENOMEM; INIT_WORK(&cs35l56->dsp_work, cs35l56_dsp_work); + INIT_WORK(&cs35l56->mux_init_work, cs35l56_mux_init_work); dsp = &cs35l56->dsp; cs35l56_init_cs_dsp(&cs35l56->base, &dsp->cs_dsp); diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index 8159c3e217d9..dc2fe4c91e67 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -34,6 +34,7 @@ struct cs35l56_private { struct wm_adsp dsp; /* must be first member */ struct cs35l56_base base; struct work_struct dsp_work; + struct work_struct mux_init_work; struct workqueue_struct *dsp_wq; struct snd_soc_component *component; struct regulator_bulk_data supplies[CS35L56_NUM_BULK_SUPPLIES]; From f6c967941c5d6fa526fdd64733a8d86bf2bfab31 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:30 +0000 Subject: [PATCH 687/768] ASoC: cs35l56: Fix misuse of wm_adsp 'part' string for silicon revision Put the silicon revision and secured flag in the wm_adsp fwf_name string instead of including them in the part string. This changes the format of the firmware name string from cs35l56[s]-rev-misc[-system_name] to cs35l56-rev[-s]-misc[-system_name] No firmware files have been published, so this doesn't cause a compatibility break. Silicon revision and secured flag are included in the firmware filename to pick a firmware compatible with the part. These strings were being added to the part string, but that is a misuse of the string. The correct place for these is the fwf_name string, which is specifically intended to select between multiple firmware files for the same part. Backport note: This won't apply to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: 608f1b0dbdde ("ASoC: cs35l56: Move DSP part string generation so that it is done only once") Link: https://msgid.link/r/20240129162737.497-12-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 1b51650a19ff..8899c02c6dea 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -907,6 +907,18 @@ static void cs35l56_dsp_work(struct work_struct *work) pm_runtime_get_sync(cs35l56->base.dev); + /* Populate fw file qualifier with the revision and security state */ + if (!cs35l56->dsp.fwf_name) { + cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x%s-dsp1", + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : ""); + if (!cs35l56->dsp.fwf_name) + goto err; + } + + dev_dbg(cs35l56->base.dev, "DSP fwf name: '%s' system name: '%s'\n", + cs35l56->dsp.fwf_name, cs35l56->dsp.system_name); + /* * When the device is running in secure mode the firmware files can * only contain insecure tunings and therefore we do not need to @@ -926,7 +938,7 @@ static void cs35l56_dsp_work(struct work_struct *work) * on the DAPM mutex. */ queue_work(cs35l56->dsp_wq, &cs35l56->mux_init_work); - +err: pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_put_autosuspend(cs35l56->base.dev); } @@ -979,6 +991,9 @@ static void cs35l56_component_remove(struct snd_soc_component *component) wm_adsp2_component_remove(&cs35l56->dsp, component); + kfree(cs35l56->dsp.fwf_name); + cs35l56->dsp.fwf_name = NULL; + cs35l56->component = NULL; } @@ -1330,12 +1345,6 @@ int cs35l56_init(struct cs35l56_private *cs35l56) if (ret) return ret; - /* Populate the DSP information with the revision and security state */ - cs35l56->dsp.part = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, "cs35l56%s-%02x", - cs35l56->base.secured ? "s" : "", cs35l56->base.rev); - if (!cs35l56->dsp.part) - return -ENOMEM; - if (!cs35l56->base.reset_gpio) { dev_dbg(cs35l56->base.dev, "No reset gpio: using soft reset\n"); cs35l56->soft_resetting = true; From f4ef5149953f2fc04907ca5b34db3df667dcddef Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:31 +0000 Subject: [PATCH 688/768] ASoC: cs35l56: Firmware file must match the version of preloaded firmware Check during initialization whether the firmware is already patched. If so, include the firmware version in the wm_adsp fwf_name string. If the firmware has already been patched by the BIOS the driver can only replace it if it has control of hard RESET. If the driver cannot replace the firmware, it can still load a wmfw (for ALSA control definitions) and/or a bin (for additional tunings). But these must match the version of firmware that is running on the CS35L56. The firmware is pre-patched if FIRMWARE_MISSING == 0. Including the firmware version in the fwf_name string will qualify the firmware file name: Normal (unpatched or replaceable firmware): cs35l56-rev-dsp1-misc[-system_name].[wmfw|bin] Preloaded firmware: cs35l56-rev[-s]-VVVVVV-dsp1-misc[-system_name].[wmfw|bin] Where: [-s] is an optional -s added into the name for a secured CS35L56 VVVVVV is the 24-bit firmware version in hexadecimal. Signed-off-by: Richard Fitzgerald Fixes: 608f1b0dbdde ("ASoC: cs35l56: Move DSP part string generation so that it is done only once") Link: https://msgid.link/r/20240129162737.497-13-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 3 ++ sound/soc/codecs/cs35l56-shared.c | 36 +++++++++++++++++++-- sound/soc/codecs/cs35l56.c | 52 +++++++++++++++++-------------- 3 files changed, 65 insertions(+), 26 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 4db36c893d9d..5d6aefc41e64 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -75,6 +75,7 @@ #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_0 0x25E2040 #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_1 0x25E2044 #define CS35L56_DSP1_XMEM_UNPACKED24_0 0x2800000 +#define CS35L56_DSP1_FW_VER 0x2800010 #define CS35L56_DSP1_HALO_STATE_A1 0x2801E58 #define CS35L56_DSP1_HALO_STATE 0x28021E0 #define CS35L56_DSP1_PM_CUR_STATE_A1 0x2804000 @@ -285,6 +286,8 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base); int cs35l56_runtime_suspend_common(struct cs35l56_base *cs35l56_base); int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_soundwire); void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp); +int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base, + bool *fw_missing, unsigned int *fw_version); int cs35l56_hw_init(struct cs35l56_base *cs35l56_base); int cs35l56_get_bclk_freq_id(unsigned int freq); void cs35l56_fill_supply_names(struct regulator_bulk_data *data); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 9a70db0fa418..33835535ef84 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -628,10 +628,35 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds } EXPORT_SYMBOL_NS_GPL(cs35l56_init_cs_dsp, SND_SOC_CS35L56_SHARED); +int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base, + bool *fw_missing, unsigned int *fw_version) +{ + unsigned int prot_status; + int ret; + + ret = regmap_read(cs35l56_base->regmap, CS35L56_PROTECTION_STATUS, &prot_status); + if (ret) { + dev_err(cs35l56_base->dev, "Get PROTECTION_STATUS failed: %d\n", ret); + return ret; + } + + *fw_missing = !!(prot_status & CS35L56_FIRMWARE_MISSING); + + ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP1_FW_VER, fw_version); + if (ret) { + dev_err(cs35l56_base->dev, "Get FW VER failed: %d\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cs35l56_read_prot_status, SND_SOC_CS35L56_SHARED); + int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) { int ret; - unsigned int devid, revid, otpid, secured; + unsigned int devid, revid, otpid, secured, fw_ver; + bool fw_missing; /* * When the system is not using a reset_gpio ensure the device is @@ -690,8 +715,13 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) return ret; } - dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d\n", - cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid); + ret = cs35l56_read_prot_status(cs35l56_base, &fw_missing, &fw_ver); + if (ret) + return ret; + + dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d fw:%d.%d.%d (patched=%u)\n", + cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid, + fw_ver >> 16, (fw_ver >> 8) & 0xff, fw_ver & 0xff, !fw_missing); /* Wake source and *_BLOCKED interrupts default to unmasked, so mask them */ regmap_write(cs35l56_base->regmap, CS35L56_IRQ1_MASK_20, 0xffffffff); diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 8899c02c6dea..597677422547 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -804,7 +804,7 @@ static struct snd_soc_dai_driver cs35l56_dai[] = { } }; -static void cs35l56_secure_patch(struct cs35l56_private *cs35l56) +static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56) { int ret; @@ -816,19 +816,10 @@ static void cs35l56_secure_patch(struct cs35l56_private *cs35l56) cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT); } -static void cs35l56_patch(struct cs35l56_private *cs35l56) +static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing) { - unsigned int firmware_missing; int ret; - ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing); - if (ret) { - dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret); - return; - } - - firmware_missing &= CS35L56_FIRMWARE_MISSING; - /* * Disable SoundWire interrupts to prevent race with IRQ work. * Setting sdw_irq_no_unmask prevents the handler re-enabling @@ -901,34 +892,49 @@ static void cs35l56_dsp_work(struct work_struct *work) struct cs35l56_private *cs35l56 = container_of(work, struct cs35l56_private, dsp_work); + unsigned int firmware_version; + bool firmware_missing; + int ret; if (!cs35l56->base.init_done) return; pm_runtime_get_sync(cs35l56->base.dev); + ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &firmware_version); + if (ret) + goto err; + /* Populate fw file qualifier with the revision and security state */ - if (!cs35l56->dsp.fwf_name) { - cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x%s-dsp1", + kfree(cs35l56->dsp.fwf_name); + if (firmware_missing) { + cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x-dsp1", cs35l56->base.rev); + } else { + /* Firmware files must match the running firmware version */ + cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, + "%02x%s-%06x-dsp1", cs35l56->base.rev, - cs35l56->base.secured ? "-s" : ""); - if (!cs35l56->dsp.fwf_name) - goto err; + cs35l56->base.secured ? "-s" : "", + firmware_version); } + if (!cs35l56->dsp.fwf_name) + goto err; + dev_dbg(cs35l56->base.dev, "DSP fwf name: '%s' system name: '%s'\n", cs35l56->dsp.fwf_name, cs35l56->dsp.system_name); /* - * When the device is running in secure mode the firmware files can - * only contain insecure tunings and therefore we do not need to - * shutdown the firmware to apply them and can use the lower cost - * reinit sequence instead. + * The firmware cannot be patched if it is already running from + * patch RAM. In this case the firmware files are versioned to + * match the running firmware version and will only contain + * tunings. We do not need to shutdown the firmware to apply + * tunings so can use the lower cost reinit sequence instead. */ - if (cs35l56->base.secured) - cs35l56_secure_patch(cs35l56); + if (!firmware_missing) + cs35l56_reinit_patch(cs35l56); else - cs35l56_patch(cs35l56); + cs35l56_patch(cs35l56, firmware_missing); /* From 245eeff18d7a37693815250ae15979ce98c3d190 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:32 +0000 Subject: [PATCH 689/768] ASoC: cs35l56: Load tunings for the correct speaker models If the "spk-id-gpios" property is present it points to GPIOs whose value must be used to select the correct bin file to match the speakers. Some manufacturers use multiple sources of speakers, which need different tunings for best performance. On these models the type of speaker fitted is indicated by the values of one or more GPIOs. The number formed by the GPIOs identifies the tuning required. The speaker ID must be used in combination with the subsystem ID (either from PCI SSID or cirrus,firmware-uid property), because the GPIOs can only indicate variants of a specific model. Signed-off-by: Richard Fitzgerald Fixes: 1a1c3d794ef6 ("ASoC: cs35l56: Use PCI SSID as the firmware UID") Link: https://msgid.link/r/20240129162737.497-14-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + sound/soc/codecs/cs35l56-shared.c | 36 +++++++++++++++++++++++++++++++ sound/soc/codecs/cs35l56.c | 32 ++++++++++++++++++++++----- sound/soc/codecs/cs35l56.h | 1 + 4 files changed, 65 insertions(+), 5 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 5d6aefc41e64..23da6298ab37 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -289,6 +289,7 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base, bool *fw_missing, unsigned int *fw_version); int cs35l56_hw_init(struct cs35l56_base *cs35l56_base); +int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base); int cs35l56_get_bclk_freq_id(unsigned int freq); void cs35l56_fill_supply_names(struct regulator_bulk_data *data); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 33835535ef84..02fba4bc0a14 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -5,6 +5,7 @@ // Copyright (C) 2023 Cirrus Logic, Inc. and // Cirrus Logic International Semiconductor Ltd. +#include #include #include #include @@ -736,6 +737,41 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) } EXPORT_SYMBOL_NS_GPL(cs35l56_hw_init, SND_SOC_CS35L56_SHARED); +int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base) +{ + struct gpio_descs *descs; + int speaker_id; + int i, ret; + + /* Read the speaker type qualifier from the motherboard GPIOs */ + descs = gpiod_get_array_optional(cs35l56_base->dev, "spk-id", GPIOD_IN); + if (!descs) { + return -ENOENT; + } else if (IS_ERR(descs)) { + ret = PTR_ERR(descs); + return dev_err_probe(cs35l56_base->dev, ret, "Failed to get spk-id-gpios\n"); + } + + speaker_id = 0; + for (i = 0; i < descs->ndescs; i++) { + ret = gpiod_get_value_cansleep(descs->desc[i]); + if (ret < 0) { + dev_err_probe(cs35l56_base->dev, ret, "Failed to read spk-id[%d]\n", i); + goto err; + } + + speaker_id |= (ret << i); + } + + dev_dbg(cs35l56_base->dev, "Speaker ID = %d\n", speaker_id); + ret = speaker_id; +err: + gpiod_put_array(descs); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(cs35l56_get_speaker_id, SND_SOC_CS35L56_SHARED); + static const u32 cs35l56_bclk_valid_for_pll_freq_table[] = { [0x0C] = 128000, [0x0F] = 256000, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 597677422547..c23e29da4cfb 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -959,10 +959,19 @@ static int cs35l56_component_probe(struct snd_soc_component *component) if (!cs35l56->dsp.system_name && (snd_soc_card_get_pci_ssid(component->card, &vendor, &device) == 0)) { - cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev, - GFP_KERNEL, - "%04x%04x", - vendor, device); + /* Append a speaker qualifier if there is a speaker ID */ + if (cs35l56->speaker_id >= 0) { + cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev, + GFP_KERNEL, + "%04x%04x-spkid%d", + vendor, device, + cs35l56->speaker_id); + } else { + cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev, + GFP_KERNEL, + "%04x%04x", + vendor, device); + } if (!cs35l56->dsp.system_name) return -ENOMEM; } @@ -1245,7 +1254,13 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56) if (ret < 0) return 0; - cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL); + /* Append a speaker qualifier if there is a speaker ID */ + if (cs35l56->speaker_id >= 0) + cs35l56->dsp.system_name = devm_kasprintf(dev, GFP_KERNEL, "%s-spkid%d", + prop, cs35l56->speaker_id); + else + cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL); + if (cs35l56->dsp.system_name == NULL) return -ENOMEM; @@ -1260,6 +1275,7 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) init_completion(&cs35l56->init_completion); mutex_init(&cs35l56->base.irq_lock); + cs35l56->speaker_id = -ENOENT; dev_set_drvdata(cs35l56->base.dev, cs35l56); @@ -1296,6 +1312,12 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 1); } + ret = cs35l56_get_speaker_id(&cs35l56->base); + if ((ret < 0) && (ret != -ENOENT)) + goto err; + + cs35l56->speaker_id = ret; + ret = cs35l56_get_firmware_uid(cs35l56); if (ret != 0) goto err; diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index dc2fe4c91e67..596b141e3f96 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -45,6 +45,7 @@ struct cs35l56_private { bool sdw_attached; struct completion init_completion; + int speaker_id; u32 rx_mask; u32 tx_mask; u8 asp_slot_width; From 9e92b77ceb6f362eb2e7995dad6c7f9863053d97 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:33 +0000 Subject: [PATCH 690/768] ASoC: cs35l56: Allow more time for firmware to boot The original 50ms timeout for firmware boot is not long enough for worst-case time to reboot after a firmware download. Increase the timeout to 250ms. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-15-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 23da6298ab37..b24716ab2750 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -242,7 +242,7 @@ #define CS35L56_CONTROL_PORT_READY_US 2200 #define CS35L56_HALO_STATE_POLL_US 1000 -#define CS35L56_HALO_STATE_TIMEOUT_US 50000 +#define CS35L56_HALO_STATE_TIMEOUT_US 250000 #define CS35L56_RESET_PULSE_MIN_US 1100 #define CS35L56_WAKE_HOLD_TIME_US 1000 From 77c60722ded7d6739805e045e9648cda82dde5ed Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:34 +0000 Subject: [PATCH 691/768] ALSA: hda: cs35l56: Fix order of searching for firmware files Check for the cases of system-specific bin file without a wmfw before falling back to looking for a generic wmfw. All system-specific options should be tried before falling back to loading a generic wmfw/bin. With the original code, the presence of a fallback generic wmfw on the filesystem would prevent using a system-specific tuning with a ROM firmware. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-16-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index f22bcb104a4e..7ba7234d8d9c 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -483,6 +483,20 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, NULL, "bin"); return; } + + /* + * Check for system-specific bin files without wmfw before + * falling back to generic firmware + */ + if (amp_name) + cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, + cirrus_dir, system_name, amp_name, "bin"); + if (!*coeff_firmware) + cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, + cirrus_dir, system_name, NULL, "bin"); + + if (*coeff_firmware) + return; } ret = cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, @@ -493,16 +507,6 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, return; } - /* When a firmware file is not found must still search for the coeff files */ - if (system_name) { - if (amp_name) - cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, amp_name, "bin"); - if (!*coeff_firmware) - cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, NULL, "bin"); - } - if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, cirrus_dir, NULL, NULL, "bin"); From e82bc517c6ef5d5c04b845420406e694c31bdb8a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:35 +0000 Subject: [PATCH 692/768] ALSA: hda: cs35l56: Fix filename string field layout Change the filename field layout to: cs35l56-rev[-s]-dsp1-misc[-sub].[wmfw|bin] This is to keep the same firmware file naming scheme as the CS35L56 ASoC driver. This is not a compatibility break because no firmware files have been published. The original field layout matched the ASoC driver, but the way the ASoC driver used the wm_adsp driver config to form this filename was bugged. Fixing the ASoC driver to use the correct wm_adsp config strings means that the 's' flag (to indicate a secured part) has to move to somewhere after the first '-'. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-17-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 7ba7234d8d9c..081479f65fe7 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -405,16 +405,19 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, int ret = 0; if (system_name && amp_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s-%s.%s", dir, - cs35l56->base.secured ? "s" : "", cs35l56->base.rev, + *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s-%s.%s", dir, + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", system_name, amp_name, filetype); else if (system_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s.%s", dir, - cs35l56->base.secured ? "s" : "", cs35l56->base.rev, + *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s.%s", dir, + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc.%s", dir, - cs35l56->base.secured ? "s" : "", cs35l56->base.rev, + *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc.%s", dir, + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", filetype); if (!*filename) From 6f8ad0480d82245961dae4d3280908611633872d Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:36 +0000 Subject: [PATCH 693/768] ALSA: hda: cs35l56: Firmware file must match the version of preloaded firmware Check whether the firmware is already patched. If so, include the firmware version in the firmware file name. If the firmware has already been patched by the BIOS the driver can only replace it if it has control of hard RESET. If the driver cannot replace the firmware, it can still load a wmfw (for ALSA control definitions) and/or a bin (for additional tunings). But these must match the version of firmware that is running on the CS35L56. The firmware is pre-patched if either: - FIRMWARE_MISSING == 0, or - it is a secured CS35L56 (which implies that is was already patched), cs35l56_hw_init() will set preloaded_fw_ver to the (non-zero) firmware version if either of these conditions is true. Normal (unpatched or replaceable firmware): cs35l56-rev-dsp1-misc[-system_name].[wmfw|bin] Preloaded firmware: cs35l56-rev[-s]-VVVVVV-dsp1-misc[-system_name].[wmfw|bin] Where: [-s] is an optional -s added into the name for a secured CS35L56 VVVVVV is the 24-bit firmware version in hexadecimal. Backport note: This won't apply to kernel versions older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-18-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 95 +++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 081479f65fe7..32736d3e45ba 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -397,7 +397,7 @@ static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, const struct firmware **firmware, char **filename, - const char *dir, const char *system_name, + const char *base_name, const char *system_name, const char *amp_name, const char *filetype) { @@ -405,20 +405,13 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, int ret = 0; if (system_name && amp_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s-%s.%s", dir, - cs35l56->base.rev, - cs35l56->base.secured ? "-s" : "", + *filename = kasprintf(GFP_KERNEL, "%s-%s-%s.%s", base_name, system_name, amp_name, filetype); else if (system_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s.%s", dir, - cs35l56->base.rev, - cs35l56->base.secured ? "-s" : "", + *filename = kasprintf(GFP_KERNEL, "%s-%s.%s", base_name, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc.%s", dir, - cs35l56->base.rev, - cs35l56->base.secured ? "-s" : "", - filetype); + *filename = kasprintf(GFP_KERNEL, "%s.%s", base_name, filetype); if (!*filename) return -ENOMEM; @@ -451,8 +444,8 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, return 0; } -static const char cirrus_dir[] = "cirrus/"; static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, + unsigned int preloaded_fw_ver, const struct firmware **wmfw_firmware, char **wmfw_filename, const struct firmware **coeff_firmware, @@ -460,29 +453,43 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, { const char *system_name = cs35l56->system_name; const char *amp_name = cs35l56->amp_name; + char base_name[37]; int ret; + if (preloaded_fw_ver) { + snprintf(base_name, sizeof(base_name), + "cirrus/cs35l56-%02x%s-%06x-dsp1-misc", + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", + preloaded_fw_ver & 0xffffff); + } else { + snprintf(base_name, sizeof(base_name), + "cirrus/cs35l56-%02x%s-dsp1-misc", + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : ""); + } + if (system_name && amp_name) { if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, - cirrus_dir, system_name, amp_name, "wmfw")) { + base_name, system_name, amp_name, "wmfw")) { cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, amp_name, "bin"); + base_name, system_name, amp_name, "bin"); return; } } if (system_name) { if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, - cirrus_dir, system_name, NULL, "wmfw")) { + base_name, system_name, NULL, "wmfw")) { if (amp_name) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, + base_name, system_name, amp_name, "bin"); if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, + base_name, system_name, NULL, "bin"); return; } @@ -493,26 +500,26 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, */ if (amp_name) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, amp_name, "bin"); + base_name, system_name, amp_name, "bin"); if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, NULL, "bin"); + base_name, system_name, NULL, "bin"); if (*coeff_firmware) return; } ret = cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, - cirrus_dir, NULL, NULL, "wmfw"); + base_name, NULL, NULL, "wmfw"); if (!ret) { cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, NULL, NULL, "bin"); + base_name, NULL, NULL, "bin"); return; } if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, NULL, NULL, "bin"); + base_name, NULL, NULL, "bin"); } static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmware, @@ -546,7 +553,8 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) const struct firmware *wmfw_firmware = NULL; char *coeff_filename = NULL; char *wmfw_filename = NULL; - unsigned int firmware_missing; + unsigned int preloaded_fw_ver; + bool firmware_missing; int ret = 0; /* Prepare for a new DSP power-up */ @@ -557,24 +565,21 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) pm_runtime_get_sync(cs35l56->base.dev); - ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing); - if (ret) { - dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret); - goto err_pm_put; - } - - firmware_missing &= CS35L56_FIRMWARE_MISSING; - /* - * Firmware can only be downloaded if the CS35L56 is secured or is - * running from the built-in ROM. If it is secured the BIOS will have - * downloaded firmware, and the wmfw/bin files will only contain - * tunings that are safe to download with the firmware running. + * The firmware can only be upgraded if it is currently running + * from the built-in ROM. If not, the wmfw/bin must be for the + * version of firmware that is running on the chip. */ - if (cs35l56->base.secured || firmware_missing) { - cs35l56_hda_request_firmware_files(cs35l56, &wmfw_firmware, &wmfw_filename, - &coeff_firmware, &coeff_filename); - } + ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &preloaded_fw_ver); + if (ret) + goto err_pm_put; + + if (firmware_missing) + preloaded_fw_ver = 0; + + cs35l56_hda_request_firmware_files(cs35l56, preloaded_fw_ver, + &wmfw_firmware, &wmfw_filename, + &coeff_firmware, &coeff_filename); /* * If the BIOS didn't patch the firmware a bin file is mandatory to @@ -589,12 +594,12 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) mutex_lock(&cs35l56->base.irq_lock); /* - * When the device is running in secure mode the firmware files can - * only contain insecure tunings and therefore we do not need to - * shutdown the firmware to apply them and can use the lower cost - * reinit sequence instead. + * If the firmware hasn't been patched it must be shutdown before + * doing a full patch and reset afterwards. If it is already + * running a patched version the firmware files only contain + * tunings and we can use the lower cost reinit sequence instead. */ - if (!cs35l56->base.secured && (wmfw_firmware || coeff_firmware)) { + if (firmware_missing && (wmfw_firmware || coeff_firmware)) { ret = cs35l56_firmware_shutdown(&cs35l56->base); if (ret) goto err; @@ -613,7 +618,7 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) if (coeff_filename) dev_dbg(cs35l56->base.dev, "Loaded Coefficients: %s\n", coeff_filename); - if (cs35l56->base.secured) { + if (!firmware_missing) { ret = cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT); if (ret) goto err_powered_up; From 28876c1ae8b8cd1dacef50bd6c0555824774f0d2 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:37 +0000 Subject: [PATCH 694/768] ALSA: hda: cs35l56: Remove unused test stub function Remove an unused stub function that calls a non-existant function. This function was accidentally added as part of commit 2144833e7b41 ("ALSA: hda: cirrus_scodec: Add KUnit test"). It was a relic of an earlier version of the test that should have been removed. Signed-off-by: Richard Fitzgerald Fixes: 2144833e7b41 ("ALSA: hda: cirrus_scodec: Add KUnit test") Link: https://msgid.link/r/20240129162737.497-19-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 32736d3e45ba..75a14ba54fcd 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -1063,16 +1063,6 @@ const struct dev_pm_ops cs35l56_hda_pm_ops = { }; EXPORT_SYMBOL_NS_GPL(cs35l56_hda_pm_ops, SND_HDA_SCODEC_CS35L56); -#if IS_ENABLED(CONFIG_SND_HDA_SCODEC_CS35L56_KUNIT_TEST) -/* Hooks to export static function to KUnit test */ - -int cs35l56_hda_test_hook_get_speaker_id(struct device *dev, int amp_index, int num_amps) -{ - return cs35l56_hda_get_speaker_id(dev, amp_index, num_amps); -} -EXPORT_SYMBOL_NS_GPL(cs35l56_hda_test_hook_get_speaker_id, SND_HDA_SCODEC_CS35L56); -#endif - MODULE_DESCRIPTION("CS35L56 HDA Driver"); MODULE_IMPORT_NS(SND_HDA_CIRRUS_SCODEC); MODULE_IMPORT_NS(SND_HDA_CS_DSP_CONTROLS); From c7de2d9bb68a5fc71c25ff96705a80a76c8436eb Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Thu, 1 Feb 2024 09:21:14 -0300 Subject: [PATCH 695/768] ALSA: hda/realtek: Enable headset mic on Vaio VJFE-ADL Vaio VJFE-ADL is equipped with ALC269VC, and it needs ALC298_FIXUP_SPK_VOLUME quirk to make its headset mic work. Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20240201122114.30080-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f98520c0bed9..6994c4c5073c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10331,6 +10331,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), From 72bd80252feeb3bef8724230ee15d9f7ab541c6e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Feb 2024 06:42:36 -0700 Subject: [PATCH 696/768] io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL and buffers If we use IORING_OP_RECV with provided buffers and pass in '0' as the length of the request, the length is retrieved from the selected buffer. If MSG_WAITALL is also set and we get a short receive, then we may hit the retry path which decrements sr->len and increments the buffer for a retry. However, the length is still zero at this point, which means that sr->len now becomes huge and import_ubuf() will cap it to MAX_RW_COUNT and subsequently return -EFAULT for the range as a whole. Fix this by always assigning sr->len once the buffer has been selected. Cc: stable@vger.kernel.org Fixes: 7ba89d2af17a ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly") Signed-off-by: Jens Axboe --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index a12ff69e6843..43bc9a5f96f9 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -923,6 +923,7 @@ retry_multishot: if (!buf) return -ENOBUFS; sr->buf = buf; + sr->len = len; } ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter); From 6813cdca4ab94a238f8eb0cef3d3f3fcbdfb0ee0 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 31 Jan 2024 10:19:20 +0800 Subject: [PATCH 697/768] drm/amdgpu/pm: Use inline function for IP version check Use existing inline function for IP version check. Signed-off-by: Ma Jun Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3230701d0d38..a9954ffc02c5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2944,7 +2944,7 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 0): return smu->smc_fw_version >= 0x004e6300; case IP_VERSION(13, 0, 10): From 04f647c8e456fcfabe9c252a4dcaee03b586fa4f Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 30 Jan 2024 17:36:10 +0530 Subject: [PATCH 698/768] octeontx2-pf: Remove xdp queues on program detach XDP queues are created/destroyed when a XDP program is attached/detached. In current driver xdp_queues are not getting destroyed on program exit due to incorrect xdp_queue and tot_tx_queue count values. This patch fixes the issue by setting tot_tx_queue and xdp_queue count to correct values. It also fixes xdp.data_hard_start address. Fixes: 06059a1a9a4a ("octeontx2-pf: Add XDP support to netdev PF") Signed-off-by: Geetha sowjanya Link: https://lore.kernel.org/r/20240130120610.16673-1-gakula@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 1 - drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +-- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 7 +++---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 2928898c7f8d..7f786de61014 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -314,7 +314,6 @@ static int otx2_set_channels(struct net_device *dev, pfvf->hw.tx_queues = channel->tx_count; if (pfvf->xdp_prog) pfvf->hw.xdp_queues = channel->rx_count; - pfvf->hw.non_qos_queues = pfvf->hw.tx_queues + pfvf->hw.xdp_queues; if (if_up) err = dev->netdev_ops->ndo_open(dev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a57455aebff6..e5fe67e73865 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1744,6 +1744,7 @@ int otx2_open(struct net_device *netdev) /* RQ and SQs are mapped to different CQs, * so find out max CQ IRQs (i.e CINTs) needed. */ + pf->hw.non_qos_queues = pf->hw.tx_queues + pf->hw.xdp_queues; pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues, pf->hw.tc_tx_queues); @@ -2643,8 +2644,6 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) xdp_features_clear_redirect_target(dev); } - pf->hw.non_qos_queues += pf->hw.xdp_queues; - if (if_up) otx2_open(pf->netdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 4d519ea833b2..f828d32737af 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1403,7 +1403,7 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, bool *need_xdp_flush) { - unsigned char *hard_start, *data; + unsigned char *hard_start; int qidx = cq->cq_idx; struct xdp_buff xdp; struct page *page; @@ -1417,9 +1417,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, xdp_init_buff(&xdp, pfvf->rbsize, &cq->xdp_rxq); - data = (unsigned char *)phys_to_virt(pa); - hard_start = page_address(page); - xdp_prepare_buff(&xdp, hard_start, data - hard_start, + hard_start = (unsigned char *)phys_to_virt(pa); + xdp_prepare_buff(&xdp, hard_start, OTX2_HEAD_ROOM, cqe->sg.seg_size, false); act = bpf_prog_run_xdp(prog, &xdp); From eaa1b01fe709d6a236a9cec74813e0400601fd23 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 1 Feb 2024 14:53:08 +0300 Subject: [PATCH 699/768] ALSA: usb-audio: Ignore clock selector errors for single connection For devices with multiple clock sources connected to a selector, we need to check what a clock selector control request has returned. This is needed to ensure that a requested clock source is indeed selected and for autoclock feature to work. For devices with single clock source connected, if we get an error there is nothing else we can do about it. We can't skip clock selector setup as it is required by some devices. So lets just ignore error in this case. This should fix various buggy Mackie devices: [ 649.109785] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.111946] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.113822] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) There is also interesting info from the Windows documentation [1] (this is probably why manufacturers dont't even test this feature): "The USB Audio 2.0 driver doesn't support clock selection. The driver uses the Clock Source Entity, which is selected by default and never issues a Clock Selector Control SET CUR request." Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/audio/usb-2-0-audio-drivers [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=217314 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218175 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218342 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240201115308.17838-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index a8204c6d6fac..60fcb872a80b 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -347,8 +347,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, !writeable) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); - if (err < 0) + if (err < 0) { + if (pins == 1) { + usb_audio_dbg(chip, + "%s(): selector returned an error, " + "assuming a firmware bug, id %d, ret %d\n", + __func__, clock_id, err); + return ret; + } return err; + } } if (!validate || ret > 0 || !chip->autoclock) From 49304c2b93e4f7468b51ef717cbe637981397115 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 23:32:27 -0500 Subject: [PATCH 700/768] tracefs: dentry lookup crapectomy The dentry lookup for eventfs files was very broken, and had lots of signs of the old situation where the filesystem names were all created statically in the dentry tree, rather than being looked up dynamically based on the eventfs data structures. You could see it in the naming - how it claimed to "create" dentries rather than just look up the dentries that were given it. You could see it in various nonsensical and very incorrect operations, like using "simple_lookup()" on the dentries that were passed in, which only results in those dentries becoming negative dentries. Which meant that any other lookup would possibly return ENOENT if it saw that negative dentry before the data was then later filled in. You could see it in the immense amount of nonsensical code that didn't actually just do lookups. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131233227.73db55e1@gandalf.local.home Cc: stable@vger.kernel.org Cc: Al Viro Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Greg Kroah-Hartman Cc: Ajay Kaher Cc: Mark Rutland Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 275 +++++++-------------------------------- fs/tracefs/inode.c | 69 ---------- fs/tracefs/internal.h | 3 - 3 files changed, 50 insertions(+), 297 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index e9819d719d2a..04c2ab90f93e 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -230,7 +230,6 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) { struct eventfs_inode *ei; - mutex_lock(&eventfs_mutex); do { // The parent is stable because we do not do renames dentry = dentry->d_parent; @@ -247,7 +246,6 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) } // Walk upwards until you find the events inode } while (!ei->is_events); - mutex_unlock(&eventfs_mutex); update_top_events_attr(ei, dentry->d_sb); @@ -280,11 +278,10 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, } /** - * create_file - create a file in the tracefs filesystem - * @name: the name of the file to create. + * lookup_file - look up a file in the tracefs filesystem + * @dentry: the dentry to look up * @mode: the permission that the file should have. * @attr: saved attributes changed by user - * @parent: parent dentry for this file. * @data: something that the caller will want to get to later on. * @fop: struct file_operations that should be used for this file. * @@ -292,13 +289,13 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, * directory. The inode.i_private pointer will point to @data in the open() * call. */ -static struct dentry *create_file(const char *name, umode_t mode, +static struct dentry *lookup_file(struct dentry *dentry, + umode_t mode, struct eventfs_attr *attr, - struct dentry *parent, void *data, + void *data, const struct file_operations *fop) { struct tracefs_inode *ti; - struct dentry *dentry; struct inode *inode; if (!(mode & S_IFMT)) @@ -307,15 +304,9 @@ static struct dentry *create_file(const char *name, umode_t mode, if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; - WARN_ON_ONCE(!parent); - dentry = eventfs_start_creating(name, parent); - - if (IS_ERR(dentry)) - return dentry; - inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return eventfs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); /* If the user updated the directory's attributes, use them */ update_inode_attr(dentry, inode, attr, mode); @@ -329,32 +320,29 @@ static struct dentry *create_file(const char *name, umode_t mode, ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; - d_instantiate(dentry, inode); + + d_add(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); - return eventfs_end_creating(dentry); + return dentry; }; /** - * create_dir - create a dir in the tracefs filesystem + * lookup_dir_entry - look up a dir in the tracefs filesystem + * @dentry: the directory to look up * @ei: the eventfs_inode that represents the directory to create - * @parent: parent dentry for this file. * - * This function will create a dentry for a directory represented by + * This function will look up a dentry for a directory represented by * a eventfs_inode. */ -static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) +static struct dentry *lookup_dir_entry(struct dentry *dentry, + struct eventfs_inode *pei, struct eventfs_inode *ei) { struct tracefs_inode *ti; - struct dentry *dentry; struct inode *inode; - dentry = eventfs_start_creating(ei->name, parent); - if (IS_ERR(dentry)) - return dentry; - inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return eventfs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); /* If the user updated the directory's attributes, use them */ update_inode_attr(dentry, inode, &ei->attr, @@ -371,11 +359,14 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent /* Only directories have ti->private set to an ei, not files */ ti->private = ei; + dentry->d_fsdata = ei; + ei->dentry = dentry; // Remove me! + inc_nlink(inode); - d_instantiate(dentry, inode); + d_add(dentry, inode); inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); - return eventfs_end_creating(dentry); + return dentry; } static void free_ei(struct eventfs_inode *ei) @@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) } /** - * create_file_dentry - create a dentry for a file of an eventfs_inode + * lookup_file_dentry - create a dentry for a file of an eventfs_inode * @ei: the eventfs_inode that the file will be created under * @idx: the index into the d_children[] of the @ei * @parent: The parent dentry of the created file. @@ -438,157 +429,21 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) * address located at @e_dentry. */ static struct dentry * -create_file_dentry(struct eventfs_inode *ei, int idx, - struct dentry *parent, const char *name, umode_t mode, void *data, +lookup_file_dentry(struct dentry *dentry, + struct eventfs_inode *ei, int idx, + umode_t mode, void *data, const struct file_operations *fops) { struct eventfs_attr *attr = NULL; struct dentry **e_dentry = &ei->d_children[idx]; - struct dentry *dentry; - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); - - mutex_lock(&eventfs_mutex); - if (ei->is_freed) { - mutex_unlock(&eventfs_mutex); - return NULL; - } - /* If the e_dentry already has a dentry, use it */ - if (*e_dentry) { - dget(*e_dentry); - mutex_unlock(&eventfs_mutex); - return *e_dentry; - } - - /* ei->entry_attrs are protected by SRCU */ if (ei->entry_attrs) attr = &ei->entry_attrs[idx]; - mutex_unlock(&eventfs_mutex); + dentry->d_fsdata = ei; // NOTE: ei of _parent_ + lookup_file(dentry, mode, attr, data, fops); - dentry = create_file(name, mode, attr, parent, data, fops); - - mutex_lock(&eventfs_mutex); - - if (IS_ERR_OR_NULL(dentry)) { - /* - * When the mutex was released, something else could have - * created the dentry for this e_dentry. In which case - * use that one. - * - * If ei->is_freed is set, the e_dentry is currently on its - * way to being freed, don't return it. If e_dentry is NULL - * it means it was already freed. - */ - if (ei->is_freed) { - dentry = NULL; - } else { - dentry = *e_dentry; - dget(dentry); - } - mutex_unlock(&eventfs_mutex); - return dentry; - } - - if (!*e_dentry && !ei->is_freed) { - *e_dentry = dentry; - dentry->d_fsdata = ei; - } else { - /* - * Should never happen unless we get here due to being freed. - * Otherwise it means two dentries exist with the same name. - */ - WARN_ON_ONCE(!ei->is_freed); - dentry = NULL; - } - mutex_unlock(&eventfs_mutex); - - return dentry; -} - -/** - * eventfs_post_create_dir - post create dir routine - * @ei: eventfs_inode of recently created dir - * - * Map the meta-data of files within an eventfs dir to their parent dentry - */ -static void eventfs_post_create_dir(struct eventfs_inode *ei) -{ - struct eventfs_inode *ei_child; - - lockdep_assert_held(&eventfs_mutex); - - /* srcu lock already held */ - /* fill parent-child relation */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - ei_child->d_parent = ei->dentry; - } -} - -/** - * create_dir_dentry - Create a directory dentry for the eventfs_inode - * @pei: The eventfs_inode parent of ei. - * @ei: The eventfs_inode to create the directory for - * @parent: The dentry of the parent of this directory - * - * This creates and attaches a directory dentry to the eventfs_inode @ei. - */ -static struct dentry * -create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, - struct dentry *parent) -{ - struct dentry *dentry = NULL; - - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); - - mutex_lock(&eventfs_mutex); - if (pei->is_freed || ei->is_freed) { - mutex_unlock(&eventfs_mutex); - return NULL; - } - if (ei->dentry) { - /* If the eventfs_inode already has a dentry, use it */ - dentry = ei->dentry; - dget(dentry); - mutex_unlock(&eventfs_mutex); - return dentry; - } - mutex_unlock(&eventfs_mutex); - - dentry = create_dir(ei, parent); - - mutex_lock(&eventfs_mutex); - - if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { - /* - * When the mutex was released, something else could have - * created the dentry for this e_dentry. In which case - * use that one. - * - * If ei->is_freed is set, the e_dentry is currently on its - * way to being freed. - */ - dentry = ei->dentry; - if (dentry) - dget(dentry); - mutex_unlock(&eventfs_mutex); - return dentry; - } - - if (!ei->dentry && !ei->is_freed) { - ei->dentry = dentry; - eventfs_post_create_dir(ei); - dentry->d_fsdata = ei; - } else { - /* - * Should never happen unless we get here due to being freed. - * Otherwise it means two dentries exist with the same name. - */ - WARN_ON_ONCE(!ei->is_freed); - dentry = NULL; - } - mutex_unlock(&eventfs_mutex); + *e_dentry = dentry; // Remove me return dentry; } @@ -607,79 +462,49 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - const struct file_operations *fops; - const struct eventfs_entry *entry; struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; - struct dentry *ei_dentry = NULL; - struct dentry *ret = NULL; - struct dentry *d; const char *name = dentry->d_name.name; - umode_t mode; - void *data; - int idx; - int i; - int r; ti = get_tracefs(dir); if (!(ti->flags & TRACEFS_EVENT_INODE)) - return NULL; + return ERR_PTR(-EIO); - /* Grab srcu to prevent the ei from going away */ - idx = srcu_read_lock(&eventfs_srcu); - - /* - * Grab the eventfs_mutex to consistent value from ti->private. - * This s - */ mutex_lock(&eventfs_mutex); - ei = READ_ONCE(ti->private); - if (ei && !ei->is_freed) - ei_dentry = READ_ONCE(ei->dentry); - mutex_unlock(&eventfs_mutex); - if (!ei || !ei_dentry) + ei = ti->private; + if (!ei || ei->is_freed) goto out; - data = ei->data; - - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { + list_for_each_entry(ei_child, &ei->children, list) { if (strcmp(ei_child->name, name) != 0) continue; - ret = simple_lookup(dir, dentry, flags); - if (IS_ERR(ret)) + if (ei_child->is_freed) goto out; - d = create_dir_dentry(ei, ei_child, ei_dentry); - dput(d); + lookup_dir_entry(dentry, ei, ei_child); goto out; } - for (i = 0; i < ei->nr_entries; i++) { - entry = &ei->entries[i]; - if (strcmp(name, entry->name) == 0) { - void *cdata = data; - mutex_lock(&eventfs_mutex); - /* If ei->is_freed, then the event itself may be too */ - if (!ei->is_freed) - r = entry->callback(name, &mode, &cdata, &fops); - else - r = -1; - mutex_unlock(&eventfs_mutex); - if (r <= 0) - continue; - ret = simple_lookup(dir, dentry, flags); - if (IS_ERR(ret)) - goto out; - d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); - dput(d); - break; - } + for (int i = 0; i < ei->nr_entries; i++) { + void *data; + umode_t mode; + const struct file_operations *fops; + const struct eventfs_entry *entry = &ei->entries[i]; + + if (strcmp(name, entry->name) != 0) + continue; + + data = ei->data; + if (entry->callback(name, &mode, &data, &fops) <= 0) + goto out; + + lookup_file_dentry(dentry, ei, i, mode, data, fops); + goto out; } out: - srcu_read_unlock(&eventfs_srcu, idx); - return ret; + mutex_unlock(&eventfs_mutex); + return NULL; } /* diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 888e42087847..5c84460feeeb 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -495,75 +495,6 @@ struct dentry *tracefs_end_creating(struct dentry *dentry) return dentry; } -/** - * eventfs_start_creating - start the process of creating a dentry - * @name: Name of the file created for the dentry - * @parent: The parent dentry where this dentry will be created - * - * This is a simple helper function for the dynamically created eventfs - * files. When the directory of the eventfs files are accessed, their - * dentries are created on the fly. This function is used to start that - * process. - */ -struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) -{ - struct dentry *dentry; - int error; - - /* Must always have a parent. */ - if (WARN_ON_ONCE(!parent)) - return ERR_PTR(-EINVAL); - - error = simple_pin_fs(&trace_fs_type, &tracefs_mount, - &tracefs_mount_count); - if (error) - return ERR_PTR(error); - - if (unlikely(IS_DEADDIR(parent->d_inode))) - dentry = ERR_PTR(-ENOENT); - else - dentry = lookup_one_len(name, parent, strlen(name)); - - if (!IS_ERR(dentry) && dentry->d_inode) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - } - - if (IS_ERR(dentry)) - simple_release_fs(&tracefs_mount, &tracefs_mount_count); - - return dentry; -} - -/** - * eventfs_failed_creating - clean up a failed eventfs dentry creation - * @dentry: The dentry to clean up - * - * If after calling eventfs_start_creating(), a failure is detected, the - * resources created by eventfs_start_creating() needs to be cleaned up. In - * that case, this function should be called to perform that clean up. - */ -struct dentry *eventfs_failed_creating(struct dentry *dentry) -{ - dput(dentry); - simple_release_fs(&tracefs_mount, &tracefs_mount_count); - return NULL; -} - -/** - * eventfs_end_creating - Finish the process of creating a eventfs dentry - * @dentry: The dentry that has successfully been created. - * - * This function is currently just a place holder to match - * eventfs_start_creating(). In case any synchronization needs to be added, - * this function will be used to implement that without having to modify - * the callers of eventfs_start_creating(). - */ -struct dentry *eventfs_end_creating(struct dentry *dentry) -{ - return dentry; -} - /* Find the inode that this will use for default */ static struct inode *instance_inode(struct dentry *parent, struct inode *inode) { diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 7d84349ade87..09037e2c173d 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -80,9 +80,6 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent); struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); -struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); -struct dentry *eventfs_failed_creating(struct dentry *dentry); -struct dentry *eventfs_end_creating(struct dentry *dentry); void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ From 408600be78cdb8c650a97ecc7ff411cb216811b5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:23 -0500 Subject: [PATCH 701/768] eventfs: Remove unused d_parent pointer field It's never used Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.961772428@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 4 +--- fs/tracefs/internal.h | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 04c2ab90f93e..16ca8d9759b1 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -680,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode INIT_LIST_HEAD(&ei->list); mutex_lock(&eventfs_mutex); - if (!parent->is_freed) { + if (!parent->is_freed) list_add_tail(&ei->list, &parent->children); - ei->d_parent = parent->dentry; - } mutex_unlock(&eventfs_mutex); /* Was the parent freed? */ diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 09037e2c173d..932733a2696a 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -36,7 +36,6 @@ struct eventfs_attr { * @name: the name of the directory to create * @children: link list into the child eventfs_inode * @dentry: the dentry of the directory - * @d_parent: pointer to the parent's dentry * @d_children: The array of dentries to represent the files when created * @entry_attrs: Saved mode and ownership of the @d_children * @attr: Saved mode and ownership of eventfs_inode itself @@ -51,7 +50,6 @@ struct eventfs_inode { const char *name; struct list_head children; struct dentry *dentry; /* Check is_freed to access */ - struct dentry *d_parent; struct dentry **d_children; struct eventfs_attr *entry_attrs; struct eventfs_attr attr; From 8dce06e98c70a7fcbb4bca7d90faf40522e65c58 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:24 -0500 Subject: [PATCH 702/768] eventfs: Clean up dentry ops and add revalidate function In order for the dentries to stay up-to-date with the eventfs changes, just add a 'd_revalidate' function that checks the 'is_freed' bit. Also, clean up the dentry release to actually use d_release() rather than the slightly odd d_iput() function. We don't care about the inode, all we want to do is to get rid of the refcount to the eventfs data added by dentry->d_fsdata. It would probably be cleaner to make eventfs its own filesystem, or at least set its own dentry ops when looking up eventfs files. But as it is, only eventfs dentries use d_fsdata, so we don't really need to split these things up by use. Another thing that might be worth doing is to make all eventfs lookups mark their dentries as not worth caching. We could do that with d_delete(), but the DCACHE_DONTCACHE flag would likely be even better. As it is, the dentries are all freeable, but they only tend to get freed at memory pressure rather than more proactively. But that's a separate issue. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.124644253@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 5 ++--- fs/tracefs/inode.c | 27 ++++++++++++++++++--------- fs/tracefs/internal.h | 3 ++- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 16ca8d9759b1..b2285d5f3fed 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -378,13 +378,12 @@ static void free_ei(struct eventfs_inode *ei) } /** - * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode - * @ti: the tracefs_inode of the dentry + * eventfs_d_release - dentry is going away * @dentry: dentry which has the reference to remove. * * Remove the association between a dentry from an eventfs_inode. */ -void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) +void eventfs_d_release(struct dentry *dentry) { struct eventfs_inode *ei; int i; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5c84460feeeb..d65ffad4c327 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -377,21 +377,30 @@ static const struct super_operations tracefs_super_operations = { .show_options = tracefs_show_options, }; -static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode) +/* + * It would be cleaner if eventfs had its own dentry ops. + * + * Note that d_revalidate is called potentially under RCU, + * so it can't take the eventfs mutex etc. It's fine - if + * we open a file just as it's marked dead, things will + * still work just fine, and just see the old stale case. + */ +static void tracefs_d_release(struct dentry *dentry) { - struct tracefs_inode *ti; + if (dentry->d_fsdata) + eventfs_d_release(dentry); +} - if (!dentry || !inode) - return; +static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct eventfs_inode *ei = dentry->d_fsdata; - ti = get_tracefs(inode); - if (ti && ti->flags & TRACEFS_EVENT_INODE) - eventfs_set_ei_status_free(ti, dentry); - iput(inode); + return !(ei && ei->is_freed); } static const struct dentry_operations tracefs_dentry_operations = { - .d_iput = tracefs_dentry_iput, + .d_revalidate = tracefs_d_revalidate, + .d_release = tracefs_d_release, }; static int trace_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 932733a2696a..4b50a0668055 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -78,6 +78,7 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent); struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); -void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); + +void eventfs_d_release(struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ From 43aa6f97c2d03a52c1ddb86768575fc84344bdbb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:25 -0500 Subject: [PATCH 703/768] eventfs: Get rid of dentry pointers without refcounts The eventfs inode had pointers to dentries (and child dentries) without actually holding a refcount on said pointer. That is fundamentally broken, and while eventfs tried to then maintain coherence with dentries going away by hooking into the '.d_iput' callback, that doesn't actually work since it's not ordered wrt lookups. There were two reasonms why eventfs tried to keep a pointer to a dentry: - the creation of a 'events' directory would actually have a stable dentry pointer that it created with tracefs_start_creating(). And it needed that dentry when tearing it all down again in eventfs_remove_events_dir(). This use is actually ok, because the special top-level events directory dentries are actually stable, not just a temporary cache of the eventfs data structures. - the 'eventfs_inode' (aka ei) needs to stay around as long as there are dentries that refer to it. It then used these dentry pointers as a replacement for doing reference counting: it would try to make sure that there was only ever one dentry associated with an event_inode, and keep a child dentry array around to see which dentries might still refer to the parent ei. This gets rid of the invalid dentry pointer use, and renames the one valid case to a different name to make it clear that it's not just any random dentry. The magic child dentry array that is kind of a "reverse reference list" is simply replaced by having child dentries take a ref to the ei. As does the directory dentries. That makes the broken use case go away. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.280463000@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 248 ++++++++++++--------------------------- fs/tracefs/internal.h | 7 +- 2 files changed, 78 insertions(+), 177 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index b2285d5f3fed..515fdace1eea 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -62,6 +62,35 @@ enum { #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) +/* + * eventfs_inode reference count management. + * + * NOTE! We count only references from dentries, in the + * form 'dentry->d_fsdata'. There are also references from + * directory inodes ('ti->private'), but the dentry reference + * count is always a superset of the inode reference count. + */ +static void release_ei(struct kref *ref) +{ + struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + kfree(ei->entry_attrs); + kfree_const(ei->name); + kfree_rcu(ei, rcu); +} + +static inline void put_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_put(&ei->kref, release_ei); +} + +static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_get(&ei->kref); + return ei; +} + static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); @@ -289,7 +318,8 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, * directory. The inode.i_private pointer will point to @data in the open() * call. */ -static struct dentry *lookup_file(struct dentry *dentry, +static struct dentry *lookup_file(struct eventfs_inode *parent_ei, + struct dentry *dentry, umode_t mode, struct eventfs_attr *attr, void *data, @@ -302,7 +332,7 @@ static struct dentry *lookup_file(struct dentry *dentry, mode |= S_IFREG; if (WARN_ON_ONCE(!S_ISREG(mode))) - return NULL; + return ERR_PTR(-EIO); inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -321,9 +351,12 @@ static struct dentry *lookup_file(struct dentry *dentry, ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; + // Files have their parent's ei as their fsdata + dentry->d_fsdata = get_ei(parent_ei); + d_add(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); - return dentry; + return NULL; }; /** @@ -359,22 +392,29 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, /* Only directories have ti->private set to an ei, not files */ ti->private = ei; - dentry->d_fsdata = ei; - ei->dentry = dentry; // Remove me! + dentry->d_fsdata = get_ei(ei); inc_nlink(inode); d_add(dentry, inode); inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); - return dentry; + return NULL; } -static void free_ei(struct eventfs_inode *ei) +static inline struct eventfs_inode *alloc_ei(const char *name) { - kfree_const(ei->name); - kfree(ei->d_children); - kfree(ei->entry_attrs); - kfree(ei); + struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL); + + if (!ei) + return NULL; + + ei->name = kstrdup_const(name, GFP_KERNEL); + if (!ei->name) { + kfree(ei); + return NULL; + } + kref_init(&ei->kref); + return ei; } /** @@ -385,39 +425,13 @@ static void free_ei(struct eventfs_inode *ei) */ void eventfs_d_release(struct dentry *dentry) { - struct eventfs_inode *ei; - int i; - - mutex_lock(&eventfs_mutex); - - ei = dentry->d_fsdata; - if (!ei) - goto out; - - /* This could belong to one of the files of the ei */ - if (ei->dentry != dentry) { - for (i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i] == dentry) - break; - } - if (WARN_ON_ONCE(i == ei->nr_entries)) - goto out; - ei->d_children[i] = NULL; - } else if (ei->is_freed) { - free_ei(ei); - } else { - ei->dentry = NULL; - } - - dentry->d_fsdata = NULL; - out: - mutex_unlock(&eventfs_mutex); + put_ei(dentry->d_fsdata); } /** * lookup_file_dentry - create a dentry for a file of an eventfs_inode * @ei: the eventfs_inode that the file will be created under - * @idx: the index into the d_children[] of the @ei + * @idx: the index into the entry_attrs[] of the @ei * @parent: The parent dentry of the created file. * @name: The name of the file to create * @mode: The mode of the file. @@ -434,17 +448,11 @@ lookup_file_dentry(struct dentry *dentry, const struct file_operations *fops) { struct eventfs_attr *attr = NULL; - struct dentry **e_dentry = &ei->d_children[idx]; if (ei->entry_attrs) attr = &ei->entry_attrs[idx]; - dentry->d_fsdata = ei; // NOTE: ei of _parent_ - lookup_file(dentry, mode, attr, data, fops); - - *e_dentry = dentry; // Remove me - - return dentry; + return lookup_file(ei, dentry, mode, attr, data, fops); } /** @@ -465,6 +473,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, struct tracefs_inode *ti; struct eventfs_inode *ei; const char *name = dentry->d_name.name; + struct dentry *result = NULL; ti = get_tracefs(dir); if (!(ti->flags & TRACEFS_EVENT_INODE)) @@ -481,7 +490,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, continue; if (ei_child->is_freed) goto out; - lookup_dir_entry(dentry, ei, ei_child); + result = lookup_dir_entry(dentry, ei, ei_child); goto out; } @@ -498,12 +507,12 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, if (entry->callback(name, &mode, &data, &fops) <= 0) goto out; - lookup_file_dentry(dentry, ei, i, mode, data, fops); + result = lookup_file_dentry(dentry, ei, i, mode, data, fops); goto out; } out: mutex_unlock(&eventfs_mutex); - return NULL; + return result; } /* @@ -653,25 +662,10 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode if (!parent) return ERR_PTR(-EINVAL); - ei = kzalloc(sizeof(*ei), GFP_KERNEL); + ei = alloc_ei(name); if (!ei) return ERR_PTR(-ENOMEM); - ei->name = kstrdup_const(name, GFP_KERNEL); - if (!ei->name) { - kfree(ei); - return ERR_PTR(-ENOMEM); - } - - if (size) { - ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); - if (!ei->d_children) { - kfree_const(ei->name); - kfree(ei); - return ERR_PTR(-ENOMEM); - } - } - ei->entries = entries; ei->nr_entries = size; ei->data = data; @@ -685,7 +679,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { - free_ei(ei); + put_ei(ei); ei = NULL; } return ei; @@ -720,28 +714,20 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry if (IS_ERR(dentry)) return ERR_CAST(dentry); - ei = kzalloc(sizeof(*ei), GFP_KERNEL); + ei = alloc_ei(name); if (!ei) - goto fail_ei; + goto fail; inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) goto fail; - if (size) { - ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); - if (!ei->d_children) - goto fail; - } - - ei->dentry = dentry; + // Note: we have a ref to the dentry from tracefs_start_creating() + ei->events_dir = dentry; ei->entries = entries; ei->nr_entries = size; ei->is_events = 1; ei->data = data; - ei->name = kstrdup_const(name, GFP_KERNEL); - if (!ei->name) - goto fail; /* Save the ownership of this directory */ uid = d_inode(dentry->d_parent)->i_uid; @@ -772,7 +758,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry inode->i_op = &eventfs_root_dir_inode_operations; inode->i_fop = &eventfs_file_operations; - dentry->d_fsdata = ei; + dentry->d_fsdata = get_ei(ei); /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); @@ -784,72 +770,11 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - kfree(ei->d_children); - kfree(ei); - fail_ei: + put_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } -static LLIST_HEAD(free_list); - -static void eventfs_workfn(struct work_struct *work) -{ - struct eventfs_inode *ei, *tmp; - struct llist_node *llnode; - - llnode = llist_del_all(&free_list); - llist_for_each_entry_safe(ei, tmp, llnode, llist) { - /* This dput() matches the dget() from unhook_dentry() */ - for (int i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i]) - dput(ei->d_children[i]); - } - /* This should only get here if it had a dentry */ - if (!WARN_ON_ONCE(!ei->dentry)) - dput(ei->dentry); - } -} - -static DECLARE_WORK(eventfs_work, eventfs_workfn); - -static void free_rcu_ei(struct rcu_head *head) -{ - struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); - - if (ei->dentry) { - /* Do not free the ei until all references of dentry are gone */ - if (llist_add(&ei->llist, &free_list)) - queue_work(system_unbound_wq, &eventfs_work); - return; - } - - /* If the ei doesn't have a dentry, neither should its children */ - for (int i = 0; i < ei->nr_entries; i++) { - WARN_ON_ONCE(ei->d_children[i]); - } - - free_ei(ei); -} - -static void unhook_dentry(struct dentry *dentry) -{ - if (!dentry) - return; - /* - * Need to add a reference to the dentry that is expected by - * simple_recursive_removal(), which will include a dput(). - */ - dget(dentry); - - /* - * Also add a reference for the dput() in eventfs_workfn(). - * That is required as that dput() will free the ei after - * the SRCU grace period is over. - */ - dget(dentry); -} - /** * eventfs_remove_rec - remove eventfs dir or file from list * @ei: eventfs_inode to be removed. @@ -862,8 +787,6 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) { struct eventfs_inode *ei_child; - if (!ei) - return; /* * Check recursion depth. It should never be greater than 3: * 0 - events/ @@ -875,28 +798,12 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) return; /* search for nested folders or files */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - lockdep_is_held(&eventfs_mutex)) { - /* Children only have dentry if parent does */ - WARN_ON_ONCE(ei_child->dentry && !ei->dentry); + list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); - } - ei->is_freed = 1; - - for (int i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i]) { - /* Children only have dentry if parent does */ - WARN_ON_ONCE(!ei->dentry); - unhook_dentry(ei->d_children[i]); - } - } - - unhook_dentry(ei->dentry); - - list_del_rcu(&ei->list); - call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); + list_del(&ei->list); + put_ei(ei); } /** @@ -907,22 +814,12 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) */ void eventfs_remove_dir(struct eventfs_inode *ei) { - struct dentry *dentry; - if (!ei) return; mutex_lock(&eventfs_mutex); - dentry = ei->dentry; eventfs_remove_rec(ei, 0); mutex_unlock(&eventfs_mutex); - - /* - * If any of the ei children has a dentry, then the ei itself - * must have a dentry. - */ - if (dentry) - simple_recursive_removal(dentry, NULL); } /** @@ -935,7 +832,11 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) { struct dentry *dentry; - dentry = ei->dentry; + dentry = ei->events_dir; + if (!dentry) + return; + + ei->events_dir = NULL; eventfs_remove_dir(ei); /* @@ -945,5 +846,6 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) * sticks around while the other ei->dentry are created * and destroyed dynamically. */ + d_invalidate(dentry); dput(dentry); } diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 4b50a0668055..1886f1826cd8 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -35,8 +35,7 @@ struct eventfs_attr { * @entries: the array of entries representing the files in the directory * @name: the name of the directory to create * @children: link list into the child eventfs_inode - * @dentry: the dentry of the directory - * @d_children: The array of dentries to represent the files when created + * @events_dir: the dentry of the events directory * @entry_attrs: Saved mode and ownership of the @d_children * @attr: Saved mode and ownership of eventfs_inode itself * @data: The private data to pass to the callbacks @@ -45,12 +44,12 @@ struct eventfs_attr { * @nr_entries: The number of items in @entries */ struct eventfs_inode { + struct kref kref; struct list_head list; const struct eventfs_entry *entries; const char *name; struct list_head children; - struct dentry *dentry; /* Check is_freed to access */ - struct dentry **d_children; + struct dentry *events_dir; struct eventfs_attr *entry_attrs; struct eventfs_attr attr; void *data; From 70fbfc47a392b98e5f8dba70c6efc6839205c982 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:01 +0100 Subject: [PATCH 704/768] nvme-fc: do not wait in vain when unloading module The module exit path has race between deleting all controllers and freeing 'left over IDs'. To prevent double free a synchronization between nvme_delete_ctrl and ida_destroy has been added by the initial commit. There is some logic around trying to prevent from hanging forever in wait_for_completion, though it does not handling all cases. E.g. blktests is able to reproduce the situation where the module unload hangs forever. If we completely rely on the cleanup code executed from the nvme_delete_ctrl path, all IDs will be freed eventually. This makes calling ida_destroy unnecessary. We only have to ensure that all nvme_delete_ctrl code has been executed before we leave nvme_fc_exit_module. This is done by flushing the nvme_delete_wq workqueue. While at it, remove the unused nvme_fc_wq workqueue too. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 47 ++++++------------------------------------ 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 63a2e2839a78..5630d5689f28 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3896,10 +3889,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3917,7 +3906,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3941,8 +3930,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3962,45 +3949,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); From dcfad4ab4d6733f2861cd241d8532a0004fc835a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:02 +0100 Subject: [PATCH 705/768] nvmet-fcloop: swap the list_add_tail arguments The first argument of list_add_tail function is the new element which should be added to the list which is the second argument. Swap the arguments to allow processing more than one element at a time. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fcloop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c1faeb1e9e55..1471af250ea6 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; From c691e6d7e13dab81ac8c7489c83b5dea972522a5 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:03 +0100 Subject: [PATCH 706/768] nvmet-fc: release reference on target port In case we return early out of __nvmet_fc_finish_ls_req() we still have to release the reference on the target port. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 05e6a755b330..3d53d9dc1099 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -360,7 +360,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_puttgtport; } list_del(&lsop->lsreq_list); @@ -373,6 +373,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); +out_puttgtport: nvmet_fc_tgtport_put(tgtport); } From 4049dc96b8de7aeb3addcea039446e464726a525 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:04 +0100 Subject: [PATCH 707/768] nvmet-fc: defer cleanup using RCU properly When the target executes a disconnect and the host triggers a reconnect immediately, the reconnect command still finds an existing association. The reconnect crashes later on because nvmet_fc_delete_target_assoc blindly removes resources while the reconnect code wants to use it. To address this, nvmet_fc_find_target_assoc should not be able to lookup an association which is being removed. The association list is already under RCU lifetime management, so let's properly use it and remove the association from the list and wait for a grace period before cleaning up all. This means we also can drop the RCU management on the queues, because this is now handled via the association itself. A second step split the execution context so that the initial disconnect command can complete without running the reconnect code in the same context. As usual, this is done by deferring the ->done to a workqueue. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 83 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3d53d9dc1099..11ecfef41bd1 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -166,7 +166,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; struct rcu_head rcu; @@ -803,14 +803,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -832,15 +829,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -853,12 +848,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); kfree_rcu(queue, rcu); @@ -970,7 +961,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1173,13 +1164,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1209,7 +1205,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1218,29 +1214,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + synchronize_rcu(); + + /* ensure all in-flight I/Os have been processed */ for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } - - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1493,9 +1481,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1548,9 +1535,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1582,7 +1568,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1594,9 +1580,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1626,6 +1611,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1871,9 +1858,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1888,8 +1872,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1905,6 +1887,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2903,6 +2888,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2934,6 +2922,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); From c5e27b1a779ec25779d04c3af65aebaee6bd4304 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:05 +0100 Subject: [PATCH 708/768] nvmet-fc: free queue and assoc directly Neither struct nvmet_fc_tgt_queue nor struct nvmet_fc_tgt_assoc are data structure which are used in a RCU context. So there is no reason to delay the free operation. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 11ecfef41bd1..b44b99525c44 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -145,7 +145,6 @@ struct nvmet_fc_tgt_queue { struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; - struct rcu_head rcu; /* array of fcp_iods */ struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize); } __aligned(sizeof(unsigned long long)); @@ -169,7 +168,6 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; - struct rcu_head rcu; }; @@ -852,7 +850,7 @@ nvmet_fc_tgt_queue_free(struct kref *ref) destroy_workqueue(queue->work_q); - kfree_rcu(queue, rcu); + kfree(queue); } static void @@ -1185,8 +1183,8 @@ nvmet_fc_target_assoc_free(struct kref *ref) dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); - kfree_rcu(assoc, rcu); nvmet_fc_tgtport_put(tgtport); + kfree(assoc); } static void From ca121a0f7515591dba0eb5532bfa7ace4dc153ce Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:06 +0100 Subject: [PATCH 709/768] nvmet-fc: hold reference on hostport match The hostport data structure is shared between the association, this why we keep track of the users via a refcount. So we should not decrement the refcount on a match and free the hostport several times. Reported by KASAN. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index b44b99525c44..205a12b1e841 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1068,8 +1068,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; From 50b474e1faff50f770410f402ebbdf48bf8cc9b0 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:07 +0100 Subject: [PATCH 710/768] nvmet-fc: remove null hostport pointer check An association has always a valid hostport pointer. Remove useless null pointer check. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 205a12b1e841..849d9b17c60a 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -488,8 +488,7 @@ nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc) * message is normal. Otherwise, send unless the hostport has * already been invalidated by the lldd. */ - if (!tgtport->ops->ls_req || !assoc->hostport || - assoc->hostport->invalid) + if (!tgtport->ops->ls_req || assoc->hostport->invalid) return; lsop = kzalloc((sizeof(*lsop) + @@ -1524,8 +1523,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, spin_lock_irqsave(&tgtport->lock, flags); list_for_each_entry_safe(assoc, next, &tgtport->assoc_list, a_list) { - if (!assoc->hostport || - assoc->hostport->hosthandle != hosthandle) + if (assoc->hostport->hosthandle != hosthandle) continue; if (!nvmet_fc_tgt_a_get(assoc)) continue; From 1c110588dd95d21782397ff3cbaa55820b4e1fad Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:08 +0100 Subject: [PATCH 711/768] nvmet-fc: do not tack refs on tgtports from assoc The association life time is tied to the life time of the target port. That means we should not take extra a refcount when creating a association. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 849d9b17c60a..fe3246024836 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1107,12 +1107,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) if (idx < 0) goto out_free_assoc; - if (!nvmet_fc_tgtport_get(tgtport)) - goto out_ida; - assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle); if (IS_ERR(assoc->hostport)) - goto out_put; + goto out_ida; assoc->tgtport = tgtport; assoc->a_id = idx; @@ -1142,8 +1139,6 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) return assoc; -out_put: - nvmet_fc_tgtport_put(tgtport); out_ida: ida_free(&tgtport->assoc_cnt, idx); out_free_assoc: @@ -1180,7 +1175,6 @@ nvmet_fc_target_assoc_free(struct kref *ref) dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); - nvmet_fc_tgtport_put(tgtport); kfree(assoc); } From 3146345c2e9c2f661527054e402b0cfad80105a4 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:09 +0100 Subject: [PATCH 712/768] nvmet-fc: abort command when there is no binding When the target port has not active port binding, there is no point in trying to process the command as it has to fail anyway. Instead adding checks to all commands abort the command early. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index fe3246024836..c80b8a066fd1 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1099,6 +1099,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -2514,8 +2517,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); From 710c69dbaccdac312e32931abcb8499c1525d397 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:10 +0100 Subject: [PATCH 713/768] nvmet-fc: avoid deadlock on delete association path When deleting an association the shutdown path is deadlocking because we try to flush the nvmet_wq nested. Avoid this by deadlock by deferring the put work into its own work item. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index c80b8a066fd1..3e0d391e631b 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -247,6 +249,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -358,7 +367,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - goto out_puttgtport; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -371,8 +380,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); -out_puttgtport: - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -1396,6 +1405,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { From fe506a74589326183297d5abdda02d0c76ae5a8b Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:11 +0100 Subject: [PATCH 714/768] nvmet-fc: take ref count on tgtport before delete assoc We have to ensure that the tgtport is not going away before be have remove all the associations. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3e0d391e631b..671d096745a5 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1090,13 +1090,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1127,7 +1142,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1483,7 +1498,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); @@ -1536,7 +1551,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1581,7 +1596,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return; } @@ -1888,7 +1903,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return false; From a90ac7b348770ff3d246fac575306783de381e51 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:12 +0100 Subject: [PATCH 715/768] nvmet-fc: use RCU list iterator for assoc_list The assoc_list is a RCU protected list, thus use the RCU flavor of list functions. Let's use this opportunity and refactor this code and move the lookup into a helper and give it a descriptive name. Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 671d096745a5..fd229f310c93 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1114,14 +1114,27 @@ nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) queue_work(nvmet_wq, &assoc->del_work); } +static bool +nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id) +{ + struct nvmet_fc_tgt_assoc *a; + + list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) { + if (association_id == a->association_id) + return true; + } + + return false; +} + static struct nvmet_fc_tgt_assoc * nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { - struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; + struct nvmet_fc_tgt_assoc *assoc; unsigned long flags; + bool done; u64 ran; int idx; - bool needrandom = true; if (!tgtport->pe) return NULL; @@ -1145,24 +1158,21 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); - while (needrandom) { + done = false; + do { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); ran = ran << BYTES_FOR_QID_SHIFT; spin_lock_irqsave(&tgtport->lock, flags); - needrandom = false; - list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) { - if (ran == tmpassoc->association_id) { - needrandom = true; - break; - } - } - if (!needrandom) { + rcu_read_lock(); + if (!nvmet_fc_assoc_exits(tgtport, ran)) { assoc->association_id = ran; list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list); + done = true; } + rcu_read_unlock(); spin_unlock_irqrestore(&tgtport->lock, flags); - } + } while (!done); return assoc; From 524719b4c60dc5c5edff0cdfd715b4e2d6c16ede Mon Sep 17 00:00:00 2001 From: "Nitin U. Yewale" Date: Mon, 29 Jan 2024 16:36:37 +0530 Subject: [PATCH 716/768] nvme-tcp: show hostnqn when connecting to tcp target Log hostnqn when connecting to nvme target. As hostnqn could be changed, logging this information in syslog at appropriate time may help in troubleshooting. Signed-off-by: Nitin U. Yewale Reviewed-by: John Meneghini Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9f8dea2e2c7d..a6d596e05602 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2753,8 +2753,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, if (ret) goto out_uninit_ctrl; - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", - nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n", + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); From d2045e6a4e2f3bcb3a2dab6684f8fbc95239d566 Mon Sep 17 00:00:00 2001 From: "Nitin U. Yewale" Date: Mon, 29 Jan 2024 16:36:38 +0530 Subject: [PATCH 717/768] nvme-rdma: show hostnqn when connecting to rdma target Log hostnqn when connecting to nvme target. As hostnqn could be changed, logging this information in syslog at appropriate time may help in troubleshooting. Signed-off-by: Nitin U. Yewale Reviewed-by: John Meneghini Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6adf2c19a712..20fdd40b1879 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2300,8 +2300,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, if (ret) goto out_uninit_ctrl; - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", - nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs, hostnqn: %s\n", + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); From c3a846fe4bf3ecbdf48e561b90fea6cc27ac6423 Mon Sep 17 00:00:00 2001 From: "Nitin U. Yewale" Date: Mon, 29 Jan 2024 16:36:39 +0530 Subject: [PATCH 718/768] nvme-fc: show hostnqn when connecting to fc target Log hostnqn when connecting to nvme target. As hostnqn could be changed, logging this information in syslog at appropriate time may help in troubleshooting. Signed-off-by: Nitin U. Yewale Reviewed-by: John Meneghini Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5630d5689f28..68a5d971657b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3570,8 +3570,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, flush_delayed_work(&ctrl->connect_work); dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl)); + "NVME-FC{%d}: new ctrl: NQN \"%s\", hostnqn: %s\n", + ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl), opts->host->nqn); return &ctrl->ctrl; From 9f079dda14339ee87d864306a9dc8c6b4e4da40b Mon Sep 17 00:00:00 2001 From: Alan Adamson Date: Mon, 29 Jan 2024 16:19:38 -0800 Subject: [PATCH 719/768] nvme: allow passthru cmd error logging Commit d7ac8dca938c ("nvme: quiet user passthrough command errors") disabled error logging for user passthrough commands. This commit adds the ability to opt-in to passthrough admin error logging. IO commands initiated as passthrough will always be logged. The logging output for passthrough commands (Admin and IO) has been changed to include CDWXX fields. nvme0n1: Read(0x2), LBA Out of Range (sct 0x0 / sc 0x80) DNR cdw10=0x0 cdw11=0x1 cdw12=0x70000 cdw13=0x0 cdw14=0x0 cdw15=0x0 Add a helper function nvme_log_err_passthru() which allows us to log error for passthru commands by decoding cdw10-cdw15 values of nvme command. Add a new sysfs attr passthru_err_log_enabled that allows user to conditionally enable passthrough command logging for either passthrough Admin commands sent to the controller or passthrough IO commands sent to a namespace. By default, passthrough error logging is disabled. To enable passthrough admin error logging: echo 1 > /sys/class/nvme/nvme0/passthru_err_log_enabled To disable passthrough admin error logging: echo 0 > /sys/class/nvme/nvme0/passthru_err_log_enabled To enable passthrough io error logging: echo 1 > /sys/class/nvme/nvme0/nvme0n1/passthru_err_log_enabled To disable passthrough io error logging: echo 0 > /sys/class/nvme/nvme0/nvme0n1/passthru_err_log_enabled Signed-off-by: Alan Adamson Signed-off-by: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 59 +++++++++++++++++++++++++++++++++++---- drivers/nvme/host/nvme.h | 3 +- drivers/nvme/host/sysfs.c | 58 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8b48b7f7871a..0d124a8ca9c3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -338,6 +338,30 @@ static void nvme_log_error(struct request *req) nr->status & NVME_SC_DNR ? "DNR " : ""); } +static void nvme_log_err_passthru(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + struct nvme_request *nr = nvme_req(req); + + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s" + "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n", + ns ? ns->disk->disk_name : dev_name(nr->ctrl->device), + ns ? nvme_get_opcode_str(nr->cmd->common.opcode) : + nvme_get_admin_opcode_str(nr->cmd->common.opcode), + nr->cmd->common.opcode, + nvme_get_error_status_str(nr->status), + nr->status >> 8 & 7, /* Status Code Type */ + nr->status & 0xff, /* Status Code */ + nr->status & NVME_SC_MORE ? "MORE " : "", + nr->status & NVME_SC_DNR ? "DNR " : "", + nr->cmd->common.cdw10, + nr->cmd->common.cdw11, + nr->cmd->common.cdw12, + nr->cmd->common.cdw13, + nr->cmd->common.cdw14, + nr->cmd->common.cdw14); +} + enum nvme_disposition { COMPLETE, RETRY, @@ -385,8 +409,12 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) - nvme_log_error(req); + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { + if (blk_rq_is_passthrough(req)) + nvme_log_err_passthru(req); + else + nvme_log_error(req); + } nvme_end_req_zoned(req); nvme_trace_bio_complete(req); if (req->cmd_flags & REQ_NVME_MPATH) @@ -679,10 +707,21 @@ static inline void nvme_clear_nvme_request(struct request *req) /* initialize a passthrough request */ void nvme_init_request(struct request *req, struct nvme_command *cmd) { - if (req->q->queuedata) + struct nvme_request *nr = nvme_req(req); + bool logging_enabled; + + if (req->q->queuedata) { + struct nvme_ns *ns = req->q->disk->private_data; + + logging_enabled = ns->passthru_err_log_enabled; req->timeout = NVME_IO_TIMEOUT; - else /* no queuedata implies admin queue */ + } else { /* no queuedata implies admin queue */ + logging_enabled = nr->ctrl->passthru_err_log_enabled; req->timeout = NVME_ADMIN_TIMEOUT; + } + + if (!logging_enabled) + req->rq_flags |= RQF_QUIET; /* passthru commands should let the driver set the SGL flags */ cmd->common.flags &= ~NVME_CMD_SGL_ALL; @@ -691,8 +730,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd) if (req->mq_hctx->type == HCTX_TYPE_POLL) req->cmd_flags |= REQ_POLLED; nvme_clear_nvme_request(req); - req->rq_flags |= RQF_QUIET; - memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); + memcpy(nr->cmd, cmd, sizeof(*cmd)); } EXPORT_SYMBOL_GPL(nvme_init_request); @@ -3658,6 +3696,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) ns->disk = disk; ns->queue = disk->queue; + ns->passthru_err_log_enabled = false; if (ctrl->opts && ctrl->opts->data_digest) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); @@ -3721,6 +3760,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) nvme_mpath_add_disk(ns, info->anagrpid); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); + /* + * Set ns->disk->device->driver_data to ns so we can access + * ns->logging_enabled in nvme_passthru_err_log_enabled_store() and + * nvme_passthru_err_log_enabled_show(). + */ + dev_set_drvdata(disk_to_dev(ns->disk), ns); + return; out_cleanup_ns_from_list: @@ -4521,6 +4567,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, int ret; WRITE_ONCE(ctrl->state, NVME_CTRL_NEW); + ctrl->passthru_err_log_enabled = false; clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); spin_lock_init(&ctrl->lock); mutex_init(&ctrl->scan_lock); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b70b333a0874..3897334e3950 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -263,6 +263,7 @@ enum nvme_ctrl_flags { struct nvme_ctrl { bool comp_seen; bool identified; + bool passthru_err_log_enabled; enum nvme_ctrl_state state; spinlock_t lock; struct mutex scan_lock; @@ -522,7 +523,7 @@ struct nvme_ns { struct device cdev_device; struct nvme_fault_inject fault_inject; - + bool passthru_err_log_enabled; }; /* NVMe ns supports metadata actions by the controller (generate/strip) */ diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 6b2f06f190de..d099218e494a 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -35,6 +35,62 @@ static ssize_t nvme_sysfs_rescan(struct device *dev, } static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); +static ssize_t nvme_adm_passthru_err_log_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, + ctrl->passthru_err_log_enabled ? "on\n" : "off\n"); +} + +static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + int err; + bool passthru_err_log_enabled; + + err = kstrtobool(buf, &passthru_err_log_enabled); + if (err) + return -EINVAL; + + ctrl->passthru_err_log_enabled = passthru_err_log_enabled; + + return count; +} + +static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *n = dev_get_drvdata(dev); + + return sysfs_emit(buf, n->passthru_err_log_enabled ? "on\n" : "off\n"); +} + +static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ns *ns = dev_get_drvdata(dev); + int err; + bool passthru_err_log_enabled; + + err = kstrtobool(buf, &passthru_err_log_enabled); + if (err) + return -EINVAL; + ns->passthru_err_log_enabled = passthru_err_log_enabled; + + return count; +} + +static struct device_attribute dev_attr_adm_passthru_err_log_enabled = \ + __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \ + nvme_adm_passthru_err_log_enabled_show, nvme_adm_passthru_err_log_enabled_store); + +static struct device_attribute dev_attr_io_passthru_err_log_enabled = \ + __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \ + nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store); + static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); @@ -208,6 +264,7 @@ static struct attribute *nvme_ns_attrs[] = { &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, #endif + &dev_attr_io_passthru_err_log_enabled.attr, NULL, }; @@ -655,6 +712,7 @@ static struct attribute *nvme_dev_attrs[] = { #ifdef CONFIG_NVME_TCP_TLS &dev_attr_tls_key.attr, #endif + &dev_attr_adm_passthru_err_log_enabled.attr, NULL }; From ae3f4b44641dfff969604735a0dcbf931f383285 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 31 Jan 2024 02:21:49 -0800 Subject: [PATCH 720/768] net: sysfs: Fix /sys/class/net/ path The documentation is pointing to the wrong path for the interface. Documentation is pointing to /sys/class/, instead of /sys/class/net/. Fix it by adding the `net/` directory before the interface. Fixes: 1a02ef76acfa ("net: sysfs: add documentation entries for /sys/class//queues") Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240131102150.728960-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- .../ABI/testing/sysfs-class-net-queues | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 906ff3ca928a..5bff64d256c2 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -1,4 +1,4 @@ -What: /sys/class//queues/rx-/rps_cpus +What: /sys/class/net//queues/rx-/rps_cpus Date: March 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -8,7 +8,7 @@ Description: network device queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/rx-/rps_flow_cnt +What: /sys/class/net//queues/rx-/rps_flow_cnt Date: April 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -16,7 +16,7 @@ Description: Number of Receive Packet Steering flows being currently processed by this particular network device receive queue. -What: /sys/class//queues/tx-/tx_timeout +What: /sys/class/net//queues/tx-/tx_timeout Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -24,7 +24,7 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. -What: /sys/class//queues/tx-/tx_maxrate +What: /sys/class/net//queues/tx-/tx_maxrate Date: March 2015 KernelVersion: 4.1 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: A Mbps max-rate set for the queue, a value of zero means disabled, default is disabled. -What: /sys/class//queues/tx-/xps_cpus +What: /sys/class/net//queues/tx-/xps_cpus Date: November 2010 KernelVersion: 2.6.38 Contact: netdev@vger.kernel.org @@ -42,7 +42,7 @@ Description: network device transmit queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/tx-/xps_rxqs +What: /sys/class/net//queues/tx-/xps_rxqs Date: June 2018 KernelVersion: 4.18.0 Contact: netdev@vger.kernel.org @@ -53,7 +53,7 @@ Description: number of available receive queue(s) in the network device. Default is disabled. -What: /sys/class//queues/tx-/byte_queue_limits/hold_time +What: /sys/class/net//queues/tx-/byte_queue_limits/hold_time Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -62,7 +62,7 @@ Description: of this particular network device transmit queue. Default value is 1000. -What: /sys/class//queues/tx-/byte_queue_limits/inflight +What: /sys/class/net//queues/tx-/byte_queue_limits/inflight Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -70,7 +70,7 @@ Description: Indicates the number of bytes (objects) in flight on this network device transmit queue. -What: /sys/class//queues/tx-/byte_queue_limits/limit +What: /sys/class/net//queues/tx-/byte_queue_limits/limit Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -79,7 +79,7 @@ Description: on this network device transmit queue. This value is clamped to be within the bounds defined by limit_max and limit_min. -What: /sys/class//queues/tx-/byte_queue_limits/limit_max +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_max Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -88,7 +88,7 @@ Description: queued on this network device transmit queue. See include/linux/dynamic_queue_limits.h for the default value. -What: /sys/class//queues/tx-/byte_queue_limits/limit_min +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_min Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org From 7b55984c96ffe9e236eb9c82a2196e0b1f84990d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 29 Jan 2024 14:03:08 +0100 Subject: [PATCH 721/768] xen-netback: properly sync TX responses Invoking the make_tx_response() / push_tx_responses() pair with no lock held would be acceptable only if all such invocations happened from the same context (NAPI instance or dealloc thread). Since this isn't the case, and since the interface "spec" also doesn't demand that multicast operations may only be performed with no in-flight transmits, MCAST_{ADD,DEL} processing also needs to acquire the response lock around the invocations. To prevent similar mistakes going forward, "downgrade" the present functions to private helpers of just the two remaining ones using them directly, with no forward declarations anymore. This involves renaming what so far was make_tx_response(), for the new function of that name to serve the new (wrapper) purpose. While there, - constify the txp parameters, - correct xenvif_idx_release()'s status parameter's type, - rename {,_}make_tx_response()'s status parameters for consistency with xenvif_idx_release()'s. Fixes: 210c34dcd8d9 ("xen-netback: add support for multicast control") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/980c6c3d-e10e-4459-8565-e8fbde122f00@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/netback.c | 100 ++++++++++++++---------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index d7503aef599f..fab361a250d6 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -995,7 +979,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1007,7 +990,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1433,44 +1415,17 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } -static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) -{ - struct pending_tx_info *pending_tx_info; - pending_ring_idx_t index; - unsigned long flags; - - pending_tx_info = &queue->pending_tx_info[pending_idx]; - - spin_lock_irqsave(&queue->response_lock, flags); - - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); - - /* Release the pending index before pusing the Tx response so - * its available before a new Tx request is pushed by the - * frontend. - */ - index = pending_index(queue->pending_prod++); - queue->pending_ring[index] = pending_idx; - - push_tx_responses(queue); - - spin_unlock_irqrestore(&queue->response_lock, flags); -} - - -static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { RING_IDX i = queue->tx.rsp_prod_pvt; struct xen_netif_tx_response *resp; resp = RING_GET_RESPONSE(&queue->tx, i); resp->id = txp->id; - resp->status = st; + resp->status = status; while (extra_count-- != 0) RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; @@ -1487,6 +1442,47 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } +static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, + s8 status) +{ + struct pending_tx_info *pending_tx_info; + pending_ring_idx_t index; + unsigned long flags; + + pending_tx_info = &queue->pending_tx_info[pending_idx]; + + spin_lock_irqsave(&queue->response_lock, flags); + + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); + + /* Release the pending index before pusing the Tx response so + * its available before a new Tx request is pushed by the + * frontend. + */ + index = pending_index(queue->pending_prod++); + queue->pending_ring[index] = pending_idx; + + push_tx_responses(queue); + + spin_unlock_irqrestore(&queue->response_lock, flags); +} + +static void make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->response_lock, flags); + + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); + + spin_unlock_irqrestore(&queue->response_lock, flags); +} + static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; From e0526ec5360a48ad3ab2e26e802b0532302a7e11 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Tue, 30 Jan 2024 23:35:51 -0800 Subject: [PATCH 722/768] hv_netvsc: Fix race condition between netvsc_probe and netvsc_remove In commit ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel"), napi_disable was getting called for all channels, including all subchannels without confirming if they are enabled or not. This caused hv_netvsc getting hung at napi_disable, when netvsc_probe() has finished running but nvdev->subchan_work has not started yet. netvsc_subchan_work() -> rndis_set_subchannel() has not created the sub-channels and because of that netvsc_sc_open() is not running. netvsc_remove() calls cancel_work_sync(&nvdev->subchan_work), for which netvsc_subchan_work did not run. netif_napi_add() sets the bit NAPI_STATE_SCHED because it ensures NAPI cannot be scheduled. Then netvsc_sc_open() -> napi_enable will clear the NAPIF_STATE_SCHED bit, so it can be scheduled. napi_disable() does the opposite. Now during netvsc_device_remove(), when napi_disable is called for those subchannels, napi_disable gets stuck on infinite msleep. This fix addresses this problem by ensuring that napi_disable() is not getting called for non-enabled NAPI struct. But netif_napi_del() is still necessary for these non-enabled NAPI struct for cleanup purpose. Call trace: [ 654.559417] task:modprobe state:D stack: 0 pid: 2321 ppid: 1091 flags:0x00004002 [ 654.568030] Call Trace: [ 654.571221] [ 654.573790] __schedule+0x2d6/0x960 [ 654.577733] schedule+0x69/0xf0 [ 654.581214] schedule_timeout+0x87/0x140 [ 654.585463] ? __bpf_trace_tick_stop+0x20/0x20 [ 654.590291] msleep+0x2d/0x40 [ 654.593625] napi_disable+0x2b/0x80 [ 654.597437] netvsc_device_remove+0x8a/0x1f0 [hv_netvsc] [ 654.603935] rndis_filter_device_remove+0x194/0x1c0 [hv_netvsc] [ 654.611101] ? do_wait_intr+0xb0/0xb0 [ 654.615753] netvsc_remove+0x7c/0x120 [hv_netvsc] [ 654.621675] vmbus_remove+0x27/0x40 [hv_vmbus] Cc: stable@vger.kernel.org Fixes: ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1706686551-28510-1-git-send-email-schakrabarti@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1dafa44155d0..a6fcbda64ecc 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -708,7 +708,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } From 7b6fb3050d8f5e2b6858eef344e47ac1f5442827 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:44 -0500 Subject: [PATCH 723/768] selftests: team: Add missing config options Similar to commit dd2d40acdbb2 ("selftests: bonding: Add more missing config options"), add more networking-specific config options which are needed for team device tests. For testing, I used the minimal config generated by virtme-ng and I added the options in the config file. Afterwards, the team device test passed. Fixes: bbb774d921e2 ("net: Add tests for bonding and team address list management") Reviewed-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-2-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/team/config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 265b6882cc21..b5e3a3aad4bf 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,3 +1,5 @@ +CONFIG_DUMMY=y +CONFIG_IPV6=y +CONFIG_MACVLAN=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_MACVLAN=y From 8cc063ae1b3dbe416ce62a15d49af4c2314b45fe Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:45 -0500 Subject: [PATCH 724/768] selftests: bonding: Check initial state The purpose of the test_LAG_cleanup() function is to check that some hardware addresses are removed from underlying devices after they have been unenslaved. The test function simply checks that those addresses are not present at the end. However, if the addresses were never added to begin with due to some error in device setup, the test function currently passes. This is a false positive since in that situation the test did not actually exercise the intended functionality. Add a check that the expected addresses are indeed present after device setup. This makes the test function more robust. I noticed this problem when running the team/dev_addr_lists.sh test on a system without support for dummy and ipv6: tools/testing/selftests/drivers/net/team# ./dev_addr_lists.sh Error: Unknown device type. Error: Unknown device type. This program is not intended to be run as root. RTNETLINK answers: Operation not supported TEST: team cleanup mode lacp [ OK ] Fixes: bbb774d921e2 ("net: Add tests for bonding and team address list management") Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-3-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/bonding/lag_lib.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 2a268b17b61f..dbdd736a41d3 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -48,6 +48,17 @@ test_LAG_cleanup() ip link add mv0 link "$name" up address "$ucaddr" type macvlan # Used to test dev->mc handling ip address add "$addr6" dev "$name" + + # Check that addresses were added as expected + (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "macvlan unicast address not found on a slave" + + # mcaddr is added asynchronously by addrconf_dad_work(), use busywait + (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "IPv6 solicited-node multicast mac address not found on a slave" + ip link set dev "$name" down ip link del "$name" From 9d851dd4dab63e95c1911a2fa847796d1ec5d58d Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:46 -0500 Subject: [PATCH 725/768] selftests: net: Remove executable bits from library scripts setup_loopback.sh and net_helper.sh are meant to be sourced from other scripts, not executed directly. Therefore, remove the executable bits from those files' permissions. This change is similar to commit 49078c1b80b6 ("selftests: forwarding: Remove executable bits from lib.sh") Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Fixes: 3bdd9fd29cb0 ("selftests/net: synchronize udpgro tests' tx and rx connection") Suggested-by: Paolo Abeni Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-4-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/net_helper.sh | 0 tools/testing/selftests/net/setup_loopback.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tools/testing/selftests/net/net_helper.sh mode change 100755 => 100644 tools/testing/selftests/net/setup_loopback.sh diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh old mode 100755 new mode 100644 diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh old mode 100755 new mode 100644 From 06efafd8608dac0c3a480539acc66ee41d2fb430 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:47 -0500 Subject: [PATCH 726/768] selftests: net: List helper scripts in TEST_FILES Makefile variable Some scripts are not tests themselves; they contain utility functions used by other tests. According to Documentation/dev-tools/kselftest.rst, such files should be listed in TEST_FILES. Move those utility scripts to TEST_FILES. Fixes: 1751eb42ddb5 ("selftests: net: use TEST_PROGS_EXTENDED") Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Fixes: b99ac1841147 ("kselftests/net: add missed setup_loopback.sh/setup_veth.sh to Makefile") Fixes: f5173fe3e13b ("selftests: net: included needed helper in the install targets") Suggested-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-5-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 48c6f93b8149..211753756bde 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,9 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh -TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh -TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh -TEST_PROGS_EXTENDED += net_helper.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -97,6 +95,7 @@ TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings +TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh include ../lib.mk From 96cd5ac4c0e6b91b74c8fbfcaa7e5c943dfa4835 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:48 -0500 Subject: [PATCH 727/768] selftests: forwarding: List helper scripts in TEST_FILES Makefile variable Some scripts are not tests themselves; they contain utility functions used by other tests. According to Documentation/dev-tools/kselftest.rst, such files should be listed in TEST_FILES. Currently they are incorrectly listed in TEST_PROGS_EXTENDED so rename the variable. Fixes: c085dbfb1cfc ("selftests/net/forwarding: define libs as TEST_PROGS_EXTENDED") Suggested-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-6-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 452693514be4..4de92632f483 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -112,7 +112,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh -TEST_PROGS_EXTENDED := devlink_lib.sh \ +TEST_FILES := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ From 1939f738c73dfdb8389839bdc9624c765e3326e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Jan 2024 08:56:05 -0800 Subject: [PATCH 728/768] selftests: net: add missing config for NF_TARGET_TTL amt test uses the TTL iptables module: ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \ -d 239.0.0.1 -j TTL --ttl-set 2 Fixes: c08e8baea78e ("selftests: add amt interface selftest script") Link: https://lore.kernel.org/r/20240131165605.4051645-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 98c6bd2228c6..24a7c7bcbbc1 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -33,6 +33,7 @@ CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_L2TP_ETH=m From c15a729c9d45aa142fb01a3afee822ab1f0e62a8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 18:52:29 +0100 Subject: [PATCH 729/768] selftests: net: enable some more knobs The rtnetlink tests require additional options currently off by default. Fixes: 2766a11161cc ("selftests: rtnetlink: add ipsec offload API test") Fixes: 5e596ee171ba ("selftests: add xfrm state-policy-monitor to rtnetlink.sh") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/9048ca58e49b962f35dba1dfb2beaf3dab3e0411.1706723341.git.pabeni@redhat.com/ Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 24a7c7bcbbc1..3b749addd364 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -22,6 +22,8 @@ CONFIG_VLAN_8021Q=y CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y @@ -93,3 +95,4 @@ CONFIG_IP_SCTP=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m +CONFIG_XFRM_USER=m From 1389358bb008e7625942846e9f03554319b7fecc Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 1 Feb 2024 16:13:39 +0100 Subject: [PATCH 730/768] tracing/timerlat: Move hrtimer_init to timerlat_fd open() Currently, the timerlat's hrtimer is initialized at the first read of timerlat_fd, and destroyed at close(). It works, but it causes an error if the user program open() and close() the file without reading. Here's an example: # echo NO_OSNOISE_WORKLOAD > /sys/kernel/debug/tracing/osnoise/options # echo timerlat > /sys/kernel/debug/tracing/current_tracer # cat < ./timerlat_load.py # !/usr/bin/env python3 timerlat_fd = open("/sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd", 'r') timerlat_fd.close(); EOF # ./taskset -c 0 ./timerlat_load.py BUG: kernel NULL pointer dereference, address: 0000000000000010 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 2673 Comm: python3 Not tainted 6.6.13-200.fc39.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-1.fc39 04/01/2014 RIP: 0010:hrtimer_active+0xd/0x50 Code: 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 48 8b 57 30 <8b> 42 10 a8 01 74 09 f3 90 8b 42 10 a8 01 75 f7 80 7f 38 00 75 1d RSP: 0018:ffffb031009b7e10 EFLAGS: 00010286 RAX: 000000000002db00 RBX: ffff9118f786db08 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9117a0e64400 RDI: ffff9118f786db08 RBP: ffff9118f786db80 R08: ffff9117a0ddd420 R09: ffff9117804d4f70 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9118f786db08 R13: ffff91178fdd5e20 R14: ffff9117840978c0 R15: 0000000000000000 FS: 00007f2ffbab1740(0000) GS:ffff9118f7840000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 00000001b402e000 CR4: 0000000000750ee0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? srso_alias_return_thunk+0x5/0x7f ? avc_has_extended_perms+0x237/0x520 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? hrtimer_active+0xd/0x50 hrtimer_cancel+0x15/0x40 timerlat_fd_release+0x48/0xe0 __fput+0xf5/0x290 __x64_sys_close+0x3d/0x80 do_syscall_64+0x60/0x90 ? srso_alias_return_thunk+0x5/0x7f ? __x64_sys_ioctl+0x72/0xd0 ? srso_alias_return_thunk+0x5/0x7f ? syscall_exit_to_user_mode+0x2b/0x40 ? srso_alias_return_thunk+0x5/0x7f ? do_syscall_64+0x6c/0x90 ? srso_alias_return_thunk+0x5/0x7f ? exit_to_user_mode_prepare+0x142/0x1f0 ? srso_alias_return_thunk+0x5/0x7f ? syscall_exit_to_user_mode+0x2b/0x40 ? srso_alias_return_thunk+0x5/0x7f ? do_syscall_64+0x6c/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7f2ffb321594 Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 cd 0d 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 3c c3 0f 1f 00 55 48 89 e5 48 83 ec 10 89 7d RSP: 002b:00007ffe8d8eef18 EFLAGS: 00000202 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 00007f2ffba4e668 RCX: 00007f2ffb321594 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007ffe8d8eef40 R08: 0000000000000000 R09: 0000000000000000 R10: 55c926e3167eae79 R11: 0000000000000202 R12: 0000000000000003 R13: 00007ffe8d8ef030 R14: 0000000000000000 R15: 00007f2ffba4e668 CR2: 0000000000000010 ---[ end trace 0000000000000000 ]--- Move hrtimer_init to timerlat_fd open() to avoid this problem. Link: https://lore.kernel.org/linux-trace-kernel/7324dd3fc0035658c99b825204a66049389c56e3.1706798888.git.bristot@kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: stable@vger.kernel.org Fixes: e88ed227f639 ("tracing/timerlat: Add user-space interface") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index bd0d01d00fb9..a8e28f9b9271 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2444,6 +2444,9 @@ static int timerlat_fd_open(struct inode *inode, struct file *file) tlat = this_cpu_tmr_var(); tlat->count = 0; + hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); + tlat->timer.function = timerlat_irq; + migrate_enable(); return 0; }; @@ -2526,9 +2529,6 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, tlat->tracing_thread = false; tlat->kthread = current; - hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); - tlat->timer.function = timerlat_irq; - /* Annotate now to drift new period */ tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); From 5a49f996046ba947466bc7461e4b19c4d1daf978 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:47 -0500 Subject: [PATCH 731/768] eventfs: Warn if an eventfs_inode is freed without is_freed being set There should never be a case where an evenfs_inode is being freed without is_freed being set. Add a WARN_ON_ONCE() if it ever happens. That would mean there was one too many put_ei()s. Link: https://lore.kernel.org/linux-trace-kernel/20240201161616.843551963@goodmis.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 515fdace1eea..ca7daee7c811 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -73,6 +73,9 @@ enum { static void release_ei(struct kref *ref) { struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + + WARN_ON_ONCE(!ei->is_freed); + kfree(ei->entry_attrs); kfree_const(ei->name); kfree_rcu(ei, rcu); @@ -84,6 +87,14 @@ static inline void put_ei(struct eventfs_inode *ei) kref_put(&ei->kref, release_ei); } +static inline void free_ei(struct eventfs_inode *ei) +{ + if (ei) { + ei->is_freed = 1; + put_ei(ei); + } +} + static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) { if (ei) @@ -679,7 +690,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { - put_ei(ei); + free_ei(ei); ei = NULL; } return ei; @@ -770,7 +781,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - put_ei(ei); + free_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } @@ -801,9 +812,8 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); - ei->is_freed = 1; list_del(&ei->list); - put_ei(ei); + free_ei(ei); } /** From 264424dfdd5cbd92bc5b5ddf93944929fc877fac Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:48 -0500 Subject: [PATCH 732/768] eventfs: Restructure eventfs_inode structure to be more condensed Some of the eventfs_inode structure has holes in it. Rework the structure to be a bit more condensed, and also remove the no longer used llist field. Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.002321438@goodmis.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/internal.h | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 1886f1826cd8..beb3dcd0e434 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -32,40 +32,37 @@ struct eventfs_attr { /* * struct eventfs_inode - hold the properties of the eventfs directories. * @list: link list into the parent directory + * @rcu: Union with @list for freeing + * @children: link list into the child eventfs_inode * @entries: the array of entries representing the files in the directory * @name: the name of the directory to create - * @children: link list into the child eventfs_inode * @events_dir: the dentry of the events directory * @entry_attrs: Saved mode and ownership of the @d_children - * @attr: Saved mode and ownership of eventfs_inode itself * @data: The private data to pass to the callbacks + * @attr: Saved mode and ownership of eventfs_inode itself * @is_freed: Flag set if the eventfs is on its way to be freed * Note if is_freed is set, then dentry is corrupted. + * @is_events: Flag set for only the top level "events" directory * @nr_entries: The number of items in @entries + * @ino: The saved inode number */ struct eventfs_inode { - struct kref kref; - struct list_head list; + union { + struct list_head list; + struct rcu_head rcu; + }; + struct list_head children; const struct eventfs_entry *entries; const char *name; - struct list_head children; struct dentry *events_dir; struct eventfs_attr *entry_attrs; - struct eventfs_attr attr; void *data; + struct eventfs_attr attr; + struct kref kref; unsigned int is_freed:1; unsigned int is_events:1; unsigned int nr_entries:30; unsigned int ino; - /* - * Union - used for deletion - * @llist: for calling dput() if needed after RCU - * @rcu: eventfs_inode to delete in RCU - */ - union { - struct llist_node llist; - struct rcu_head rcu; - }; }; static inline struct tracefs_inode *get_tracefs(const struct inode *inode) From 12d823b31fadf47c8f36ecada7abac5f903cac33 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:49 -0500 Subject: [PATCH 733/768] eventfs: Remove fsnotify*() functions from lookup() The dentries and inodes are created when referenced in the lookup code. There's no reason to call fsnotify_*() functions when they are created by a reference. It doesn't make any sense. Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/ Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.166973329@goodmis.org Cc: stable@vger.kernel.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Fixes: a376007917776 ("eventfs: Implement functions to create files and dirs when accessed"); Suggested-by: Al Viro Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index ca7daee7c811..9e031e5a2713 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -366,7 +366,6 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei, dentry->d_fsdata = get_ei(parent_ei); d_add(dentry, inode); - fsnotify_create(dentry->d_parent->d_inode, dentry); return NULL; }; @@ -408,7 +407,6 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, inc_nlink(inode); d_add(dentry, inode); inc_nlink(dentry->d_parent->d_inode); - fsnotify_mkdir(dentry->d_parent->d_inode, dentry); return NULL; } From ca185770db914869ff9fe773bac5e0e5e4165b83 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:50 -0500 Subject: [PATCH 734/768] eventfs: Keep all directory links at 1 The directory link count in eventfs was somewhat bogus. It was only being updated when a directory child was being looked up and not on creation. One solution would be to update in get_attr() the link count by iterating the ei->children list and then adding 2. But that could slow down simple stat() calls, especially if it's done on all directories in eventfs. Another solution would be to add a parent pointer to the eventfs_inode and keep track of the number of sub directories it has on creation. But this adds overhead for something not really worthwhile. The solution decided upon is to keep all directory links in eventfs as 1. This tells user space not to rely on the hard links of directories. Which in this case it shouldn't. Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/ Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.339968298@goodmis.org Cc: stable@vger.kernel.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Suggested-by: Al Viro Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 9e031e5a2713..110e8a272189 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -404,9 +404,7 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, dentry->d_fsdata = get_ei(ei); - inc_nlink(inode); d_add(dentry, inode); - inc_nlink(dentry->d_parent->d_inode); return NULL; } @@ -769,9 +767,17 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry dentry->d_fsdata = get_ei(ei); - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); + /* + * Keep all eventfs directories with i_nlink == 1. + * Due to the dynamic nature of the dentry creations and not + * wanting to add a pointer to the parent eventfs_inode in the + * eventfs_inode structure, keeping the i_nlink in sync with the + * number of directories would cause too much complexity for + * something not worth much. Keeping directory links at 1 + * tells userspace not to trust the link number. + */ d_instantiate(dentry, inode); + /* The dentry of the "events" parent does keep track though */ inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); tracefs_end_creating(dentry); From b6c620dc43ccb4e802894e54b651cf81495e9598 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 22:49:46 +0100 Subject: [PATCH 735/768] mptcp: fix data re-injection from stale subflow When the MPTCP PM detects that a subflow is stale, all the packet scheduler must re-inject all the mptcp-level unacked data. To avoid acquiring unneeded locks, it first try to check if any unacked data is present at all in the RTX queue, but such check is currently broken, as it uses TCP-specific helper on an MPTCP socket. Funnily enough fuzzers and static checkers are happy, as the accessed memory still belongs to the mptcp_sock struct, and even from a functional perspective the recovery completed successfully, as the short-cut test always failed. A recent unrelated TCP change - commit d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") - exposed the issue, as the tcp field reorganization makes the mptcp code always skip the re-inection. Fix the issue dropping the bogus call: we are on a slow path, the early optimization proved once again to be evil. Fixes: 1e1d9d6f119c ("mptcp: handle pending data on closed subflow") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/468 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-1-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ed4709a7509..028e8b473626 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2314,9 +2314,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(msk)) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue From 3645c844902bd4e173d6704fc2a37e8746904d67 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:47 +0100 Subject: [PATCH 736/768] selftests: mptcp: add missing kconfig for NF Filter Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table. It is then required to have IP_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44dae..2a00bf4acdfa 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,6 +22,7 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_ACT_CSUM=m From 8c86fad2cecdc6bf7283ecd298b4d0555bd8b8aa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:48 +0100 Subject: [PATCH 737/768] selftests: mptcp: add missing kconfig for NF Filter in v6 Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table for IPv6. It is then required to have IP6_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-3-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2a00bf4acdfa..26fe466f803d 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -25,6 +25,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y From 2d41f10fa497182df9012d3e95d9cea24eb42e61 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:49 +0100 Subject: [PATCH 738/768] selftests: mptcp: add missing kconfig for NF Mangle Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Mangle table, only in IPv4. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-4-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 26fe466f803d..4f80014cae49 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -23,6 +23,7 @@ CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m From 4d4dfb2019d7010efb65926d9d1c1793f9a367c6 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:50 +0100 Subject: [PATCH 739/768] selftests: mptcp: increase timeout to 30 min On very slow environments -- e.g. when QEmu is used without KVM --, mptcp_join.sh selftest can take a bit more than 20 minutes. Bump the default timeout by 50% as it seems normal to take that long on some environments. When a debug kernel config is used, this selftest will take even longer, but that's certainly not a common test env to consider for the timeout. The Fixes tag that has been picked here is there simply to help having this patch backported to older stable versions. It is difficult to point to the exact commit that made some env reaching the timeout from time to time. Fixes: d17b968b9876 ("selftests: mptcp: increase timeout to 20 minutes") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-5-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf05db..abc5648b59ab 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 From 5e2f3c65af47e527ccac54060cf909e3306652ff Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:51 +0100 Subject: [PATCH 740/768] selftests: mptcp: decrease BW in simult flows When running the simult_flow selftest in slow environments -- e.g. QEmu without KVM support --, the results can be unstable. This selftest checks if the aggregated bandwidth is (almost) fully used as expected. To help improving the stability while still keeping the same validation in place, the BW and the delay are reduced to lower the pressure on the CPU. Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ae8ad5d6fb9d..0cc964e6f2c1 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -284,12 +284,12 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" mptcp_lib_result_print_all_tap exit $ret From de46d138e7735eded9756906747fd3a8c3a42225 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:52 +0100 Subject: [PATCH 741/768] selftests: mptcp: allow changing subtests prefix If a CI executes the same selftest multiple times with different options, all results from the same subtests will have the same title, which confuse the CI. With the same title printed in TAP, the tests are considered as the same ones. Now, it is possible to override this prefix by using MPTCP_LIB_KSFT_TEST env var, and have a different title. While at it, use 'basename' to remove the suffix as well instead of using an extra 'sed'. Fixes: c4192967e62f ("selftests: mptcp: lib: format subtests results in TAP") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-7-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 022262a2cfe0..3a2abae5993e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -6,7 +6,7 @@ readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 # shellcheck disable=SC2155 # declare and assign separately -readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g') +readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" MPTCP_LIB_SUBTESTS=() From 31ee4ad86afd6ed6f4bb1b38c43011216080c42a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:53 +0100 Subject: [PATCH 742/768] selftests: mptcp: join: stop transfer when check is done (part 1) Since the "Fixes" commit mentioned below, "userspace pm" subtests of mptcp_join selftests introduced in v6.5 are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds in slow environments. Note that old versions will need commit bdbef0a6ff10 ("selftests: mptcp: add mptcp_lib_kill_wait") as well to get 'mptcp_lib_kill_wait()' helper. Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org # 6.5.x: bdbef0a6ff10: selftests: mptcp: add mptcp_lib_kill_wait Cc: stable@vger.kernel.org # 6.5.x Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-8-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3a5b63026191..85bcc95f4ede 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3453,7 +3453,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create destroy subflow @@ -3475,7 +3475,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create id 0 subflow From 04b57c9e096a9479fe0ad31e3956e336fa589cb2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:54 +0100 Subject: [PATCH 743/768] selftests: mptcp: join: stop transfer when check is done (part 2) Since the "Fixes" commits mentioned below, the newly added "userspace pm" subtests of mptcp_join selftests are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds on slow environments. While at it, use 'mptcp_lib_kill_wait()' helper everywhere, instead of on a specific one with 'kill_tests_wait()'. Fixes: b2e2248f365a ("selftests: mptcp: userspace pm create id 0 subflow") Fixes: e3b47e460b4b ("selftests: mptcp: userspace pm remove initial subflow") Fixes: b9fb176081fb ("selftests: mptcp: userspace pm send RM_ADDR for ID 0") Cc: stable@vger.kernel.org Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-9-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 85bcc95f4ede..c07386e21e0a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -643,13 +643,6 @@ kill_events_pids() mptcp_lib_kill_wait $evts_ns2_pid } -kill_tests_wait() -{ - #shellcheck disable=SC2046 - kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) - wait -} - pm_nl_set_limits() { local ns=$1 @@ -3494,7 +3487,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm remove initial subflow @@ -3518,7 +3511,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3544,7 +3537,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi } @@ -3558,7 +3551,8 @@ endpoint_tests() pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns1 pm_nl_check_endpoint "creation" \ @@ -3573,7 +3567,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi if reset "delete and re-add" && @@ -3582,7 +3576,8 @@ endpoint_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns2 chk_subflow_nr "before delete" 2 @@ -3597,7 +3592,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi } From f0588b157f48b9c6277a75c9f14650e86d969e03 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Wed, 31 Jan 2024 14:22:40 -0800 Subject: [PATCH 744/768] idpf: avoid compiler padding in virtchnl2_ptype struct In the arm random config file, kconfig option 'CONFIG_AEABI' is disabled which results in adding the compiler flag '-mabi=apcs-gnu'. This causes the compiler to add padding in virtchnl2_ptype structure to align it to 8 bytes, resulting in the following size check failure: include/linux/build_bug.h:78:41: error: static assertion failed: "(6) == sizeof(struct virtchnl2_ptype)" 78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) | ^~~~~~~~~~~~~~ include/linux/build_bug.h:77:34: note: in expansion of macro '__static_assert' 77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) | ^~~~~~~~~~~~~~~ drivers/net/ethernet/intel/idpf/virtchnl2.h:26:9: note: in expansion of macro 'static_assert' 26 | static_assert((n) == sizeof(struct X)) | ^~~~~~~~~~~~~ drivers/net/ethernet/intel/idpf/virtchnl2.h:982:1: note: in expansion of macro 'VIRTCHNL2_CHECK_STRUCT_LEN' 982 | VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Avoid the compiler padding by using "__packed" structure attribute for the virtchnl2_ptype struct. Also align the structure by using "__aligned(2)" for better code optimization. Fixes: 0d7502a9b4a7 ("virtchnl: add virtchnl version 2 ops") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312220250.ufEm8doQ-lkp@intel.com Reviewed-by: Przemek Kitszel Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Signed-off-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240131222241.2087516-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/virtchnl2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 8dc837889723..4a3c4454d25a 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -978,7 +978,7 @@ struct virtchnl2_ptype { u8 proto_id_count; __le16 pad; __le16 proto_id[]; -}; +} __packed __aligned(2); VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); /** From 069a6ed2992df8eaae90d69d7770de8d545327d9 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Thu, 1 Feb 2024 11:38:53 +0000 Subject: [PATCH 745/768] doc/netlink/specs: Add missing attr in rt_link spec IFLA_DPLL_PIN was added to rt_link messages but not to the spec, which breaks ynl. Add the missing definitions to the rt_link ynl spec. Fixes: 5f1842692880 ("netdev: expose DPLL pin handle for netdevice") Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201113853.37432-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 1ad01d52a863..8e4d19adee8c 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -942,6 +942,10 @@ attribute-sets: - name: gro-ipv4-max-size type: u32 + - + name: dpll-pin + type: nest + nested-attributes: link-dpll-pin-attrs - name: af-spec-attrs attributes: @@ -1627,6 +1631,12 @@ attribute-sets: - name: used type: u8 + - + name: link-dpll-pin-attrs + attributes: + - + name: id + type: u32 sub-messages: - From f3c89983cb4fc00be64eb0d5cbcfcdf2cacb965e Mon Sep 17 00:00:00 2001 From: Hongyu Jin Date: Tue, 30 Jan 2024 15:26:34 -0500 Subject: [PATCH 746/768] block: Fix where bio IO priority gets set Commit 82b74cac2849 ("blk-ioprio: Convert from rqos policy to direct call") pushed setting bio I/O priority down into blk_mq_submit_bio() -- which is too low within block core's submit_bio() because it skips setting I/O priority for block drivers that implement fops->submit_bio() (e.g. DM, MD, etc). Fix this by moving bio_set_ioprio() up from blk-mq.c to blk-core.c and call it from submit_bio(). This ensures all block drivers call bio_set_ioprio() during initial bio submission. Fixes: a78418e6a04c ("block: Always initialize bio IO priority on submit") Co-developed-by: Yibin Ding Signed-off-by: Yibin Ding Signed-off-by: Hongyu Jin Reviewed-by: Eric Biggers Reviewed-by: Mikulas Patocka [snitzer: revised commit header] Signed-off-by: Mike Snitzer Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20240130202638.62600-2-snitzer@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 10 ++++++++++ block/blk-mq.c | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 11342af420d0..de771093b526 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -49,6 +49,7 @@ #include "blk-pm.h" #include "blk-cgroup.h" #include "blk-throttle.h" +#include "blk-ioprio.h" struct dentry *blk_debugfs_root; @@ -833,6 +834,14 @@ end_io: } EXPORT_SYMBOL(submit_bio_noacct); +static void bio_set_ioprio(struct bio *bio) +{ + /* Nobody set ioprio so far? Initialize it based on task's nice value */ + if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) + bio->bi_ioprio = get_current_ioprio(); + blkcg_set_ioprio(bio); +} + /** * submit_bio - submit a bio to the block device layer for I/O * @bio: The &struct bio which describes the I/O @@ -855,6 +864,7 @@ void submit_bio(struct bio *bio) count_vm_events(PGPGOUT, bio_sectors(bio)); } + bio_set_ioprio(bio); submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index aa87fcfda1ec..2dc01551e27c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -40,7 +40,6 @@ #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" -#include "blk-ioprio.h" static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd); @@ -2944,14 +2943,6 @@ static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug, return true; } -static void bio_set_ioprio(struct bio *bio) -{ - /* Nobody set ioprio so far? Initialize it based on task's nice value */ - if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) - bio->bi_ioprio = get_current_ioprio(); - blkcg_set_ioprio(bio); -} - /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. @@ -2976,7 +2967,6 @@ void blk_mq_submit_bio(struct bio *bio) blk_status_t ret; bio = blk_queue_bounce(bio, q); - bio_set_ioprio(bio); if (plug) { rq = rq_list_peek(&plug->cached_rq); From e77e15fa5eb1c830597c5ca53ea7af973bae2f78 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:26 +0000 Subject: [PATCH 747/768] cifs: avoid redundant calls to disable multichannel When the server reports query network interface info call as unsupported following a tree connect, it means that multichannel is unsupported, even if the server capabilities report otherwise. When this happens, cifs_chan_skip_or_disable is called to disable multichannel on the client. However, we only need to call this when multichannel is currently setup. Fixes: f591062bdbf4 ("cifs: handle servers that still advertise multichannel after disabling") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 6db54c6ef571..e257b10bc820 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -419,7 +419,7 @@ skip_sess_setup: rc = SMB3_request_interfaces(xid, tcon, false); free_xid(xid); - if (rc == -EOPNOTSUPP) { + if (rc == -EOPNOTSUPP && ses->chan_count > 1) { /* * some servers like Azure SMB server do not advertise * that multichannel has been disabled with server From 88675b22d34e6e815ad4bde09c590ccb2d50c59d Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:28 +0000 Subject: [PATCH 748/768] cifs: do not search for channel if server is terminating In order to scale down the channels, the following sequence of operations happen: 1. server struct is marked for terminate 2. the channel is deallocated in the ses->chans array 3. at a later point the cifsd thread actually terminates the server Between 2 and 3, there can be calls to find the channel for a server struct. When that happens, there can be an ugly warning that's logged. But this is expected. So this change does two things: 1. in cifs_ses_get_chan_index, if server->terminate is set, return 2. always make sure server->terminate is set with chan_lock held Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/sess.c | 4 ++++ fs/smb/client/smb2pdu.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index cde81042bebd..3d2548c35c9d 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -75,6 +75,10 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, { unsigned int i; + /* if the channel is waiting for termination */ + if (server->terminate) + return CIFS_INVAL_CHAN_INDEX; + for (i = 0; i < ses->chan_count; i++) { if (ses->chans[i].server == server) return i; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e257b10bc820..c58fa44dd6b0 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -178,6 +178,7 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, } ses->chans[chan_index].server = NULL; + server->terminate = true; spin_unlock(&ses->chan_lock); /* @@ -188,7 +189,6 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, */ cifs_put_tcp_session(server, from_reconnect); - server->terminate = true; cifs_signal_cifsd_for_reconnect(server, false); /* mark primary server as needing reconnect */ From 6aac002bcfd554aff6d3ebb55e1660d078d70ab0 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:29 +0000 Subject: [PATCH 749/768] cifs: failure to add channel on iface should bump up weight After the interface selection policy change to do a weighted round robin, each iface maintains a weight_fulfilled. When the weight_fulfilled reaches the total weight for the iface, we know that the weights can be reset and ifaces can be allocated from scratch again. During channel allocation failures on a particular channel, weight_fulfilled is not incremented. If a few interfaces are inactive, we could end up in a situation where the active interfaces are all allocated for the total_weight, and inactive ones are all that remain. This can cause a situation where no more channels can be allocated further. This change fixes it by increasing weight_fulfilled, even when channel allocation failure happens. This could mean that if there are temporary failures in channel allocation, the iface weights may not strictly be adhered to. But that's still okay. Fixes: a6d8fb54a515 ("cifs: distribute channels across interfaces based on speed") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/sess.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 3d2548c35c9d..ed4bd88dd528 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -273,6 +273,8 @@ int cifs_try_adding_channels(struct cifs_ses *ses) &iface->sockaddr, rc); kref_put(&iface->refcount, release_iface); + /* failure to add chan should increase weight */ + iface->weight_fulfilled++; continue; } From 11d4d1dba3315f73d2d1d386f5bf4811a8241d45 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 29 Jan 2024 21:04:44 -0300 Subject: [PATCH 750/768] smb: client: increase number of PDUs allowed in a compound request With the introduction of SMB2_OP_QUERY_WSL_EA, the client may now send 5 commands in a single compound request in order to query xattrs from potential WSL reparse points, which should be fine as we currently allow up to 5 PDUs in a single compound request. However, if encryption is enabled (e.g. 'seal' mount option) or enforced by the server, current MAX_COMPOUND(5) won't be enough as we require an extra PDU for the transform header. Fix this by increasing MAX_COMPOUND to 7 and, while we're at it, add an WARN_ON_ONCE() and return -EIO instead of -ENOMEM in case we attempt to send a compound request that couldn't include the extra transform header. Signed-off-by: Paulo Alcantara Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 +- fs/smb/client/transport.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 9093c507042f..c86a72c9d9ec 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -87,7 +87,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index e00278fcfa4f..994d70193432 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -435,8 +435,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); From 021533194476035883300d60fbb3136426ac8ea5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 1 Feb 2024 14:57:17 -0800 Subject: [PATCH 751/768] Kconfig: Disable -Wstringop-overflow for GCC globally It turns out it was never just gcc-11 that was broken. Apparently it just happens to work on x86-64 with other gcc versions. On arm64, I see warnings with gcc version 13.2.1, and the kernel test robot reports the same problem on s390 with gcc 13.2.0. Admittedly it seems to be just the new Xe drm driver, but this is keeping me from doing my normal arm64 build testing. So it gets reverted until somebody figures out what causes the problem (and why it doesn't show on x86-64, which is what makes me suspect it was never just about gcc-11, and more about just random happenstance). This also changes the Kconfig naming a bit - just make the "disable this for GCC" conditional be one simple Kconfig entry, and we can put the gcc version dependencies in that entry once we figure out what the correct rules are. The version dependency _may_ still end up being "gcc version larger than 11" if the issue is purely in the Xe driver, but even if that ends up the case, let's make that all part of the "GCC_NO_STRINGOP_OVERFLOW" logic. For now, we just disable it for all gcc versions while the exact cause is unknown. Link: https://lore.kernel.org/all/202401161031.hjGJHMiJ-lkp@intel.com/T/ Cc: Gustavo A. R. Silva Cc: Kees Cook Signed-off-by: Linus Torvalds --- init/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 8d4e836e1b6b..deda3d14135b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -876,13 +876,13 @@ config CC_NO_ARRAY_BOUNDS bool default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS -# Currently, disable -Wstringop-overflow for GCC 11, globally. -config GCC11_NO_STRINGOP_OVERFLOW +# Currently, disable -Wstringop-overflow for GCC globally. +config GCC_NO_STRINGOP_OVERFLOW def_bool y config CC_NO_STRINGOP_OVERFLOW bool - default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_STRINGOP_OVERFLOW + default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW config CC_STRINGOP_OVERFLOW bool From 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:00 +0800 Subject: [PATCH 752/768] ext4: refactor ext4_da_map_blocks() Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary parameters and branches, no logic changes. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4cae8698f70c..bbd5ee6dd3f3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1704,7 +1704,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { if (ext4_es_is_hole(&es)) { - retval = 0; down_read(&EXT4_I(inode)->i_data_sem); goto add_delayed; } @@ -1749,26 +1748,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - -add_delayed: - if (retval == 0) { - int ret; - - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ - - ret = ext4_insert_delayed_block(inode, map->m_lblk); - if (ret != 0) { - retval = ret; - goto out_unlock; - } - - map_bh(bh, inode->i_sb, invalid_block); - set_buffer_new(bh); - set_buffer_delay(bh); - } else if (retval > 0) { + if (retval < 0) + goto out_unlock; + if (retval > 0) { unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1783,11 +1765,24 @@ add_delayed: EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); + goto out_unlock; } +add_delayed: + /* + * XXX: __block_prepare_write() unmaps passed block, + * is it OK? + */ + retval = ext4_insert_delayed_block(inode, map->m_lblk); + if (retval) + goto out_unlock; + + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); + out_unlock: up_read((&EXT4_I(inode)->i_data_sem)); - return retval; } From acf795dc161f3cf481db20f05db4250714e375e5 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:01 +0800 Subject: [PATCH 753/768] ext4: convert to exclusive lock while inserting delalloc extents ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem when inserting delalloc extents, it could be raced by another querying path of ext4_map_blocks() without i_rwsem, .e.g buffered read path. Suppose we buffered read a file containing just a hole, and without any cached extents tree, then it is raced by another delayed buffered write to the same area or the near area belongs to the same hole, and the new delalloc extent could be overwritten to a hole extent. pread() pwrite() filemap_read_folio() ext4_mpage_readpages() ext4_map_blocks() down_read(i_data_sem) ext4_ext_determine_hole() //find hole ext4_ext_put_gap_in_cache() ext4_es_find_extent_range() //no delalloc extent ext4_da_map_blocks() down_read(i_data_sem) ext4_insert_delayed_block() //insert delalloc extent ext4_es_insert_extent() //overwrite delalloc extent to hole This race could lead to inconsistent delalloc extents tree and incorrect reserved space counter. Fix this by converting to hold i_data_sem in exclusive mode when adding a new delalloc extent in ext4_da_map_blocks(). Cc: stable@vger.kernel.org Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bbd5ee6dd3f3..b040337501e3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1703,10 +1703,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { - if (ext4_es_is_hole(&es)) { - down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_es_is_hole(&es)) goto add_delayed; - } /* * Delayed extent could be allocated by fallocate. @@ -1748,8 +1746,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - if (retval < 0) - goto out_unlock; + if (retval < 0) { + up_read(&EXT4_I(inode)->i_data_sem); + return retval; + } if (retval > 0) { unsigned int status; @@ -1765,24 +1765,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); - goto out_unlock; + up_read(&EXT4_I(inode)->i_data_sem); + return retval; } + up_read(&EXT4_I(inode)->i_data_sem); add_delayed: - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ + down_write(&EXT4_I(inode)->i_data_sem); retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); if (retval) - goto out_unlock; + return retval; map_bh(bh, inode->i_sb, invalid_block); set_buffer_new(bh); set_buffer_delay(bh); - -out_unlock: - up_read((&EXT4_I(inode)->i_data_sem)); return retval; } From 6430dea07e85958fa87d0276c0c4388dd51e630b Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:02 +0800 Subject: [PATCH 754/768] ext4: correct the hole length returned by ext4_map_blocks() In ext4_map_blocks(), if we can't find a range of mapping in the extents cache, we are calling ext4_ext_map_blocks() to search the real path and ext4_ext_determine_hole() to determine the hole range. But if the querying range was partially or completely overlaped by a delalloc extent, we can't find it in the real extent path, so the returned hole length could be incorrect. Fortunately, ext4_ext_put_gap_in_cache() have already handle delalloc extent, but it searches start from the expanded hole_start, doesn't start from the querying range, so the delalloc extent found could not be the one that overlaped the querying range, plus, it also didn't adjust the hole length. Let's just remove ext4_ext_put_gap_in_cache(), handle delalloc and insert adjusted hole extent in ext4_ext_determine_hole(). Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-4-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 111 +++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 41 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9106715254ac..c5b54fb9eb8b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, return len; } -/* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - /* * ext4_ext_rm_idx: * removes index from the index block. @@ -4062,6 +4038,69 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree, adjust the length to the delalloc extent's after + * lblk. + */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4179,22 +4218,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } From 683cd8259a9b883a51973511f860976db2550a6e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 Jan 2024 17:07:23 +0100 Subject: [PATCH 755/768] Input: atkbd - skip ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID After commit 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") the keyboard on Dell XPS 13 9350 / 9360 / 9370 models has stopped working after a suspend/resume. The problem appears to be that atkbd_probe() fails when called from atkbd_reconnect() on resume, which on systems where ATKBD_CMD_GETID is skipped can only happen by ATKBD_CMD_SETLEDS failing. ATKBD_CMD_SETLEDS failing because ATKBD_CMD_GETID was skipped is weird, but apparently that is what is happening. Fix this by also skipping ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID. Fixes: 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") Reported-by: Paul Menzel Closes: https://lore.kernel.org/linux-input/0aa4a61f-c939-46fe-a572-08022e8931c7@molgen.mpg.de/ Closes: https://bbs.archlinux.org/viewtopic.php?pid=2146300 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218424 Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2260517 Tested-by: Paul Menzel Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240126160724.13278-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 13ef6284223d..c229bd6b3f7f 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -811,7 +811,6 @@ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; - bool skip_getid; /* * Some systems, where the bit-twiddling when testing the io-lines of the @@ -825,6 +824,11 @@ static int atkbd_probe(struct atkbd *atkbd) "keyboard reset failed on %s\n", ps2dev->serio->phys); + if (atkbd_skip_getid(atkbd)) { + atkbd->id = 0xab83; + return 0; + } + /* * Then we check the keyboard ID. We should get 0xab83 under normal conditions. * Some keyboards report different values, but the first byte is always 0xab or @@ -833,18 +837,17 @@ static int atkbd_probe(struct atkbd *atkbd) */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ - skip_getid = atkbd_skip_getid(atkbd); - if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { + if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* - * If the get ID command was skipped or failed, we check if we can at least set + * If the get ID command failed, we check if we can at least set * the LEDs on the keyboard. This should work on every keyboard out there. * It also turns the LEDs off, which we want anyway. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; - atkbd->id = skip_getid ? 0xab83 : 0xabba; + atkbd->id = 0xabba; return 0; } From 9cf6e24c9fbf17e52de9fff07f12be7565ea6d61 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 Jan 2024 17:07:24 +0100 Subject: [PATCH 756/768] Input: atkbd - do not skip atkbd_deactivate() when skipping ATKBD_CMD_GETID After commit 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") not only the getid command is skipped, but also the de-activating of the keyboard at the end of atkbd_probe(), potentially re-introducing the problem fixed by commit be2d7e4233a4 ("Input: atkbd - fix multi-byte scancode handling on reconnect"). Make sure multi-byte scancode handling on reconnect is still handled correctly by not skipping the atkbd_deactivate() call. Fixes: 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") Tested-by: Paul Menzel Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240126160724.13278-3-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index c229bd6b3f7f..7f67f9f2946b 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -826,7 +826,7 @@ static int atkbd_probe(struct atkbd *atkbd) if (atkbd_skip_getid(atkbd)) { atkbd->id = 0xab83; - return 0; + goto deactivate_kbd; } /* @@ -863,6 +863,7 @@ static int atkbd_probe(struct atkbd *atkbd) return -1; } +deactivate_kbd: /* * Make sure nothing is coming from the keyboard and disturbs our * internal state. From 9f1118223aa080021fe9751fa221590654d27669 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:03 +0800 Subject: [PATCH 757/768] ext4: add a hole extent entry in cache after punch In order to cache hole extents in the extent status tree and keep the hole length as long as possible, re-add a hole entry to the cache just after punching a hole. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-5-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b040337501e3..af205b416794 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4007,12 +4007,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) /* If there are blocks to remove, do it */ if (stop_block > first_block) { + ext4_lblk_t hole_len = stop_block - first_block; down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); - ext4_es_remove_extent(inode, first_block, - stop_block - first_block); + ext4_es_remove_extent(inode, first_block, hole_len); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ret = ext4_ext_remove_space(inode, first_block, @@ -4021,6 +4021,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) ret = ext4_ind_remove_space(handle, inode, first_block, stop_block); + ext4_es_insert_extent(inode, first_block, hole_len, ~0, + EXTENT_STATUS_HOLE); up_write(&EXT4_I(inode)->i_data_sem); } ext4_fc_track_range(handle, inode, first_block, stop_block); From 874eaba96d39334fe9c729ff631e65523616d4d8 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:04 +0800 Subject: [PATCH 758/768] ext4: make ext4_map_blocks() distinguish delalloc only extent Add a new map flag EXT4_MAP_DELAYED to indicate the mapping range is a delayed allocated only (not unwritten) one, and making ext4_map_blocks() can distinguish it, no longer mixing it with holes. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-6-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 +++- fs/ext4/extents.c | 7 +++++-- fs/ext4/inode.c | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 786b6857ab47..023571f8dd1b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -252,8 +252,10 @@ struct ext4_allocation_request { #define EXT4_MAP_MAPPED BIT(BH_Mapped) #define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) #define EXT4_MAP_BOUNDARY BIT(BH_Boundary) +#define EXT4_MAP_DELAYED BIT(BH_Delay) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ - EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) + EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ + EXT4_MAP_DELAYED) struct ext4_map_blocks { ext4_fsblk_t m_pblk; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c5b54fb9eb8b..7669d154c05e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4076,8 +4076,11 @@ again: /* * The delalloc extent containing lblk, it must have been * added after ext4_map_blocks() checked the extent status - * tree, adjust the length to the delalloc extent's after - * lblk. + * tree so we are not holding i_rwsem and delalloc info is + * only stabilized by i_data_sem we are going to release + * soon. Don't modify the extent status tree and report + * extent as a hole, just adjust the length to the delalloc + * extent's after lblk. */ len = es.es_lblk + es.es_len - lblk; return len; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index af205b416794..60f0d2dc1c37 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -515,6 +515,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, map->m_len = retval; } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { map->m_pblk = 0; + map->m_flags |= ext4_es_is_delayed(&es) ? + EXT4_MAP_DELAYED : 0; retval = es.es_len - (map->m_lblk - es.es_lblk); if (retval > map->m_len) retval = map->m_len; From ec9d669eba4c276d00af88951947fe0e82a6b84c Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:05 +0800 Subject: [PATCH 759/768] ext4: make ext4_set_iomap() recognize IOMAP_DELALLOC map type Since ext4_map_blocks() can recognize a delayed allocated only extent, make ext4_set_iomap() can also recognize it, and remove the useless separate check in ext4_iomap_begin_report(). Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-7-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 60f0d2dc1c37..2ccf3b5e3a7c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3262,6 +3262,9 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, iomap->addr = (u64) map->m_pblk << blkbits; if (flags & IOMAP_DAX) iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; + } else if (map->m_flags & EXT4_MAP_DELAYED) { + iomap->type = IOMAP_DELALLOC; + iomap->addr = IOMAP_NULL_ADDR; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; @@ -3424,35 +3427,11 @@ const struct iomap_ops ext4_iomap_overwrite_ops = { .iomap_end = ext4_iomap_end, }; -static bool ext4_iomap_is_delalloc(struct inode *inode, - struct ext4_map_blocks *map) -{ - struct extent_status es; - ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, - map->m_lblk, end, &es); - - if (!es.es_len || es.es_lblk > end) - return false; - - if (es.es_lblk > map->m_lblk) { - map->m_len = es.es_lblk - map->m_lblk; - return false; - } - - offset = map->m_lblk - es.es_lblk; - map->m_len = es.es_len - offset; - - return true; -} - static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int ret; - bool delalloc = false; struct ext4_map_blocks map; u8 blkbits = inode->i_blkbits; @@ -3493,13 +3472,8 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; - if (ret == 0) - delalloc = ext4_iomap_is_delalloc(inode, &map); - set_iomap: ext4_set_iomap(inode, iomap, &map, offset, length, flags); - if (delalloc && iomap->type == IOMAP_HOLE) - iomap->type = IOMAP_DELALLOC; return 0; } From b5e69be185495696652405088a27ab0b21812147 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 30 Jan 2024 13:24:40 +1000 Subject: [PATCH 760/768] nouveau/gsp: use correct size for registry rpc. Timur pointed this out before, and it just slipped my mind, but this might help some things work better, around pcie power management. Fixes: 8d55b0a940bb ("nouveau/gsp: add some basic registry entries.") Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/576336/ --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 9ee58e2a0eb2..5e1fa176aac4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1078,7 +1078,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) if (IS_ERR(rpc)) return PTR_ERR(rpc); - rpc->size = sizeof(*rpc); rpc->numEntries = NV_GSP_REG_NUM_ENTRIES; str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]); @@ -1094,6 +1093,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) strings += name_len; str_offset += name_len; } + rpc->size = str_offset; return nvkm_gsp_rpc_wr(gsp, rpc, false); } From 39126abc5e20611579602f03b66627d7cd1422f0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 29 Jan 2024 11:26:45 +1000 Subject: [PATCH 761/768] nouveau: offload fence uevents work to workqueue This should break the deadlock between the fctx lock and the irq lock. This offloads the processing off the work from the irq into a workqueue. Cc: linux-stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/576237/ --- drivers/gpu/drm/nouveau/nouveau_fence.c | 26 ++++++++++++++++++------- drivers/gpu/drm/nouveau/nouveau_fence.h | 1 + 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ca762ea55413..93f08f9479d8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { + cancel_work_sync(&fctx->uevent_work); nouveau_fence_context_kill(fctx, 0); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -145,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc return drop; } -static int -nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc) +static void +nouveau_fence_uevent_work(struct work_struct *work) { - struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event); + struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, + uevent_work); unsigned long flags; - int ret = NVIF_EVENT_KEEP; + int drop = 0; spin_lock_irqsave(&fctx->lock, flags); if (!list_empty(&fctx->pending)) { @@ -160,11 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); if (nouveau_fence_update(chan, fctx)) - ret = NVIF_EVENT_DROP; + drop = 1; } - spin_unlock_irqrestore(&fctx->lock, flags); + if (drop) + nvif_event_block(&fctx->event); - return ret; + spin_unlock_irqrestore(&fctx->lock, flags); +} + +static int +nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc) +{ + struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event); + schedule_work(&fctx->uevent_work); + return NVIF_EVENT_KEEP; } void @@ -178,6 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; + INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 64d33ae7f356..8bc065acfe35 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -44,6 +44,7 @@ struct nouveau_fence_chan { u32 context; char name[32]; + struct work_struct uevent_work; struct nvif_event event; int notify_ref, dead, killed; }; From d2d00e15808c37ec476a5c040ee2cdd23854ef18 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Fri, 26 Jan 2024 09:09:18 -0600 Subject: [PATCH 762/768] powerpc: iommu: Bring back table group release_ownership() call The commit 2ad56efa80db ("powerpc/iommu: Setup a default domain and remove set_platform_dma_ops") refactored the code removing the set_platform_dma_ops(). It missed out the table group release_ownership() call which would have got called otherwise during the guest shutdown via vfio_group_detach_container(). On PPC64, this particular call actually sets up the 32-bit TCE table, and enables the 64-bit DMA bypass etc. Now after guest shutdown, the subsequent host driver (e.g megaraid-sas) probe post unbind from vfio-pci fails like, megaraid_sas 0031:01:00.0: Warning: IOMMU dma not supported: mask 0x7fffffffffffffff, table unavailable megaraid_sas 0031:01:00.0: Warning: IOMMU dma not supported: mask 0xffffffff, table unavailable megaraid_sas 0031:01:00.0: Failed to set DMA mask megaraid_sas 0031:01:00.0: Failed from megasas_init_fw 6539 The patch brings back the call to table_group release_ownership() call when switching back to PLATFORM domain from BLOCKED, while also separates the domain_ops for both. Fixes: 2ad56efa80db ("powerpc/iommu: Setup a default domain and remove set_platform_dma_ops") Signed-off-by: Shivaprasad G Bhat Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/170628173462.3742.18330000394415935845.stgit@ltcd48-lp2.aus.stglab.ibm.com Signed-off-by: Joerg Roedel --- arch/powerpc/kernel/iommu.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ebe259bdd462..d71eac3b2887 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1287,20 +1287,20 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; - int ret = -EINVAL; /* At first attach the ownership is already set */ if (!domain) return 0; - if (!grp) - return -ENODEV; - table_group = iommu_group_get_iommudata(grp); - ret = table_group->ops->take_ownership(table_group); + /* + * The domain being set to PLATFORM from earlier + * BLOCKED. The table_group ownership has to be released. + */ + table_group->ops->release_ownership(table_group); iommu_group_put(grp); - return ret; + return 0; } static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { @@ -1312,13 +1312,32 @@ static struct iommu_domain spapr_tce_platform_domain = { .ops = &spapr_tce_platform_domain_ops, }; -static struct iommu_domain spapr_tce_blocked_domain = { - .type = IOMMU_DOMAIN_BLOCKED, +static int +spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + int ret = -EINVAL; + /* * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain * also sets the dma_api ops */ - .ops = &spapr_tce_platform_domain_ops, + table_group = iommu_group_get_iommudata(grp); + ret = table_group->ops->take_ownership(table_group); + iommu_group_put(grp); + + return ret; +} + +static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = { + .attach_dev = spapr_tce_blocked_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &spapr_tce_blocked_domain_ops, }; static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) From bd6081be2251c3700eca8a7bbe071e1bb8cd2af4 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 30 Jan 2024 22:02:16 +0530 Subject: [PATCH 763/768] dmaengine: at_hdmac: add missing kernel-doc style description We get following warning with W=1: drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'boundary' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'dst_hole' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'src_hole' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'memset_buffer' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'memset_paddr' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'memset_vaddr' not described in 'at_desc' drivers/dma/at_hdmac.c:255: warning: Enum value 'ATC_IS_PAUSED' not described in enum 'atc_status' drivers/dma/at_hdmac.c:255: warning: Enum value 'ATC_IS_CYCLIC' not described in enum 'atc_status' drivers/dma/at_hdmac.c:287: warning: Function parameter or struct member 'cyclic' not described in 'at_dma_chan' drivers/dma/at_hdmac.c:350: warning: Function parameter or struct member 'memset_pool' not described in 'at_dma' Fix this by adding the required description and also drop unused struct member 'cyclic' in 'at_dma_chan' Reviewed-by: Tudor Ambarus Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20240130163216.633034-1-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 6bad536e0492..40052d1bd0b5 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -224,6 +224,12 @@ struct atdma_sg { * @total_len: total transaction byte count * @sglen: number of sg entries. * @sg: array of sgs. + * @boundary: number of transfers to perform before the automatic address increment operation + * @dst_hole: value to add to the destination address when the boundary has been reached + * @src_hole: value to add to the source address when the boundary has been reached + * @memset_buffer: buffer used for the memset operation + * @memset_paddr: physical address of the buffer used for the memset operation + * @memset_vaddr: virtual address of the buffer used for the memset operation */ struct at_desc { struct virt_dma_desc vd; @@ -247,6 +253,9 @@ struct at_desc { /** * enum atc_status - information bits stored in channel status flag * + * @ATC_IS_PAUSED: If channel is pauses + * @ATC_IS_CYCLIC: If channel is cyclic + * * Manipulated with atomic operations. */ enum atc_status { @@ -282,7 +291,6 @@ struct at_dma_chan { u32 save_cfg; u32 save_dscr; struct dma_slave_config dma_sconfig; - bool cyclic; struct at_desc *desc; }; @@ -333,6 +341,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) * @save_imr: interrupt mask register that is saved on suspend/resume cycle * @all_chan_mask: all channels availlable in a mask * @lli_pool: hw lli table + * @memset_pool: hw memset pool * @chan: channels table to store at_dma_chan structures */ struct at_dma { From e9f1e6bb55bea4f8cb48f8f7443bbac99b60d285 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 2 Feb 2024 17:11:25 +0100 Subject: [PATCH 764/768] Revert "gfs2: Use GL_NOBLOCK flag for non-blocking lookups" Commit "gfs2: Use GL_NOBLOCK flag for non-blocking lookups" has several issues, some of which are non-trivial to fix, so revert it for now: https://lore.kernel.org/gfs2/20240202050230.GA875515@ZenIV/T/ This reverts commit dd00aaeb343255a8a30de671bd27bde79a47c8e5. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/dentry.c | 23 +++++++++-------------- fs/gfs2/inode.c | 8 ++++---- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 177f1f41f225..2e215e8c3c88 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -32,25 +32,21 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent = NULL; + struct dentry *parent; struct gfs2_sbd *sdp; struct gfs2_inode *dip; - struct inode *dinode, *inode; + struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; int error, valid = 0; int had_lock = 0; - if (flags & LOOKUP_RCU) { - dinode = d_inode_rcu(READ_ONCE(dentry->d_parent)); - if (!dinode) - return -ECHILD; - } else { - parent = dget_parent(dentry); - dinode = d_inode(parent); - } - sdp = GFS2_SB(dinode); - dip = GFS2_I(dinode); + if (flags & LOOKUP_RCU) + return -ECHILD; + + parent = dget_parent(dentry); + sdp = GFS2_SB(d_inode(parent)); + dip = GFS2_I(d_inode(parent)); inode = d_inode(dentry); if (inode) { @@ -66,8 +62,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); if (!had_lock) { - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, - flags & LOOKUP_RCU ? GL_NOBLOCK : 0, &d_gh); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) goto out; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6bfc9383b7b8..1b95db2c3aac 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1882,10 +1882,10 @@ int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, WARN_ON_ONCE(!may_not_block); return -ECHILD; } - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - int noblock = may_not_block ? GL_NOBLOCK : 0; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_ANY | noblock, &i_gh); + if (gfs2_glock_is_locked_by_me(gl) == NULL) { + if (may_not_block) + return -ECHILD; + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; } From 0a9bab391e336489169b95cb0d4553d921302189 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 31 Jan 2024 21:57:27 +0100 Subject: [PATCH 765/768] dm-crypt, dm-verity: disable tasklets Tasklets have an inherent problem with memory corruption. The function tasklet_action_common calls tasklet_trylock, then it calls the tasklet callback and then it calls tasklet_unlock. If the tasklet callback frees the structure that contains the tasklet or if it calls some code that may free it, tasklet_unlock will write into free memory. The commits 8e14f610159d and d9a02e016aaf try to fix it for dm-crypt, but it is not a sufficient fix and the data corruption can still happen [1]. There is no fix for dm-verity and dm-verity will write into free memory with every tasklet-processed bio. There will be atomic workqueues implemented in the kernel 6.9 [2]. They will have better interface and they will not suffer from the memory corruption problem. But we need something that stops the memory corruption now and that can be backported to the stable kernels. So, I'm proposing this commit that disables tasklets in both dm-crypt and dm-verity. This commit doesn't remove the tasklet support, because the tasklet code will be reused when atomic workqueues will be implemented. [1] https://lore.kernel.org/all/d390d7ee-f142-44d3-822a-87949e14608b@suse.de/T/ [2] https://lore.kernel.org/lkml/20240130091300.2968534-1-tj@kernel.org/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: 39d42fa96ba1b ("dm crypt: add flags to optionally bypass kcryptd workqueues") Fixes: 5721d4e5a9cdb ("dm verity: Add optional "try_verify_in_tasklet" feature") Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 38 ++--------------------------------- drivers/md/dm-verity-target.c | 26 ++---------------------- drivers/md/dm-verity.h | 1 - 3 files changed, 4 insertions(+), 61 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 855b482cbff1..f745f8508243 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -73,10 +73,8 @@ struct dm_crypt_io { struct bio *base_bio; u8 *integrity_metadata; bool integrity_metadata_from_pool:1; - bool in_tasklet:1; struct work_struct work; - struct tasklet_struct tasklet; struct convert_context ctx; @@ -1762,7 +1760,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; - io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1771,13 +1768,6 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } -static void kcryptd_io_bio_endio(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - - bio_endio(io->base_bio); -} - /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1801,20 +1791,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io) base_bio->bi_status = error; - /* - * If we are running this function from our tasklet, - * we can't call bio_endio() here, because it will call - * clone_endio() from dm.c, which in turn will - * free the current struct dm_crypt_io structure with - * our tasklet. In this case we need to delay bio_endio() - * execution to after the tasklet is done and dequeued. - */ - if (io->in_tasklet) { - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); - return; - } - bio_endio(base_bio); } @@ -2246,11 +2222,6 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } -static void kcryptd_crypt_tasklet(unsigned long work) -{ - kcryptd_crypt((struct work_struct *)work); -} - static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2262,15 +2233,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * irqs_disabled(): the kernel may run some IO completion from the idle thread, but * it is being executed with irqs disabled. */ - if (in_hardirq() || irqs_disabled()) { - io->in_tasklet = true; - tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); - tasklet_schedule(&io->tasklet); + if (!(in_hardirq() || irqs_disabled())) { + kcryptd_crypt(&io->work); return; } - - kcryptd_crypt(&io->work); - return; } INIT_WORK(&io->work, kcryptd_crypt); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 14e58ae70521..82662f5769c4 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -645,23 +645,6 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } -static void verity_tasklet(unsigned long data) -{ - struct dm_verity_io *io = (struct dm_verity_io *)data; - int err; - - io->in_tasklet = true; - err = verity_verify_io(io); - if (err == -EAGAIN || err == -ENOMEM) { - /* fallback to retrying with work-queue */ - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - return; - } - - verity_finish_io(io, errno_to_blk_status(err)); -} - static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; @@ -674,13 +657,8 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) { - tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io); - tasklet_schedule(&io->tasklet); - } else { - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - } + INIT_WORK(&io->work, verity_work); + queue_work(io->v->verify_wq, &io->work); } /* diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f9d522c870e6..f3f607008419 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -83,7 +83,6 @@ struct dm_verity_io { struct bvec_iter iter; struct work_struct work; - struct tasklet_struct tasklet; /* * Three variably-size fields follow this struct: From a60e6c3918d20848906ffcdfcf72ca6a8cfbcf2e Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 5 Dec 2023 17:36:01 +0100 Subject: [PATCH 766/768] Input: i8042 - fix strange behavior of touchpad on Clevo NS70PU When closing the laptop lid with an external screen connected, the mouse pointer has a constant movement to the lower right corner. Opening the lid again stops this movement, but after that the touchpad does no longer register clicks. The touchpad is connected both via i2c-hid and PS/2, the predecessor of this device (NS70MU) has the same layout in this regard and also strange behaviour caused by the psmouse and the i2c-hid driver fighting over touchpad control. This fix is reusing the same workaround by just disabling the PS/2 aux port, that is only used by the touchpad, to give the i2c-hid driver the lone control over the touchpad. v2: Rebased on current master Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231205163602.16106-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index b585b1dab870..cd45a65e17f2 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1208,6 +1208,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NS5x_7xPU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), From 4255447ad34c5c3785fcdcf76cfa0271d6e5ed39 Mon Sep 17 00:00:00 2001 From: Szilard Fabian Date: Fri, 2 Feb 2024 10:28:59 -0800 Subject: [PATCH 767/768] Input: i8042 - add Fujitsu Lifebook U728 to i8042 quirk table Another Fujitsu-related patch. In the initial boot stage the integrated keyboard of Fujitsu Lifebook U728 refuses to work and it's not possible to type for example a dm-crypt passphrase without the help of an external keyboard. i8042.nomux kernel parameter resolves this issue but using that a PS/2 mouse is detected. This input device is unused even when the i2c-hid-acpi kernel module is blacklisted making the integrated ELAN touchpad (04F3:3092) not working at all. So this notebook uses a hid-over-i2c touchpad which is managed by the i2c_designware input driver. Since you can't find a PS/2 mouse port on this computer and you can't connect a PS/2 mouse to it even with an official port replicator I think it's safe to not use the PS/2 mouse port at all. Signed-off-by: Szilard Fabian Link: https://lore.kernel.org/r/20240103014717.127307-2-szfabian@bluemarch.art Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index cd45a65e17f2..dfc6c581873b 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -634,6 +634,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOAUX) }, + { + /* Fujitsu Lifebook U728 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { From 54be6c6c5ae8e0d93a6c4641cb7528eb0b6ba478 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Feb 2024 12:20:36 +0000 Subject: [PATCH 768/768] Linux 6.8-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 113f7c762f0a..a171eafce2a3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*