linux-stable/drivers/gpu/drm/nouveau/nouveau_backlight.c

324 lines
8.5 KiB
C
Raw Normal View History

/*
* Copyright (C) 2009 Red Hat <mjg@redhat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* Authors:
* Matthew Garrett <mjg@redhat.com>
*
* Register locations derived from NVClock by Roderick Colenbrander
*/
#include <linux/apple-gmux.h>
#include <linux/backlight.h>
#include <linux/idr.h>
#include "nouveau_drv.h"
#include "nouveau_reg.h"
#include "nouveau_encoder.h"
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
#include "nouveau_connector.h"
static struct ida bl_ida;
#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0'
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
struct nouveau_backlight {
struct backlight_device *dev;
int id;
};
static bool
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE],
struct nouveau_backlight *bl)
{
const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL);
if (nb < 0 || nb >= 100)
return false;
if (nb > 0)
snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb);
else
snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight");
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
bl->id = nb;
return true;
}
static int
nv40_get_intensity(struct backlight_device *bd)
{
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
int val = (nvif_rd32(device, NV40_PMC_BACKLIGHT) &
NV40_PMC_BACKLIGHT_MASK) >> 16;
return val;
}
static int
nv40_set_intensity(struct backlight_device *bd)
{
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
int val = bd->props.brightness;
int reg = nvif_rd32(device, NV40_PMC_BACKLIGHT);
nvif_wr32(device, NV40_PMC_BACKLIGHT,
(val << 16) | (reg & ~NV40_PMC_BACKLIGHT_MASK));
return 0;
}
static const struct backlight_ops nv40_bl_ops = {
.options = BL_CORE_SUSPENDRESUME,
.get_brightness = nv40_get_intensity,
.update_status = nv40_set_intensity,
};
static int
nv40_backlight_init(struct nouveau_encoder *encoder,
struct backlight_properties *props,
const struct backlight_ops **ops)
{
struct nouveau_drm *drm = nouveau_drm(encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
if (!(nvif_rd32(device, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK))
return -ENODEV;
props->type = BACKLIGHT_RAW;
props->max_brightness = 31;
*ops = &nv40_bl_ops;
return 0;
}
static int
nv50_get_intensity(struct backlight_device *bd)
{
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
int or = ffs(nv_encoder->dcb->or) - 1;
u32 div = 1025;
u32 val;
val = nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(or));
val &= NV50_PDISP_SOR_PWM_CTL_VAL;
return ((val * 100) + (div / 2)) / div;
}
static int
nv50_set_intensity(struct backlight_device *bd)
{
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
int or = ffs(nv_encoder->dcb->or) - 1;
u32 div = 1025;
u32 val = (bd->props.brightness * div) / 100;
nvif_wr32(device, NV50_PDISP_SOR_PWM_CTL(or),
NV50_PDISP_SOR_PWM_CTL_NEW | val);
return 0;
}
static const struct backlight_ops nv50_bl_ops = {
.options = BL_CORE_SUSPENDRESUME,
.get_brightness = nv50_get_intensity,
.update_status = nv50_set_intensity,
};
static int
nva3_get_intensity(struct backlight_device *bd)
{
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
int or = ffs(nv_encoder->dcb->or) - 1;
u32 div, val;
div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
val = nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(or));
val &= NVA3_PDISP_SOR_PWM_CTL_VAL;
if (div && div >= val)
return ((val * 100) + (div / 2)) / div;
return 100;
}
static int
nva3_set_intensity(struct backlight_device *bd)
{
struct nouveau_encoder *nv_encoder = bl_get_data(bd);
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
int or = ffs(nv_encoder->dcb->or) - 1;
u32 div, val;
div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
val = (bd->props.brightness * div) / 100;
if (div) {
nvif_wr32(device, NV50_PDISP_SOR_PWM_CTL(or),
val |
NV50_PDISP_SOR_PWM_CTL_NEW |
NVA3_PDISP_SOR_PWM_CTL_UNK);
return 0;
}
return -EINVAL;
}
static const struct backlight_ops nva3_bl_ops = {
.options = BL_CORE_SUSPENDRESUME,
.get_brightness = nva3_get_intensity,
.update_status = nva3_set_intensity,
};
static int
nv50_backlight_init(struct nouveau_encoder *nv_encoder,
struct backlight_properties *props,
const struct backlight_ops **ops)
{
struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
struct nvif_object *device = &drm->client.device.object;
if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1)))
return -ENODEV;
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
if (drm->client.device.info.chipset <= 0xa0 ||
drm->client.device.info.chipset == 0xaa ||
drm->client.device.info.chipset == 0xac)
*ops = &nv50_bl_ops;
else
*ops = &nva3_bl_ops;
props->type = BACKLIGHT_RAW;
props->max_brightness = 100;
return 0;
}
int
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
nouveau_backlight_init(struct drm_connector *connector)
{
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
struct nouveau_drm *drm = nouveau_drm(connector->dev);
struct nouveau_backlight *bl;
struct nouveau_encoder *nv_encoder = NULL;
struct nvif_device *device = &drm->client.device;
char backlight_name[BL_NAME_SIZE];
struct backlight_properties props = {0};
const struct backlight_ops *ops;
int ret;
if (apple_gmux_present()) {
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
NV_INFO_ONCE(drm, "Apple GMUX detected: not registering Nouveau backlight interface\n");
return 0;
}
if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
nv_encoder = find_encoder(connector, DCB_OUTPUT_LVDS);
else if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
nv_encoder = find_encoder(connector, DCB_OUTPUT_DP);
else
return 0;
if (!nv_encoder)
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
return 0;
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
switch (device->info.family) {
case NV_DEVICE_INFO_V0_CURIE:
ret = nv40_backlight_init(nv_encoder, &props, &ops);
break;
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
case NV_DEVICE_INFO_V0_TESLA:
case NV_DEVICE_INFO_V0_FERMI:
case NV_DEVICE_INFO_V0_KEPLER:
case NV_DEVICE_INFO_V0_MAXWELL:
ret = nv50_backlight_init(nv_encoder, &props, &ops);
break;
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
default:
return 0;
}
if (ret == -ENODEV)
return 0;
else if (ret)
return ret;
bl = kzalloc(sizeof(*bl), GFP_KERNEL);
if (!bl)
return -ENOMEM;
if (!nouveau_get_backlight_name(backlight_name, bl)) {
NV_ERROR(drm, "Failed to retrieve a unique name for the backlight interface\n");
goto fail_alloc;
}
bl->dev = backlight_device_register(backlight_name, connector->kdev,
nv_encoder, ops, &props);
if (IS_ERR(bl->dev)) {
if (bl->id >= 0)
ida_simple_remove(&bl_ida, bl->id);
ret = PTR_ERR(bl->dev);
goto fail_alloc;
}
nouveau_connector(connector)->backlight = bl;
bl->dev->props.brightness = bl->dev->ops->get_brightness(bl->dev);
backlight_update_status(bl->dev);
return 0;
fail_alloc:
kfree(bl);
return ret;
}
void
nouveau_backlight_fini(struct drm_connector *connector)
{
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
struct nouveau_connector *nv_conn = nouveau_connector(connector);
struct nouveau_backlight *bl = nv_conn->backlight;
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
if (!bl)
return;
drm/nouveau: Move backlight device into nouveau_connector Currently module unloading is broken in nouveau due to a rather annoying race condition resulting from nouveau_backlight.c having gone a bit stale over time: [ 1960.791143] ================================================================== [ 1960.791394] BUG: KASAN: use-after-free in nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791460] Read of size 4 at addr ffff88075accf350 by task zsh/11185 [ 1960.791521] [ 1960.791545] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G O 4.18.0Lyude-Test+ #4 [ 1960.791580] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.791628] Call Trace: [ 1960.791680] dump_stack+0xa4/0xfd [ 1960.791721] print_address_description+0x71/0x239 [ 1960.791833] ? nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.791877] kasan_report.cold.6+0x242/0x2fe [ 1960.791919] __asan_report_load4_noabort+0x19/0x20 [ 1960.792012] nouveau_backlight_exit+0x112/0x150 [nouveau] [ 1960.792081] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.792150] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.792265] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.792347] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.792378] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.792406] ? trace_hardirqs_on+0xd/0x10 [ 1960.792472] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.792502] pci_device_remove+0x112/0x2d0 [ 1960.792530] ? pcibios_free_irq+0x10/0x10 [ 1960.792558] ? kasan_check_write+0x14/0x20 [ 1960.792587] device_release_driver_internal+0x35c/0x650 [ 1960.792617] device_release_driver+0x12/0x20 [ 1960.792643] pci_stop_bus_device+0x172/0x1e0 [ 1960.792671] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.792715] remove_store+0xcb/0xe0 [ 1960.792753] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.792779] ? __lock_is_held+0xb5/0x140 [ 1960.792808] ? component_add+0x530/0x530 [ 1960.792834] dev_attr_store+0x3f/0x70 [ 1960.792859] ? sysfs_file_ops+0x11d/0x170 [ 1960.792885] sysfs_kf_write+0x104/0x150 [ 1960.792915] ? sysfs_file_ops+0x170/0x170 [ 1960.792940] kernfs_fop_write+0x24f/0x400 [ 1960.792978] ? __lock_acquire+0x6ea/0x47f0 [ 1960.793021] __vfs_write+0xeb/0x760 [ 1960.793048] ? kernel_read+0x130/0x130 [ 1960.793076] ? __lock_is_held+0xb5/0x140 [ 1960.793107] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.793135] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.793162] ? __sb_start_write+0x183/0x220 [ 1960.793189] vfs_write+0x14d/0x4a0 [ 1960.793229] ksys_write+0xd2/0x1b0 [ 1960.793255] ? __ia32_sys_read+0xb0/0xb0 [ 1960.793298] ? fput+0x1d/0x120 [ 1960.793324] ? filp_close+0xf3/0x130 [ 1960.793349] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.793380] __x64_sys_write+0x73/0xb0 [ 1960.793407] do_syscall_64+0xaa/0x400 [ 1960.793433] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.793460] RIP: 0033:0x7f59df433164 [ 1960.793486] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.793541] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.793576] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.793620] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.793665] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.793696] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.793730] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.793768] [ 1960.793790] Allocated by task 11167: [ 1960.793816] save_stack+0x43/0xd0 [ 1960.793841] kasan_kmalloc+0xc4/0xe0 [ 1960.793880] kasan_slab_alloc+0x11/0x20 [ 1960.793905] kmem_cache_alloc+0xd7/0x270 [ 1960.793944] getname_flags+0xbd/0x520 [ 1960.793969] user_path_at_empty+0x23/0x50 [ 1960.793994] do_faccessat+0x1fc/0x5d0 [ 1960.794018] __x64_sys_access+0x59/0x80 [ 1960.794043] do_syscall_64+0xaa/0x400 [ 1960.794067] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794093] [ 1960.794127] Freed by task 11167: [ 1960.794152] save_stack+0x43/0xd0 [ 1960.794190] __kasan_slab_free+0x139/0x190 [ 1960.794215] kasan_slab_free+0xe/0x10 [ 1960.794239] kmem_cache_free+0xcb/0x2c0 [ 1960.794264] putname+0xad/0xe0 [ 1960.794287] filename_lookup.part.59+0x1f1/0x360 [ 1960.794313] user_path_at_empty+0x3e/0x50 [ 1960.794338] do_faccessat+0x1fc/0x5d0 [ 1960.794362] __x64_sys_access+0x59/0x80 [ 1960.794393] do_syscall_64+0xaa/0x400 [ 1960.794421] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.794461] [ 1960.794483] The buggy address belongs to the object at ffff88075acceac0 [ 1960.794483] which belongs to the cache names_cache of size 4096 [ 1960.794540] The buggy address is located 2192 bytes inside of [ 1960.794540] 4096-byte region [ffff88075acceac0, ffff88075accfac0) [ 1960.794581] The buggy address belongs to the page: [ 1960.794609] page:ffffea001d6b3200 count:1 mapcount:0 mapping:ffff880778e4b1c0 index:0x0 compound_mapcount: 0 [ 1960.794651] flags: 0x8000000000008100(slab|head) [ 1960.794679] raw: 8000000000008100 ffffea001d39e808 ffffea001d39ea08 ffff880778e4b1c0 [ 1960.794739] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 1960.794785] page dumped because: kasan: bad access detected [ 1960.794813] [ 1960.794834] Memory state around the buggy address: [ 1960.794861] ffff88075accf200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794894] ffff88075accf280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794925] >ffff88075accf300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.794956] ^ [ 1960.794985] ffff88075accf380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795017] ffff88075accf400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 1960.795061] ================================================================== [ 1960.795106] Disabling lock debugging due to kernel taint [ 1960.795131] ------------[ cut here ]------------ [ 1960.795148] ida_remove called for id=1802201963 which is not allocated. [ 1960.795193] WARNING: CPU: 7 PID: 11185 at lib/idr.c:521 ida_remove+0x184/0x210 [ 1960.795213] Modules linked in: nouveau(O) mxm_wmi ttm i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm joydev vfat fat intel_rapl x86_pkg_temp_thermal coretemp crc32_pclmul iTCO_wdt psmouse wmi_bmof mei_me tpm_tis mei tpm_tis_core tpm i2c_i801 thinkpad_acpi pcc_cpufreq crc32c_intel serio_raw xhci_pci xhci_hcd wmi video i2c_dev i2c_core [ 1960.795305] CPU: 7 PID: 11185 Comm: zsh Kdump: loaded Tainted: G B O 4.18.0Lyude-Test+ #4 [ 1960.795330] Hardware name: LENOVO 20EQS64N0B/20EQS64N0B, BIOS N1EET79W (1.52 ) 07/13/2018 [ 1960.795352] RIP: 0010:ida_remove+0x184/0x210 [ 1960.795370] Code: 4c 89 f7 e8 ae c8 00 00 eb 22 41 83 c4 02 4c 89 e8 41 83 fc 3f 0f 86 64 ff ff ff 44 89 fe 48 c7 c7 20 94 1e 83 e8 54 ed 81 fe <0f> 0b 48 b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 [ 1960.795402] RSP: 0018:ffff88074d4df7b8 EFLAGS: 00010082 [ 1960.795421] RAX: 0000000000000000 RBX: 1ffff100e9a9befa RCX: ffffffff81479975 [ 1960.795440] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88077c1de690 [ 1960.795460] RBP: ffff88074d4df878 R08: ffffed00ef83bcd3 R09: ffffed00ef83bcd2 [ 1960.795479] R10: ffffed00ef83bcd2 R11: ffff88077c1de697 R12: 000000000000036b [ 1960.795498] R13: 0000000000000202 R14: ffffffffa0aa7fa0 R15: 000000006b6b6b6b [ 1960.795518] FS: 00007f59e0995b80(0000) GS:ffff88077c1c0000(0000) knlGS:0000000000000000 [ 1960.795553] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1960.795571] CR2: 00007f59e09a2010 CR3: 00000004a1a70005 CR4: 00000000003606e0 [ 1960.795596] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1960.795629] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1960.795649] Call Trace: [ 1960.795667] ? ida_destroy+0x1d0/0x1d0 [ 1960.795686] ? kasan_check_write+0x14/0x20 [ 1960.795704] ? do_raw_spin_lock+0xc2/0x1c0 [ 1960.795724] ida_simple_remove+0x26/0x40 [ 1960.795794] nouveau_backlight_exit+0x9d/0x150 [nouveau] [ 1960.795867] nouveau_display_destroy+0x76/0x150 [nouveau] [ 1960.795930] nouveau_drm_device_fini+0xb7/0x190 [nouveau] [ 1960.795989] nouveau_drm_device_remove+0x14b/0x1d0 [nouveau] [ 1960.796047] ? nouveau_cli_work_queue+0x2e0/0x2e0 [nouveau] [ 1960.796067] ? trace_hardirqs_on_caller+0x38b/0x570 [ 1960.796089] ? trace_hardirqs_on+0xd/0x10 [ 1960.796146] nouveau_drm_remove+0x37/0x50 [nouveau] [ 1960.796167] pci_device_remove+0x112/0x2d0 [ 1960.796186] ? pcibios_free_irq+0x10/0x10 [ 1960.796218] ? kasan_check_write+0x14/0x20 [ 1960.796237] device_release_driver_internal+0x35c/0x650 [ 1960.796257] device_release_driver+0x12/0x20 [ 1960.796289] pci_stop_bus_device+0x172/0x1e0 [ 1960.796308] pci_stop_and_remove_bus_device_locked+0x1a/0x30 [ 1960.796328] remove_store+0xcb/0xe0 [ 1960.796345] ? sriov_numvfs_store+0x2e0/0x2e0 [ 1960.796364] ? __lock_is_held+0xb5/0x140 [ 1960.796383] ? component_add+0x530/0x530 [ 1960.796401] dev_attr_store+0x3f/0x70 [ 1960.796419] ? sysfs_file_ops+0x11d/0x170 [ 1960.796436] sysfs_kf_write+0x104/0x150 [ 1960.796454] ? sysfs_file_ops+0x170/0x170 [ 1960.796471] kernfs_fop_write+0x24f/0x400 [ 1960.796488] ? __lock_acquire+0x6ea/0x47f0 [ 1960.796520] __vfs_write+0xeb/0x760 [ 1960.796538] ? kernel_read+0x130/0x130 [ 1960.796556] ? __lock_is_held+0xb5/0x140 [ 1960.796590] ? rcu_read_lock_sched_held+0xdd/0x110 [ 1960.796608] ? rcu_sync_lockdep_assert+0x78/0xb0 [ 1960.796626] ? __sb_start_write+0x183/0x220 [ 1960.796648] vfs_write+0x14d/0x4a0 [ 1960.796666] ksys_write+0xd2/0x1b0 [ 1960.796684] ? __ia32_sys_read+0xb0/0xb0 [ 1960.796701] ? fput+0x1d/0x120 [ 1960.796732] ? filp_close+0xf3/0x130 [ 1960.796749] ? entry_SYSCALL_64_after_hwframe+0x59/0xbe [ 1960.796768] __x64_sys_write+0x73/0xb0 [ 1960.796800] do_syscall_64+0xaa/0x400 [ 1960.796818] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1960.796836] RIP: 0033:0x7f59df433164 [ 1960.796854] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 81 38 2d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 1960.796884] RSP: 002b:00007ffd70ee2fb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 1960.796906] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f59df433164 [ 1960.796926] RDX: 0000000000000002 RSI: 00005578088640c0 RDI: 0000000000000001 [ 1960.796946] RBP: 00005578088640c0 R08: 00007f59df7038c0 R09: 00007f59e0995b80 [ 1960.796966] R10: 000000000000000a R11: 0000000000000246 R12: 00007f59df702760 [ 1960.796985] R13: 0000000000000002 R14: 00007f59df6fd760 R15: 0000000000000002 [ 1960.797008] irq event stamp: 509990 [ 1960.797026] hardirqs last enabled at (509989): [<ffffffff8119ff78>] flush_work+0x4b8/0x6d0 [ 1960.797063] hardirqs last disabled at (509990): [<ffffffff8297c395>] _raw_spin_lock_irqsave+0x25/0x60 [ 1960.797085] softirqs last enabled at (509744): [<ffffffff82c005ad>] __do_softirq+0x5ad/0x8c0 [ 1960.797121] softirqs last disabled at (509735): [<ffffffff8115aa15>] irq_exit+0x1a5/0x1e0 [ 1960.797142] ---[ end trace fb1342325f1846b8 ]--- While I haven't actually gone into the details of what's causing this to happen (maybe the kernel removes the backlight device in the device core before we get to it?), it doesn't really matter anyway because the way nouveau handles backlights has long since been deprecated. According to the documentation on the drm_connector->late_register() hook, the ->late_register() hook should be used for adding extra connector-related devices. Vice versa, the ->early_unregister() hook is meant to be used for removing those devices. So: gut nouveau_drm->bl_list and nouveau_drm->backlight, and replace them with per-connector backlight structures. Additionally, move backlight registration/teardown into the ->late_register() and ->early_unregister() hooks so that DRM can give us a chance to remove the backlight before the connector is even removed. This appears to fix the problem once and for all. Changes since v2: - Use NV_INFO_ONCE for printing GMUX information, since otherwise this will end up printing that message for as many times as we have connectors Signed-off-by: Lyude Paul <lyude@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2018-09-06 21:43:23 +00:00
if (bl->id >= 0)
ida_simple_remove(&bl_ida, bl->id);
backlight_device_unregister(bl->dev);
nv_conn->backlight = NULL;
kfree(bl);
}
void
nouveau_backlight_ctor(void)
{
ida_init(&bl_ida);
}
void
nouveau_backlight_dtor(void)
{
ida_destroy(&bl_ida);
}