From 3ae0415d0bb401abad1db7468105e3d3756e153f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:16:45 -0300 Subject: [PATCH 01/44] tools headers UAPI: Update tools's copy of drm.h headers Picking the changes from: 0e0dc448005583a6 ("drm/doc: demote old doc-comments in drm.h") Silencing these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h No changes in tooling as these are just C comment documentation changes. Cc: Simon Ser Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 97 ++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 808b48a93330..0827037c5484 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1,11 +1,10 @@ -/** - * \file drm.h +/* * Header for the Direct Rendering Manager * - * \author Rickard E. (Rik) Faith + * Author: Rickard E. (Rik) Faith * - * \par Acknowledgments: - * Dec 1999, Richard Henderson , move to generic \c cmpxchg. + * Acknowledgments: + * Dec 1999, Richard Henderson , move to generic cmpxchg. */ /* @@ -85,7 +84,7 @@ typedef unsigned int drm_context_t; typedef unsigned int drm_drawable_t; typedef unsigned int drm_magic_t; -/** +/* * Cliprect. * * \warning: If you change this structure, make sure you change @@ -101,7 +100,7 @@ struct drm_clip_rect { unsigned short y2; }; -/** +/* * Drawable information. */ struct drm_drawable_info { @@ -109,7 +108,7 @@ struct drm_drawable_info { struct drm_clip_rect *rects; }; -/** +/* * Texture region, */ struct drm_tex_region { @@ -120,7 +119,7 @@ struct drm_tex_region { unsigned int age; }; -/** +/* * Hardware lock. * * The lock structure is a simple cache-line aligned integer. To avoid @@ -132,7 +131,7 @@ struct drm_hw_lock { char padding[60]; /**< Pad to cache line */ }; -/** +/* * DRM_IOCTL_VERSION ioctl argument type. * * \sa drmGetVersion(). @@ -149,7 +148,7 @@ struct drm_version { char __user *desc; /**< User-space buffer to hold desc */ }; -/** +/* * DRM_IOCTL_GET_UNIQUE ioctl argument type. * * \sa drmGetBusid() and drmSetBusId(). @@ -168,7 +167,7 @@ struct drm_block { int unused; }; -/** +/* * DRM_IOCTL_CONTROL ioctl argument type. * * \sa drmCtlInstHandler() and drmCtlUninstHandler(). @@ -183,7 +182,7 @@ struct drm_control { int irq; }; -/** +/* * Type of memory to map. */ enum drm_map_type { @@ -195,7 +194,7 @@ enum drm_map_type { _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ }; -/** +/* * Memory mapping flags. */ enum drm_map_flags { @@ -214,7 +213,7 @@ struct drm_ctx_priv_map { void *handle; /**< Handle of map */ }; -/** +/* * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls * argument type. * @@ -231,7 +230,7 @@ struct drm_map { /* Private data */ }; -/** +/* * DRM_IOCTL_GET_CLIENT ioctl argument type. */ struct drm_client { @@ -263,7 +262,7 @@ enum drm_stat_type { /* Add to the *END* of the list */ }; -/** +/* * DRM_IOCTL_GET_STATS ioctl argument type. */ struct drm_stats { @@ -274,7 +273,7 @@ struct drm_stats { } data[15]; }; -/** +/* * Hardware locking flags. */ enum drm_lock_flags { @@ -289,7 +288,7 @@ enum drm_lock_flags { _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ }; -/** +/* * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. * * \sa drmGetLock() and drmUnlock(). @@ -299,7 +298,7 @@ struct drm_lock { enum drm_lock_flags flags; }; -/** +/* * DMA flags * * \warning @@ -328,7 +327,7 @@ enum drm_dma_flags { _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ }; -/** +/* * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. * * \sa drmAddBufs(). @@ -351,7 +350,7 @@ struct drm_buf_desc { */ }; -/** +/* * DRM_IOCTL_INFO_BUFS ioctl argument type. */ struct drm_buf_info { @@ -359,7 +358,7 @@ struct drm_buf_info { struct drm_buf_desc __user *list; }; -/** +/* * DRM_IOCTL_FREE_BUFS ioctl argument type. */ struct drm_buf_free { @@ -367,7 +366,7 @@ struct drm_buf_free { int __user *list; }; -/** +/* * Buffer information * * \sa drm_buf_map. @@ -379,7 +378,7 @@ struct drm_buf_pub { void __user *address; /**< Address of buffer */ }; -/** +/* * DRM_IOCTL_MAP_BUFS ioctl argument type. */ struct drm_buf_map { @@ -392,7 +391,7 @@ struct drm_buf_map { struct drm_buf_pub __user *list; /**< Buffer information */ }; -/** +/* * DRM_IOCTL_DMA ioctl argument type. * * Indices here refer to the offset into the buffer list in drm_buf_get. @@ -417,7 +416,7 @@ enum drm_ctx_flags { _DRM_CONTEXT_2DONLY = 0x02 }; -/** +/* * DRM_IOCTL_ADD_CTX ioctl argument type. * * \sa drmCreateContext() and drmDestroyContext(). @@ -427,7 +426,7 @@ struct drm_ctx { enum drm_ctx_flags flags; }; -/** +/* * DRM_IOCTL_RES_CTX ioctl argument type. */ struct drm_ctx_res { @@ -435,14 +434,14 @@ struct drm_ctx_res { struct drm_ctx __user *contexts; }; -/** +/* * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. */ struct drm_draw { drm_drawable_t handle; }; -/** +/* * DRM_IOCTL_UPDATE_DRAW ioctl argument type. */ typedef enum { @@ -456,14 +455,14 @@ struct drm_update_draw { unsigned long long data; }; -/** +/* * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. */ struct drm_auth { drm_magic_t magic; }; -/** +/* * DRM_IOCTL_IRQ_BUSID ioctl argument type. * * \sa drmGetInterruptFromBusID(). @@ -505,7 +504,7 @@ struct drm_wait_vblank_reply { long tval_usec; }; -/** +/* * DRM_IOCTL_WAIT_VBLANK ioctl argument type. * * \sa drmWaitVBlank(). @@ -518,7 +517,7 @@ union drm_wait_vblank { #define _DRM_PRE_MODESET 1 #define _DRM_POST_MODESET 2 -/** +/* * DRM_IOCTL_MODESET_CTL ioctl argument type * * \sa drmModesetCtl(). @@ -528,7 +527,7 @@ struct drm_modeset_ctl { __u32 cmd; }; -/** +/* * DRM_IOCTL_AGP_ENABLE ioctl argument type. * * \sa drmAgpEnable(). @@ -537,7 +536,7 @@ struct drm_agp_mode { unsigned long mode; /**< AGP mode */ }; -/** +/* * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. * * \sa drmAgpAlloc() and drmAgpFree(). @@ -549,7 +548,7 @@ struct drm_agp_buffer { unsigned long physical; /**< Physical used by i810 */ }; -/** +/* * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. * * \sa drmAgpBind() and drmAgpUnbind(). @@ -559,7 +558,7 @@ struct drm_agp_binding { unsigned long offset; /**< In bytes -- will round to page boundary */ }; -/** +/* * DRM_IOCTL_AGP_INFO ioctl argument type. * * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), @@ -580,7 +579,7 @@ struct drm_agp_info { unsigned short id_device; }; -/** +/* * DRM_IOCTL_SG_ALLOC ioctl argument type. */ struct drm_scatter_gather { @@ -588,7 +587,7 @@ struct drm_scatter_gather { unsigned long handle; /**< Used for mapping / unmapping */ }; -/** +/* * DRM_IOCTL_SET_VERSION ioctl argument type. */ struct drm_set_version { @@ -598,14 +597,14 @@ struct drm_set_version { int drm_dd_minor; }; -/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ struct drm_gem_close { /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +/* DRM_IOCTL_GEM_FLINK ioctl argument type */ struct drm_gem_flink { /** Handle for the object being named */ __u32 handle; @@ -614,7 +613,7 @@ struct drm_gem_flink { __u32 name; }; -/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +/* DRM_IOCTL_GEM_OPEN ioctl argument type */ struct drm_gem_open { /** Name of object being opened */ __u32 name; @@ -652,7 +651,7 @@ struct drm_gem_open { #define DRM_CAP_SYNCOBJ 0x13 #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -/** DRM_IOCTL_GET_CAP ioctl argument type */ +/* DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { __u64 capability; __u64 value; @@ -678,7 +677,9 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_ATOMIC * - * If set to 1, the DRM core will expose atomic properties to userspace + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -698,7 +699,7 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 -/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; __u64 value; @@ -950,7 +951,7 @@ extern "C" { #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) -/** +/* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. * Generic IOCTLS restart at 0xA0. @@ -961,7 +962,7 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/** +/* * Header for events written back to userspace on the drm fd. The * type defines the type of event, the length specifies the total * length of the event (including the header), and user_data is From c2446944b3f588d6a0186f2022a2999c90e0cb63 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:21:00 -0300 Subject: [PATCH 02/44] tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time") 348fb0cb0a79bce0 ("drm/i915/pmu: Deprecate I915_PMU_LAST and optimize state tracking") That don't result in any change in tooling: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/i915_drm.h tools/include/uapi/drm/i915_drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after $ Only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index fa1f3d62f9a6..1987e2ea79a3 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */ From 1e61463cfcd0b3e7a19ba36b8a98c64ebaac5c6e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:44:37 -0300 Subject: [PATCH 03/44] tools headers UAPI: Sync openat2.h with the kernel sources To pick the changes in: 99668f618062816c ("fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED") That don't result in any change in tooling, only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/openat2.h' differs from latest version at 'include/uapi/linux/openat2.h' diff -u tools/include/uapi/linux/openat2.h include/uapi/linux/openat2.h Cc: Al Viro Cc: Jens Axboe Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/openat2.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h index 58b1eb711360..a5feb7604948 100644 --- a/tools/include/uapi/linux/openat2.h +++ b/tools/include/uapi/linux/openat2.h @@ -35,5 +35,9 @@ struct open_how { #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." be scoped inside the dirfd (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ #endif /* _UAPI_LINUX_OPENAT2_H */ From add76c0113ba6343a221f1ba1fa5edc8963db07c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:48:05 -0300 Subject: [PATCH 04/44] perf arch powerpc: Sync powerpc syscall.tbl with the kernel sources To get the changes in: fbcee2ebe8edbb6a ("powerpc/32: Always save non volatile GPRs at syscall entry") That shouldn't cause any change in tooling, just silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' Cc: Christophe Leroy Cc: Michael Ellerman Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/powerpc/entry/syscalls/syscall.tbl | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index f744eb5cba88..96b2157f0371 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -9,9 +9,7 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 32 fork ppc_fork sys_fork -2 64 fork sys_fork -2 spu fork sys_ni_syscall +2 nospu fork sys_fork 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -160,9 +158,7 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 32 clone ppc_clone sys_clone -120 64 clone sys_clone -120 spu clone sys_ni_syscall +120 nospu clone sys_clone 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -244,9 +240,7 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 32 vfork ppc_vfork sys_vfork -189 64 vfork sys_vfork -189 spu vfork sys_ni_syscall +189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -322,9 +316,7 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext compat_sys_swapcontext -249 64 swapcontext sys_swapcontext -249 spu swapcontext sys_ni_syscall +249 nospu swapcontext sys_swapcontext compat_sys_swapcontext 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 251 64 utimes sys_utimes @@ -522,9 +514,7 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 32 clone3 ppc_clone3 sys_clone3 -435 64 clone3 sys_clone3 -435 spu clone3 sys_ni_syscall +435 nospu clone3 sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd From 303550a44741de7e853d1c0f1d252a8719a88cb1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:51:17 -0300 Subject: [PATCH 05/44] tools headers UAPI s390: Sync ptrace.h kernel headers To pick up the changes from: 56e62a7370283601 ("s390: convert to generic entry") That only adds two new defines, so shouldn't cause problems when building the BPF selftests. Silencing this perf build warning: Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/ptrace.h' differs from latest version at 'arch/s390/include/uapi/asm/ptrace.h' diff -u tools/arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/ptrace.h Cc: Hendrik Brueckner Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/ptrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h index 543dd70e12c8..ad64d673b5e6 100644 --- a/tools/arch/s390/include/uapi/asm/ptrace.h +++ b/tools/arch/s390/include/uapi/asm/ptrace.h @@ -179,8 +179,9 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 - +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ #include #include From 21b7e35bdf0a0e44525ec4e8a7862eb4a8df8ebe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:56:50 -0300 Subject: [PATCH 06/44] tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: d9a47edabc4f9481 ("KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR") 8d4e7e80838f45d3 ("KVM: x86: declare Xen HVM shared info capability and add test case") 40da8ccd724f7ca2 ("KVM: x86/xen: Add event channel interrupt vector upcall") These new IOCTLs are now supported on 'perf trace': $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2021-02-23 09:55:46.229058308 -0300 +++ after 2021-02-23 09:55:57.509308058 -0300 @@ -91,6 +91,10 @@ [0xc1] = "GET_SUPPORTED_HV_CPUID", [0xc6] = "X86_SET_MSR_FILTER", [0xc7] = "RESET_DIRTY_RINGS", + [0xc8] = "XEN_HVM_GET_ATTR", + [0xc9] = "XEN_HVM_SET_ATTR", + [0xca] = "XEN_VCPU_GET_ATTR", + [0xcb] = "XEN_VCPU_SET_ATTR", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: David Woodhouse Cc: Paul Mackerras Cc: Ravi Bangoria Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index abb89bbe5635..8b281f722e5b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -216,6 +216,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -252,6 +266,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 #define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -428,6 +444,8 @@ struct kvm_run { __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; /* Fix the size of the union. */ char padding[256]; }; @@ -1058,6 +1076,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING @@ -1132,6 +1151,10 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -1566,6 +1589,45 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1594,6 +1656,8 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, KVM_SEV_NR_MAX, }; @@ -1646,6 +1710,12 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1767,4 +1837,7 @@ struct kvm_dirty_gfn { __u64 offset; }; +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + #endif /* __LINUX_KVM_H */ From ded2e511a8af9f14482b11225f73db63231fc7a4 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 24 Feb 2021 18:24:10 +0000 Subject: [PATCH 07/44] perf tools: Cast (struct timeval).tv_sec when printing The musl-libc [1] defines (struct timeval).tv_sec as a 'long long' for arm and other architectures. The default build having a '-Wformat' flag, not casting the field when printing prevents from building perf. This patch casts the (struct timeval).tv_sec fields to the expected format. [1] git://git.musl-libc.org/musl Signed-off-by: Pierre Gondois Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Douglas.raillard@arm.com Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210224182410.5366-1-Pierre.Gondois@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 4 ++-- tools/perf/bench/sched-pipe.c | 4 ++-- tools/perf/bench/syscall.c | 4 ++-- tools/perf/util/header.c | 4 ++-- tools/perf/util/stat-display.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index cecce93ccc63..488f6e6ba1a5 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -309,11 +309,11 @@ int bench_sched_messaging(int argc, const char **argv) num_groups, num_groups * 2 * num_fds, thread_mode ? "threads" : "processes"); printf(" %14s: %lu.%03lu [sec]\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; case BENCH_FORMAT_SIMPLE: - printf("%lu.%03lu\n", diff.tv_sec, + printf("%lu.%03lu\n", (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; default: diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3c88d1f201f1..a960e7a93aec 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -156,7 +156,7 @@ int bench_sched_pipe(int argc, const char **argv) result_usec += diff.tv_usec; printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); printf(" %14lf usecs/op\n", @@ -168,7 +168,7 @@ int bench_sched_pipe(int argc, const char **argv) case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index 5fe621cff8e9..9b751016f4b6 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -54,7 +54,7 @@ int bench_syscall_basic(int argc, const char **argv) result_usec += diff.tv_usec; printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec/1000)); printf(" %14lf usecs/op\n", @@ -66,7 +66,7 @@ int bench_syscall_basic(int argc, const char **argv) case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / 1000)); break; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4fe9e2a54346..20effdff76ce 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1618,8 +1618,8 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp) fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid); fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n", - tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec, - clockid_ns.tv_sec, clockid_ns.tv_nsec, + tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec, + (long) clockid_ns.tv_sec, clockid_ns.tv_nsec, clockid_name(clockid)); } diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cce7a76d6473..7f09cdaf5b60 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -983,7 +983,7 @@ static void print_interval(struct perf_stat_config *config, if (config->interval_clear) puts(CONSOLE_CLEAR); - sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); + sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { switch (config->aggr_mode) { From 762323eb39a257c3b9875172d5ee134bd448692c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2021 16:08:31 +0100 Subject: [PATCH 08/44] perf build: Move feature cleanup under tools/build Arnaldo reported issue for following build command: $ rm -rf /tmp/krava; mkdir /tmp/krava; make O=/tmp/krava clean CLEAN config /bin/sh: line 0: cd: /tmp/krava/feature/: No such file or directory ../../scripts/Makefile.include:17: *** output directory "/tmp/krava/feature/" does not exist. Stop. make[1]: *** [Makefile.perf:1010: config-clean] Error 2 make: *** [Makefile:90: clean] Error 2 The problem is that now that we include scripts/Makefile.include in feature's Makefile (which is fine and needed), we need to ensure the OUTPUT directory exists, before executing (out of tree) clean command. Removing the feature's cleanup from perf Makefile and fixing feature's cleanup under build Makefile, so it now checks that there's existing OUTPUT directory before calling the clean. Fixes: 211a741cd3e1 ("tools: Factor Clang, LLC and LLVM utils definitions") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Sedat Dilek # LLVM/Clang v13-git Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210224150831.409639-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile | 8 +++++++- tools/perf/Makefile.perf | 10 +--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/build/Makefile b/tools/build/Makefile index bae48e6fa995..5ed41b96fcde 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -30,12 +30,18 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj all: $(OUTPUT)fixdep +# Make sure there's anything to clean, +# feature contains check for existing OUTPUT +TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./) + clean: $(call QUIET_CLEAN, fixdep) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)rm -f $(OUTPUT)fixdep $(call QUIET_CLEAN, feature-detect) - $(Q)$(MAKE) -C feature/ clean >/dev/null +ifneq ($(wildcard $(TMP_O)),) + $(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null +endif $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5345ac70cd83..536f6f90af92 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1001,14 +1001,6 @@ $(INSTALL_DOC_TARGETS): ### Cleaning rules -# -# This is here, not in Makefile.config, because Makefile.config does -# not get included for the clean target: -# -config-clean: - $(call QUIET_CLEAN, config) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null - python-clean: $(python-clean) @@ -1048,7 +1040,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected From 2b1919ec8338fad3e950f264c0c81f8b17eb6c7e Mon Sep 17 00:00:00 2001 From: Andreas Wendleder Date: Mon, 1 Mar 2021 19:56:42 +0100 Subject: [PATCH 09/44] perf tools: Clean 'generated' directory used for creating the syscall table on x86 Remove generated directory tools/perf/arch/x86/include/generated. Signed-off-by: Andreas Wendleder Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301185642.163396-1-gonsolo@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 8cc6642fce7a..5a9f9a7bf07d 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -10,10 +10,11 @@ PERF_HAVE_JITDUMP := 1 # Syscall table generation # -out := $(OUTPUT)arch/x86/include/generated/asm -header := $(out)/syscalls_64.c -sys := $(srctree)/tools/perf/arch/x86/entry/syscalls -systbl := $(sys)/syscalltbl.sh +generated := $(OUTPUT)arch/x86/include/generated +out := $(generated)/asm +header := $(out)/syscalls_64.c +sys := $(srctree)/tools/perf/arch/x86/entry/syscalls +systbl := $(sys)/syscalltbl.sh # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') @@ -22,6 +23,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl) $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: - $(call QUIET_CLEAN, x86) $(RM) $(header) + $(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated) archheaders: $(header) From ffc52b7ae5e6ff2b57c05fa8954fd4cae4efaab4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 2 Mar 2021 02:35:33 +0000 Subject: [PATCH 10/44] perf diff: Don't crash on freeing errno-session on the error path __cmd_diff() sets result of perf_session__new() to d->session. In case of failure, it's errno and perf-diff may crash with: failed to open perf.data: Permission denied Failed to open perf.data Segmentation fault (core dumped) From the coredump: 0 0x00005569a62b5955 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2681 1 0x00005569a626b37d in perf_session__delete (session=0xffffffffffffffff) at util/session.c:295 2 perf_session__delete (session=0xffffffffffffffff) at util/session.c:291 3 0x00005569a618008a in __cmd_diff () at builtin-diff.c:1239 4 cmd_diff (argc=, argv=) at builtin-diff.c:2011 [..] Funny enough, it won't always crash. For me it crashes only if failed file is second in cmd-line: the reason is that cmd_diff() check files for branch-stacks [in check_file_brstack()] and if the first file doesn't have brstacks, it doesn't proceed to try open other files from cmd-line. Check d->session before calling perf_session__delete(). Another solution would be assigning to temporary variable, checking it, but I find it easier to follow with IS_ERR() check in the same function. After some time it's still obvious why the check is needed, and with temp variable it's possible to make the same mistake. Committer testing: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf diff failed to open perf.data.old: No such file or directory Failed to open perf.data.old $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf diff # Event 'cycles:u' # # Baseline Delta Abs Shared Object Symbol # ........ ......... ................ .......................... # 0.92% +87.66% [unknown] [k] 0xffffffff8825de16 11.39% +0.04% ld-2.32.so [.] __GI___tunables_init 87.70% ld-2.32.so [.] _dl_check_map_versions $ sudo chown root:root perf.data [sudo] password for acme: $ perf diff failed to open perf.data: Permission denied Failed to open perf.data Segmentation fault (core dumped) $ After the patch: $ perf diff failed to open perf.data: Permission denied Failed to open perf.data $ Signed-off-by: Dmitry Safonov Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: dmitry safonov Link: http://lore.kernel.org/lkml/20210302023533.1572231-1-dima@arista.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8f6c784ce629..878e04b1fab7 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1236,7 +1236,8 @@ static int __cmd_diff(void) out_delete: data__for_each_file(i, d) { - perf_session__delete(d->session); + if (!IS_ERR(d->session)) + perf_session__delete(d->session); data__free(d); } From 394e4306b093d037bddcee7e1f0e8e6c53a558fc Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 25 Feb 2021 11:50:02 -0500 Subject: [PATCH 11/44] perf bench numa: Fix the condition checks for max number of NUMA nodes In systems having higher node numbers available like node 255, perf numa bench will fail with SIGABORT. <<>> perf: bench/numa.c:1416: init: Assertion `!(g->p.nr_nodes > 64 || g->p.nr_nodes < 0)' failed. Aborted (core dumped) <<>> Snippet from 'numactl -H' below on a powerpc system where the highest node number available is 255: available: 6 nodes (0,8,252-255) node 0 cpus: node 0 size: 519587 MB node 0 free: 516659 MB node 8 cpus: node 8 size: 523607 MB node 8 free: 486757 MB node 252 cpus: node 252 size: 0 MB node 252 free: 0 MB node 253 cpus: node 253 size: 0 MB node 253 free: 0 MB node 254 cpus: node 254 size: 0 MB node 254 free: 0 MB node 255 cpus: node 255 size: 0 MB node 255 free: 0 MB node distances: node 0 8 252 253 254 255 Note: expands to actual cpu list in the original output. These nodes 252-255 are to represent the memory on GPUs and are valid nodes. The perf numa bench init code has a condition check to see if the number of NUMA nodes (nr_nodes) exceeds MAX_NR_NODES. The value of MAX_NR_NODES defined in perf code is 64. And the 'nr_nodes' is the value from numa_max_node() which represents the highest node number available in the system. In some systems where we could have NUMA node 255, this condition check fails and results in SIGABORT. The numa benchmark uses static value of MAX_NR_NODES in the code to represent size of two NUMA node arrays and node bitmask used for setting memory policy. Patch adds a fix to dynamically allocate size for the two arrays and bitmask value based on the node numbers available in the system. With the fix, perf numa benchmark will work with node configuration on any system and thus removes the static MAX_NR_NODES value. Signed-off-by: Athira Jajeev Reviewed-by: Srikar Dronamraju Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/1614271802-1503-1-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 42 ++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 11726ec6285f..20b87e29c96f 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -344,18 +344,22 @@ static void mempol_restore(void) static void bind_to_memnode(int node) { - unsigned long nodemask; + struct bitmask *node_mask; int ret; if (node == NUMA_NO_NODE) return; - BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); - nodemask = 1L << node; + node_mask = numa_allocate_nodemask(); + BUG_ON(!node_mask); - ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); - dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret); + numa_bitmask_clearall(node_mask); + numa_bitmask_setbit(node_mask, node); + ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1); + dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret); + + numa_bitmask_free(node_mask); BUG_ON(ret); } @@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked) prctl(0, bytes_worked); } -#define MAX_NR_NODES 64 - /* * Count the number of nodes a process's threads * are spread out on. @@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked) */ static int count_process_nodes(int process_nr) { - char node_present[MAX_NR_NODES] = { 0, }; + char *node_present; int nodes; int n, t; + node_present = (char *)malloc(g->p.nr_nodes * sizeof(char)); + BUG_ON(!node_present); + for (nodes = 0; nodes < g->p.nr_nodes; nodes++) + node_present[nodes] = 0; + for (t = 0; t < g->p.nr_threads; t++) { struct thread_data *td; int task_nr; @@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr) td = g->threads + task_nr; node = numa_node_of_cpu(td->curr_cpu); - if (node < 0) /* curr_cpu was likely still -1 */ + if (node < 0) /* curr_cpu was likely still -1 */ { + free(node_present); return 0; + } node_present[node] = 1; } nodes = 0; - for (n = 0; n < MAX_NR_NODES; n++) + for (n = 0; n < g->p.nr_nodes; n++) nodes += node_present[n]; + free(node_present); return nodes; } @@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) { unsigned int loops_done_min, loops_done_max; int process_groups; - int nodes[MAX_NR_NODES]; + int *nodes; int distance; int nr_min; int nr_max; @@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) if (!g->p.show_convergence && !g->p.measure_convergence) return; + nodes = (int *)malloc(g->p.nr_nodes * sizeof(int)); + BUG_ON(!nodes); for (node = 0; node < g->p.nr_nodes; node++) nodes[node] = 0; @@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence) BUG_ON(sum > g->p.nr_tasks); - if (0 && (sum < g->p.nr_tasks)) + if (0 && (sum < g->p.nr_tasks)) { + free(nodes); return; + } /* * Count the number of distinct process groups present @@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) } tprintf("\n"); } + + free(nodes); } static void show_summary(double runtime_ns_max, int l, double *convergence) @@ -1413,7 +1429,7 @@ static int init(void) g->p.nr_nodes = numa_max_node() + 1; /* char array in count_process_nodes(): */ - BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0); + BUG_ON(g->p.nr_nodes < 0); if (g->p.show_quiet && !g->p.show_details) g->p.show_details = -1; From 137a5258939aca56558f3a23eb229b9c4b293917 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 Feb 2021 14:14:31 -0800 Subject: [PATCH 12/44] perf traceevent: Ensure read cmdlines are null terminated. Issue detected by address sanitizer. Fixes: cd4ceb63438e9e28 ("perf util: Save pid-cmdline mapping into tracing header") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210226221431.1985458-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-read.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f507dff713c9..8a01af783310 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent) pr_debug("error reading saved cmdlines\n"); goto out; } + buf[ret] = '\0'; parse_saved_cmdline(pevent, buf, size); ret = 0; From b55ff1d1456c86209ba28fd06b1b5fb0e05d92c3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 Feb 2021 10:31:44 -0800 Subject: [PATCH 13/44] perf tools: Fix documentation of verbose options Option doesn't take a value, make sure the man pages agree. For example: $ perf evlist --verbose=1 Error: option `verbose' takes no value Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210226183145.1878782-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-evlist.txt | 2 +- tools/perf/Documentation/perf-ftrace.txt | 4 ++-- tools/perf/Documentation/perf-kallsyms.txt | 2 +- tools/perf/Documentation/perf-trace.txt | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index c0a66400a960..9af8b8dfb7b6 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -29,7 +29,7 @@ OPTIONS Show just the sample frequency used for each event. -v:: ---verbose=:: +--verbose:: Show all fields. -g:: diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 1e91121bac0f..6e82b7cc0bf0 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -28,8 +28,8 @@ OPTIONS specified: function_graph or function. -v:: ---verbose=:: - Verbosity level. +--verbose:: + Increase the verbosity level. -F:: --funcs:: diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt index f3c620951f6e..c97527df8ecd 100644 --- a/tools/perf/Documentation/perf-kallsyms.txt +++ b/tools/perf/Documentation/perf-kallsyms.txt @@ -20,5 +20,5 @@ modules). OPTIONS ------- -v:: ---verbose=:: +--verbose:: Increase verbosity level, showing details about symbol table loading, etc. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index abc9b5d83312..f0da8cf63e9a 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -97,8 +97,8 @@ filter out the startup phase of the program, which is often very different. Filter out events for these pids and for 'trace' itself (comma separated list). -v:: ---verbose=:: - Verbosity level. +--verbose:: + Increase the verbosity level. --no-inherit:: Child tasks do not inherit counters. From dacfc08dcafa7d443ab339592999e37bbb8a3ef0 Mon Sep 17 00:00:00 2001 From: Antonio Terceiro Date: Wed, 24 Feb 2021 10:00:46 -0300 Subject: [PATCH 14/44] perf build: Fix ccache usage in $(CC) when generating arch errno table This was introduced by commit e4ffd066ff440a57 ("perf: Normalize gcc parameter when generating arch errno table"). Assuming the first word of $(CC) is the actual compiler breaks usage like CC="ccache gcc": the script ends up calling ccache directly with gcc arguments, what fails. Instead of getting the first word, just remove from $(CC) any word that starts with a "-". This maintains the spirit of the original patch, while not breaking ccache users. Fixes: e4ffd066ff440a57 ("perf: Normalize gcc parameter when generating arch errno table") Signed-off-by: Antonio Terceiro Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: He Zhe Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210224130046.346977-1-antonio.terceiro@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 536f6f90af92..f6e609673de2 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -607,7 +607,7 @@ arch_errno_hdr_dir := $(srctree)/tools arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh $(arch_errno_name_array): $(arch_errno_tbl) - $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@ + $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@ sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh From 31bf4e7cb61363b87f1606ec8efb71eebd6393cf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:25:09 +0100 Subject: [PATCH 15/44] perf daemon: Fix control fifo permissions Add proper mode for mkfifo calls to get read and write permissions for user. We can't use O_RDWR in here, changing to standard permission value. Fixes: 6a6d1804a190 ("perf daemon: Set control fifo for session") Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: John Garry Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122510.64402-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 617feaf020f6..8f0ed2e59280 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -373,12 +373,12 @@ static int daemon_session__run(struct daemon_session *session, dup2(fd, 2); close(fd); - if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) { + if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) { perror("failed: create control fifo"); return -1; } - if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) { + if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) { perror("failed: create ack fifo"); return -1; } From 36bc511f63fd21c0c44f973c6d064c1228ba15ae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:25:10 +0100 Subject: [PATCH 16/44] perf daemon: Fix running test for non root user John reported that the daemon test is not working for non root user. Changing the tests configurations so it's allowed to run under normal user. Fixes: 2291bb915b55 ("perf tests: Add daemon 'list' command test") Reported-by: John Garry Signed-off-by: Jiri Olsa Tested-by: John Garry Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122510.64402-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index e5b824dd08d9..5ad3ca8d681b 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -140,10 +140,10 @@ test_list() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -159,14 +159,14 @@ EOF # check 1st session # pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0 local line=`perf daemon --config ${config} -x: | head -2 | tail -1` - check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \ + check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \ ${base}/session-size/output ${base}/session-size/control \ ${base}/session-size/ack "0" # check 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control \ ${base}/session-time/ack "0" @@ -190,10 +190,10 @@ test_reconfig() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -204,7 +204,7 @@ EOF # check 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` @@ -215,10 +215,10 @@ EOF base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 EOF # TEST 1 - change config @@ -238,7 +238,7 @@ EOF # check reconfigured 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \ + check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" # TEST 2 - empty config @@ -309,10 +309,10 @@ test_stop() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -361,7 +361,7 @@ test_signal() base=BASE [session-test] -run = -e cpu-clock --switch-output +run = -e cpu-clock --switch-output -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -400,10 +400,10 @@ test_ping() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -439,7 +439,7 @@ test_lock() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} From 84ea603650ec41273cc97d50eb01feed8e6baa2e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:23:15 +0100 Subject: [PATCH 17/44] perf tools: Fix event's PMU name parsing Jin Yao reported parser error for software event: # perf stat -e software/r1a/ -a -- sleep 1 event syntax error: 'software/r1a/' \___ parser error This happens after commit 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time"), where new software-gt-awake-time event's non-pmu-event-style makes event parser conflict with software PMU. If we allow PE_PMU_EVENT_PRE to be parsed as PMU name, we fix the conflict and the following character '/' for PMU or '-' for non-pmu-event-style event allows parser to decide what even is specified. Fixes: 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time") Reported-by: Jin Yao Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Chris Wilson Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122315.63471-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d5b6aff82f21..d57ac86ce7ca 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -89,6 +89,7 @@ static void inc_group_count(struct list_head *list, %type PE_EVENT_NAME %type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type PE_DRV_CFG_TERM +%type event_pmu_name %destructor { free ($$); } %type event_term %destructor { parse_events_term__delete ($$); } @@ -272,8 +273,11 @@ event_def: event_pmu | event_legacy_raw sep_dc | event_bpf_file +event_pmu_name: +PE_NAME | PE_PMU_EVENT_PRE + event_pmu: -PE_NAME opt_pmu_config +event_pmu_name opt_pmu_config { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; From b0faef924d21d0a4592ec81c4bc2b4badc35a343 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:03:59 +0900 Subject: [PATCH 18/44] perf test: Fix cpu and thread map leaks in basic mmap test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. # perf test -v 4 4: Read samples using the mmap interface : --- start --- test child forked, pid 139782 mmap size 528384B ================================================================= ==139782==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f1f76daee8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x564ba21a0fea in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x564ba21a1a0f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x564ba21a21cf in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x564ba21a21cf in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x564ba1e48298 in test__basic_mmap tests/mmap-basic.c:55 #6 0x564ba1e278fb in run_test tests/builtin-test.c:428 #7 0x564ba1e278fb in test_and_print tests/builtin-test.c:458 #8 0x564ba1e29a53 in __cmd_test tests/builtin-test.c:679 #9 0x564ba1e29a53 in cmd_test tests/builtin-test.c:825 #10 0x564ba1e95cb4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x564ba1d1fa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x564ba1d1fa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x564ba1d1fa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7f1f768e4d09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Read samples using the mmap interface: FAILED! failed to open shell test directory: /home/namhyung/libexec/perf-core/tests/shell Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Mark Rutland Cc: Stephane Eranian Cc: Ian Rogers Cc: Peter Zijlstra Cc: Adrian Hunter Cc: Ingo Molnar Cc: Leo Yan Cc: Andi Kleen Cc: Alexander Shishkin Link: https://lore.kernel.org/r/20210301140409.184570-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 57093aeacc6f..73ae8f7aa066 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -158,8 +158,6 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse out_delete_evlist: evlist__delete(evlist); - cpus = NULL; - threads = NULL; out_free_cpus: perf_cpu_map__put(cpus); out_free_threads: From 09a61c8f86aee7b9c514c6906244a22ec37ef028 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:00 +0900 Subject: [PATCH 19/44] perf test: Fix a memory leak in attr test The get_argv_exec_path() returns a dynamic memory so it should be freed after use. $ perf test -v 17 ... ==141682==ERROR: LeakSanitizer: detected memory leaks Direct leak of 33 byte(s) in 1 object(s) allocated from: #0 0x7f09107d2e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x7f091035f6a7 in __vasprintf_internal libio/vasprintf.c:71 SUMMARY: AddressSanitizer: 33 byte(s) leaked in 1 allocation(s). Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index ec972e0892ab..dd39ce9b0277 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -182,14 +182,20 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) struct stat st; char path_perf[PATH_MAX]; char path_dir[PATH_MAX]; + char *exec_path; /* First try development tree tests. */ if (!lstat("./tests", &st)) return run_dir("./tests", "./perf"); + exec_path = get_argv_exec_path(); + if (exec_path == NULL) + return -1; + /* Then installed path. */ - snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path()); + snprintf(path_dir, PATH_MAX, "%s/tests", exec_path); snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR); + free(exec_path); if (!lstat(path_dir, &st) && !lstat(path_perf, &st)) From 83d25ccde591fe2356ba336e994b190361158b1e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:01 +0900 Subject: [PATCH 20/44] perf test: Fix cpu and thread map leaks in task_exit test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. Also change the goto label since it doesn't need to have two. # perf test -v 24 24: Number of exit events of a simple workload : --- start --- test child forked, pid 145915 mmap size 528384B ================================================================= ==145915==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fc44e50d1f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x561cf50f4d2e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x561cf4eeb949 in thread_map__new_by_tid util/thread_map.c:63 #3 0x561cf4db7fd2 in test__task_exit tests/task-exit.c:74 #4 0x561cf4d798fb in run_test tests/builtin-test.c:428 #5 0x561cf4d798fb in test_and_print tests/builtin-test.c:458 #6 0x561cf4d7ba53 in __cmd_test tests/builtin-test.c:679 #7 0x561cf4d7ba53 in cmd_test tests/builtin-test.c:825 #8 0x561cf4de7d04 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x561cf4c71a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x561cf4c71a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x561cf4c71a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fc44e042d09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Number of exit events of a simple workload: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/task-exit.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bbf94e4aa145..4c2969db59b0 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -75,14 +75,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_free_maps; + goto out_delete_evlist; } perf_evlist__set_maps(&evlist->core, cpus, threads); - cpus = NULL; - threads = NULL; - err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { pr_debug("Couldn't run the workload!\n"); @@ -137,7 +134,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (retry_count++ > 1000) { pr_debug("Failed after retrying 1000 times\n"); err = -1; - goto out_free_maps; + goto out_delete_evlist; } goto retry; @@ -148,10 +145,9 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused err = -1; } -out_free_maps: +out_delete_evlist: perf_cpu_map__put(cpus); perf_thread_map__put(threads); -out_delete_evlist: evlist__delete(evlist); return err; } From 97ab7c524fdcaf3098997f81bdf9d01157816f30 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:02 +0900 Subject: [PATCH 21/44] perf test: Fix cpu and thread map leaks in sw_clock_freq test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. Also change the goto label since it doesn't need to have two. # perf test -v 25 25: Software clock events period values : --- start --- test child forked, pid 149154 mmap size 528384B mmap size 528384B ================================================================= ==149154==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fef5cd071f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x56260d5e8b8e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x56260d3df7a9 in thread_map__new_by_tid util/thread_map.c:63 #3 0x56260d2ac6b2 in __test__sw_clock_freq tests/sw-clock.c:65 #4 0x56260d26d8fb in run_test tests/builtin-test.c:428 #5 0x56260d26d8fb in test_and_print tests/builtin-test.c:458 #6 0x56260d26fa53 in __cmd_test tests/builtin-test.c:679 #7 0x56260d26fa53 in cmd_test tests/builtin-test.c:825 #8 0x56260d2dbb64 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x56260d165a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x56260d165a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x56260d165a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fef5c83cd09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Software clock events period values : FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sw-clock.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index a49c9e23053b..74988846be1d 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -42,8 +42,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; - struct perf_cpu_map *cpus; - struct perf_thread_map *threads; + struct perf_cpu_map *cpus = NULL; + struct perf_thread_map *threads = NULL; struct mmap *md; attr.sample_freq = 500; @@ -66,14 +66,11 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_free_maps; + goto out_delete_evlist; } perf_evlist__set_maps(&evlist->core, cpus, threads); - cpus = NULL; - threads = NULL; - if (evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -129,10 +126,9 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) err = -1; } -out_free_maps: +out_delete_evlist: perf_cpu_map__put(cpus); perf_thread_map__put(threads); -out_delete_evlist: evlist__delete(evlist); return err; } From e06c3ca4922ccf24bd36c007a87f193b235cee93 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:03 +0900 Subject: [PATCH 22/44] perf test: Fix cpu and thread map leaks in code_reading test The evlist and the cpu/thread maps should be released together. Otherwise following error was reported by Asan. Note that this test still has memory leaks in DSOs so it still fails even after this change. I'll take a look at that too. # perf test -v 26 26: Object code reading : --- start --- test child forked, pid 154184 Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. symsrc__init: cannot get elf header. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols Parsing event 'cycles' mmap size 528384B ... ================================================================= ==154184==ERROR: LeakSanitizer: detected memory leaks Direct leak of 439 byte(s) in 1 object(s) allocated from: #0 0x7fcb66e77037 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154 #1 0x55ad9b7e821e in dso__new_id util/dso.c:1256 #2 0x55ad9b8cfd4a in __machine__addnew_vdso util/vdso.c:132 #3 0x55ad9b8cfd4a in machine__findnew_vdso util/vdso.c:347 #4 0x55ad9b845b7e in map__new util/map.c:176 #5 0x55ad9b8415a2 in machine__process_mmap2_event util/machine.c:1787 #6 0x55ad9b8fab16 in perf_tool__process_synth_event util/synthetic-events.c:64 #7 0x55ad9b8fab16 in perf_event__synthesize_mmap_events util/synthetic-events.c:499 #8 0x55ad9b8fbfdf in __event__synthesize_thread util/synthetic-events.c:741 #9 0x55ad9b8ff3e3 in perf_event__synthesize_thread_map util/synthetic-events.c:833 #10 0x55ad9b738585 in do_test_code_reading tests/code-reading.c:608 #11 0x55ad9b73b25d in test__code_reading tests/code-reading.c:722 #12 0x55ad9b6f28fb in run_test tests/builtin-test.c:428 #13 0x55ad9b6f28fb in test_and_print tests/builtin-test.c:458 #14 0x55ad9b6f4a53 in __cmd_test tests/builtin-test.c:679 #15 0x55ad9b6f4a53 in cmd_test tests/builtin-test.c:825 #16 0x55ad9b760cc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #17 0x55ad9b5eaa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #18 0x55ad9b5eaa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #19 0x55ad9b5eaa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #20 0x7fcb669acd09 in __libc_start_main ../csu/libc-start.c:308 ... SUMMARY: AddressSanitizer: 471 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Object code reading: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 280f0348a09c..2fdc7b2f996e 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -706,13 +706,9 @@ static int do_test_code_reading(bool try_kcore) out_put: thread__put(thread); out_err: - - if (evlist) { - evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); - } + evlist__delete(evlist); + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); machine__delete_threads(machine); machine__delete(machine); From f2c3202ba0c7746c50c71c14d1ab977d929c0a27 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:04 +0900 Subject: [PATCH 23/44] perf test: Fix cpu and thread map leaks in keep_tracking test The evlist and the cpu/thread maps should be released together. Otherwise following error was reported by Asan. $ perf test -v 28 28: Use a dummy software event to keep tracking: --- start --- test child forked, pid 156810 mmap size 528384B ================================================================= ==156810==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f637d2bce8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55cc6295cffa in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55cc6295da1f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x55cc6295e1df in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x55cc6295e1df in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x55cc626287cf in test__keep_tracking tests/keep-tracking.c:84 #6 0x55cc625e38fb in run_test tests/builtin-test.c:428 #7 0x55cc625e38fb in test_and_print tests/builtin-test.c:458 #8 0x55cc625e5a53 in __cmd_test tests/builtin-test.c:679 #9 0x55cc625e5a53 in cmd_test tests/builtin-test.c:825 #10 0x55cc62651cc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x55cc624dba88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x55cc624dba88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x55cc624dba88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7f637cdf2d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Use a dummy software event to keep tracking: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/keep-tracking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index e6f1b2a38e03..a0438b0f0805 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -154,10 +154,9 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un if (evlist) { evlist__disable(evlist); evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); } + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; } From 953e7b5960f1cf0825da60dbdc762e19b127a94c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:05 +0900 Subject: [PATCH 24/44] perf test: Fix cpu and thread map leaks in switch_tracking test The evlist and cpu/thread maps should be released together. Otherwise the following error was reported by Asan. $ perf test -v 35 35: Track with sched_switch : --- start --- test child forked, pid 159287 Using CPUID GenuineIntel-6-8E-C mmap size 528384B 1295 events recorded ================================================================= ==159287==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7fa28d9a2e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x5652f5a5affa in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x5652f5a5ba1f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x5652f5a5c1df in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x5652f5a5c1df in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x5652f5723bbf in test__switch_tracking tests/switch-tracking.c:350 #6 0x5652f56e18fb in run_test tests/builtin-test.c:428 #7 0x5652f56e18fb in test_and_print tests/builtin-test.c:458 #8 0x5652f56e3a53 in __cmd_test tests/builtin-test.c:679 #9 0x5652f56e3a53 in cmd_test tests/builtin-test.c:825 #10 0x5652f574fcc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x5652f55d9a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x5652f55d9a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x5652f55d9a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7fa28d4d8d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Track with sched_switch: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/switch-tracking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 15a2ab765d89..3ebaa758df77 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -574,10 +574,9 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ if (evlist) { evlist__disable(evlist); evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); } + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; From 4be42882e1f9c8a2d7d7bc066f420418f45b566c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:06 +0900 Subject: [PATCH 25/44] perf test: Fix a thread map leak in thread_map_synthesize test It missed to call perf_thread_map__put() after using the map. $ perf test -v 43 43: Synthesize thread map : --- start --- test child forked, pid 162640 ================================================================= ==162640==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fd48cdaa1f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x563e6d5f8d0e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x563e6d3ef69a in thread_map__new_by_pid util/thread_map.c:46 #3 0x563e6d2cec90 in test__thread_map_synthesize tests/thread-map.c:97 #4 0x563e6d27d8fb in run_test tests/builtin-test.c:428 #5 0x563e6d27d8fb in test_and_print tests/builtin-test.c:458 #6 0x563e6d27fa53 in __cmd_test tests/builtin-test.c:679 #7 0x563e6d27fa53 in cmd_test tests/builtin-test.c:825 #8 0x563e6d2ebce4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x563e6d175a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x563e6d175a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x563e6d175a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fd48c8dfd09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 8224 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Synthesize thread map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 28f51c4bd373..9e1cf11149ef 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -102,6 +102,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __ TEST_ASSERT_VAL("failed to synthesize map", !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL)); + perf_thread_map__put(threads); return 0; } From 641b6250337027311a09009e18264bb65c4d521c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:07 +0900 Subject: [PATCH 26/44] perf test: Fix a memory leak in thread_map_remove test The str should be freed after creating a thread map. Also change the open-coded thread map deletion to a call to perf_thread_map__put(). $ perf test -v 44 44: Remove thread map : --- start --- test child forked, pid 165536 2 threads: 165535, 165536 1 thread: 165536 0 thread: ================================================================= ==165536==ERROR: LeakSanitizer: detected memory leaks Direct leak of 14 byte(s) in 1 object(s) allocated from: #0 0x7f54453ffe8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x7f5444f8c6a7 in __vasprintf_internal libio/vasprintf.c:71 SUMMARY: AddressSanitizer: 14 byte(s) leaked in 1 allocation(s). test child finished with 1 ---- end ---- Remove thread map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 9e1cf11149ef..d1e208b4a571 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -110,12 +110,12 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb { struct perf_thread_map *threads; char *str; - int i; TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); threads = thread_map__new_str(str, NULL, 0, false); + free(str); TEST_ASSERT_VAL("failed to allocate thread_map", threads); @@ -142,9 +142,6 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb TEST_ASSERT_VAL("failed to not remove thread", thread_map__remove(threads, 0)); - for (i = 0; i < threads->nr; i++) - zfree(&threads->map[i].comm); - - free(threads); + perf_thread_map__put(threads); return 0; } From 690d91f5ec388448f6c2e9e3a8b3da856f400311 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:08 +0900 Subject: [PATCH 27/44] perf test: Fix cpu map leaks in cpu_map_print test It should be released after printing the map. $ perf test -v 52 52: Print cpu map : --- start --- test child forked, pid 172233 ================================================================= ==172233==ERROR: LeakSanitizer: detected memory leaks Direct leak of 156 byte(s) in 1 object(s) allocated from: #0 0x7fc472518e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55e63b378f7a in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55e63b37a05c in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:237 #3 0x55e63b056d16 in cpu_map_print tests/cpumap.c:102 #4 0x55e63b056d16 in test__cpu_map_print tests/cpumap.c:120 #5 0x55e63afff8fb in run_test tests/builtin-test.c:428 #6 0x55e63afff8fb in test_and_print tests/builtin-test.c:458 #7 0x55e63b001a53 in __cmd_test tests/builtin-test.c:679 #8 0x55e63b001a53 in cmd_test tests/builtin-test.c:825 #9 0x55e63b06dc44 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #10 0x55e63aef7a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #11 0x55e63aef7a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #12 0x55e63aef7a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #13 0x7fc47204ed09 in __libc_start_main ../csu/libc-start.c:308 ... SUMMARY: AddressSanitizer: 448 byte(s) leaked in 7 allocation(s). test child finished with 1 ---- end ---- Print cpu map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/cpumap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 29c793ac7d10..0472b110fe65 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -106,6 +106,8 @@ static int cpu_map_print(const char *str) return -1; cpu_map__snprint(map, buf, sizeof(buf)); + perf_cpu_map__put(map); + return !strcmp(buf, str); } From 846580c235b3e2625ed494f654a28d235976d3b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:09 +0900 Subject: [PATCH 28/44] perf test: Fix cpu and thread map leaks in perf_time_to_tsc test It should release the maps at the end. $ perf test -v 71 71: Convert perf time to TSC : --- start --- test child forked, pid 178744 mmap size 528384B 1st event perf time 59207256505278 tsc 13187166645142 rdtsc time 59207256542151 tsc 13187166723020 2nd event perf time 59207256543749 tsc 13187166726393 ================================================================= ==178744==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7faf601f9e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55b620cfc00a in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55b620cfca2f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x55b620cfd1ef in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x55b620cfd1ef in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x55b6209ef1b2 in test__perf_time_to_tsc tests/perf-time-to-tsc.c:73 #6 0x55b6209828fb in run_test tests/builtin-test.c:428 #7 0x55b6209828fb in test_and_print tests/builtin-test.c:458 #8 0x55b620984a53 in __cmd_test tests/builtin-test.c:679 #9 0x55b620984a53 in cmd_test tests/builtin-test.c:825 #10 0x55b6209f0cd4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x55b62087aa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x55b62087aa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x55b62087aa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7faf5fd2fd09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Convert perf time to TSC: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 7cff02664d0e..680c3cffb128 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -167,6 +167,8 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe out_err: evlist__delete(evlist); + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; } From 743108e1048ee73e0eda394597c1fc2ea46a599b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 16:44:14 -0300 Subject: [PATCH 29/44] tools headers: Update syscall.tbl files to support mount_setattr To pick the changes from: 9caccd41541a6f7d ("fs: introduce MOUNT_ATTR_IDMAP") This adds this new syscall to the tables used by tools such as 'perf trace', so that one can specify it by name and have it filtered, etc. Addressing these perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/YD6Wsxr9ByUbab/a@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 16 ++++++++++++++++ .../perf/arch/powerpc/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 + .../perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 4 files changed, 19 insertions(+) diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index dd8306ea336c..e6524ead2b7b 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -1,6 +1,8 @@ #ifndef _UAPI_LINUX_MOUNT_H #define _UAPI_LINUX_MOUNT_H +#include + /* * These are the fs-independent mount-flags: up to 32 flags are supported * @@ -117,5 +119,19 @@ enum fsconfig_command { #define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 96b2157f0371..0b2480cf3e47 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -521,3 +521,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index d443423495e5..3abef2144dac 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr sys_mount_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 78672124d28b..7bf01cbe582f 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -363,6 +363,7 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr # # Due to a historical design error, certain syscalls are numbered differently From 6c0afc579aff90e84736d35ee35a1945ec0f279f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:10:52 -0300 Subject: [PATCH 30/44] tools headers UAPI: Update tools' copy of linux/coresight-pmu.h To get the changes in these commits: 88f11864cf1d1324 ("coresight: etm-perf: Support PID tracing for kernel at EL2") 53abf3fe83175626 ("coresight: etm-perf: Clarify comment on perf options") This will possibly be used in patches lined up for v5.13. And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/coresight-pmu.h' differs from latest version at 'include/linux/coresight-pmu.h' diff -u tools/include/linux/coresight-pmu.h include/linux/coresight-pmu.h Cc: Greg Kroah-Hartman Cc: Leo Yan Cc: Mathieu Poirier Cc: Mike Leach Cc: Suzuki K Poulose Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/coresight-pmu.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index b0e35eec6499..4ac5c081af93 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -10,17 +10,27 @@ #define CORESIGHT_ETM_PMU_NAME "cs_etm" #define CORESIGHT_ETM_PMU_SEED 0x10 -/* ETMv3.5/PTM's ETMCR config bit */ -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 +/* + * Below are the definition of bit offsets for perf option, and works as + * arbitrary values for all ETM versions. + * + * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, + * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and + * directly use below macros as config bits. + */ +#define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 +#define ETM_OPT_CTXTID2 15 +#define ETM_OPT_TS 28 +#define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 +#define ETM4_CFG_BIT_VMID 7 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 +#define ETM4_CFG_BIT_VMID_OPT 15 static inline int coresight_get_trace_id(int cpu) { From 1a9bcadd0058a3e81c1beca48e5e08dee9446a01 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:16:17 -0300 Subject: [PATCH 31/44] tools headers cpufeatures: Sync with the kernel sources To pick the changes from: 3b9c723ed7cfa4e1 ("KVM: SVM: Add support for SVM instruction address check change") b85a0425d8056f3b ("Enumerate AVX Vector Neural Network instructions") fb35d30fe5b06cc2 ("x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Borislav Petkov Cc: Kyung Min Park Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Wei Huang Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 84b887825f12..cc96e26d69f7 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +/* FREE! ( 7*32+20) */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -236,8 +236,6 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -294,6 +292,7 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ @@ -337,6 +336,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ @@ -385,6 +385,13 @@ #define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + /* * BUG word(s) */ From 33dc525f93216bc83935ce98518644def04d6c54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:20:08 -0300 Subject: [PATCH 32/44] tools headers UAPI: Sync KVM's kvm.h and vmx.h headers with the kernel sources To pick the changes in: fe6b6bc802b40081 ("KVM: VMX: Enable bus lock VM exit") That makes 'perf kvm-stat' aware of this new BUS_LOCK exit reason, thus addressing the following perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Chenyi Qiang Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + tools/arch/x86/include/uapi/asm/vmx.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 8e76d3701db3..5a3022c8af82 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -112,6 +112,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index ada955c5ebb6..b8e650a985e3 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -89,6 +89,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -150,7 +151,8 @@ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ - { EXIT_REASON_TPAUSE, "TPAUSE" } + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } From 034f7ee130c19b7b04347238395cff1f402198c3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 28 Jan 2021 09:34:17 +0800 Subject: [PATCH 33/44] perf stat: Fix wrong skipping for per-die aggregation Uncore becomes die-scope on Xeon Cascade Lake-AP and perf has supported --per-die aggregation yet. One issue is found in check_per_pkg() for uncore events running on AP system. On cascade Lake-AP, we have: S0-D0 S0-D1 S1-D0 S1-D1 But in check_per_pkg(), S0-D1 and S1-D1 are skipped because the mask bits for S0 and S1 have been set for S0-D0 and S1-D0. It doesn't check die_id. So the counting for S0-D1 and S1-D1 are set to zero. That's not correct. root@lkp-csl-2ap4 ~# ./perf stat -a -I 1000 -e llc_misses.mem_read --per-die -- sleep 5 1.001460963 S0-D0 1 1317376 Bytes llc_misses.mem_read 1.001460963 S0-D1 1 998016 Bytes llc_misses.mem_read 1.001460963 S1-D0 1 970496 Bytes llc_misses.mem_read 1.001460963 S1-D1 1 1291264 Bytes llc_misses.mem_read 2.003488021 S0-D0 1 1082048 Bytes llc_misses.mem_read 2.003488021 S0-D1 1 1919040 Bytes llc_misses.mem_read 2.003488021 S1-D0 1 890752 Bytes llc_misses.mem_read 2.003488021 S1-D1 1 2380800 Bytes llc_misses.mem_read 3.005613270 S0-D0 1 1126080 Bytes llc_misses.mem_read 3.005613270 S0-D1 1 2898176 Bytes llc_misses.mem_read 3.005613270 S1-D0 1 870912 Bytes llc_misses.mem_read 3.005613270 S1-D1 1 3388608 Bytes llc_misses.mem_read 4.007627598 S0-D0 1 1124608 Bytes llc_misses.mem_read 4.007627598 S0-D1 1 3884416 Bytes llc_misses.mem_read 4.007627598 S1-D0 1 921088 Bytes llc_misses.mem_read 4.007627598 S1-D1 1 4451840 Bytes llc_misses.mem_read 5.001479927 S0-D0 1 963328 Bytes llc_misses.mem_read 5.001479927 S0-D1 1 4831936 Bytes llc_misses.mem_read 5.001479927 S1-D0 1 895104 Bytes llc_misses.mem_read 5.001479927 S1-D1 1 5496640 Bytes llc_misses.mem_read From above output, we can see S0-D1 and S1-D1 don't report the interval values, they are continued to grow. That's because check_per_pkg() wrongly decides to use zero counts for S0-D1 and S1-D1. So in check_per_pkg(), we should use hashmap(socket,die) to decide if the cpu counts needs to skip. Only considering socket is not enough. Now with this patch, root@lkp-csl-2ap4 ~# ./perf stat -a -I 1000 -e llc_misses.mem_read --per-die -- sleep 5 1.001586691 S0-D0 1 1229440 Bytes llc_misses.mem_read 1.001586691 S0-D1 1 976832 Bytes llc_misses.mem_read 1.001586691 S1-D0 1 938304 Bytes llc_misses.mem_read 1.001586691 S1-D1 1 1227328 Bytes llc_misses.mem_read 2.003776312 S0-D0 1 1586752 Bytes llc_misses.mem_read 2.003776312 S0-D1 1 875392 Bytes llc_misses.mem_read 2.003776312 S1-D0 1 855616 Bytes llc_misses.mem_read 2.003776312 S1-D1 1 949376 Bytes llc_misses.mem_read 3.006512788 S0-D0 1 1338880 Bytes llc_misses.mem_read 3.006512788 S0-D1 1 920064 Bytes llc_misses.mem_read 3.006512788 S1-D0 1 877184 Bytes llc_misses.mem_read 3.006512788 S1-D1 1 1020736 Bytes llc_misses.mem_read 4.008895291 S0-D0 1 926592 Bytes llc_misses.mem_read 4.008895291 S0-D1 1 906368 Bytes llc_misses.mem_read 4.008895291 S1-D0 1 892224 Bytes llc_misses.mem_read 4.008895291 S1-D1 1 987712 Bytes llc_misses.mem_read 5.001590993 S0-D0 1 962624 Bytes llc_misses.mem_read 5.001590993 S0-D1 1 912512 Bytes llc_misses.mem_read 5.001590993 S1-D0 1 891200 Bytes llc_misses.mem_read 5.001590993 S1-D1 1 978432 Bytes llc_misses.mem_read On no-die system, die_id is 0, actually it's hashmap(socket,0), original behavior is not changed. Reported-by: Ying Huang Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Cc: Ying Huang Link: http://lore.kernel.org/lkml/20210128013417.25597-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 18 +++++++++++- tools/perf/util/evsel.h | 4 ++- tools/perf/util/python-ext-sources | 1 + tools/perf/util/stat.c | 47 ++++++++++++++++++++++++------ 4 files changed, 59 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1bf76864c4f2..7ecbc8e2fbfa 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,6 +46,7 @@ #include "string2.h" #include "memswap.h" #include "util.h" +#include "hashmap.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include @@ -1390,7 +1391,9 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); - zfree(&evsel->per_pkg_mask); + evsel__zero_per_pkg(evsel); + hashmap__free(evsel->per_pkg_mask); + evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); perf_evsel__object.fini(evsel); } @@ -2781,3 +2784,16 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) return store_evsel_ids(evsel, evlist); } + +void evsel__zero_per_pkg(struct evsel *evsel) +{ + struct hashmap_entry *cur; + size_t bkt; + + if (evsel->per_pkg_mask) { + hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt) + free((char *)cur->key); + + hashmap__clear(evsel->per_pkg_mask); + } +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4e8e49fb7e9d..6026487353dd 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -19,6 +19,7 @@ struct perf_stat_evsel; union perf_event; struct bpf_counter_ops; struct target; +struct hashmap; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -112,7 +113,7 @@ struct evsel { bool merged_stat; bool reset_group; bool errored; - unsigned long *per_pkg_mask; + struct hashmap *per_pkg_mask; struct evsel *leader; struct list_head config_terms; int err; @@ -433,4 +434,5 @@ struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); +void evsel__zero_per_pkg(struct evsel *evsel); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 71b753523fac..845dd46e3c61 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -36,3 +36,4 @@ util/symbol_fprintf.c util/units.c util/affinity.c util/rwsem.c +util/hashmap.c diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5d8af29447f4..c400f8dde017 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -13,6 +13,7 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" +#include "hashmap.h" #include void update_stats(struct stats *stats, u64 val) @@ -277,18 +278,29 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) } } -static void zero_per_pkg(struct evsel *counter) +static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) { - if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, cpu__max_cpu()); + uint64_t *key = (uint64_t *) __key; + + return *key & 0xffffffff; +} + +static bool pkg_id_equal(const void *__key1, const void *__key2, + void *ctx __maybe_unused) +{ + uint64_t *key1 = (uint64_t *) __key1; + uint64_t *key2 = (uint64_t *) __key2; + + return *key1 == *key2; } static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, int cpu, bool *skip) { - unsigned long *mask = counter->per_pkg_mask; + struct hashmap *mask = counter->per_pkg_mask; struct perf_cpu_map *cpus = evsel__cpus(counter); - int s; + int s, d, ret = 0; + uint64_t *key; *skip = false; @@ -299,7 +311,7 @@ static int check_per_pkg(struct evsel *counter, return 0; if (!mask) { - mask = zalloc(cpu__max_cpu()); + mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL); if (!mask) return -ENOMEM; @@ -321,8 +333,25 @@ static int check_per_pkg(struct evsel *counter, if (s < 0) return -1; - *skip = test_and_set_bit(s, mask) == 1; - return 0; + /* + * On multi-die system, die_id > 0. On no-die system, die_id = 0. + * We use hashmap(socket, die) to check the used socket+die pair. + */ + d = cpu_map__get_die(cpus, cpu, NULL).die; + if (d < 0) + return -1; + + key = malloc(sizeof(*key)); + if (!key) + return -ENOMEM; + + *key = (uint64_t)d << 32 | s; + if (hashmap__find(mask, (void *)key, NULL)) + *skip = true; + else + ret = hashmap__add(mask, (void *)key, (void *)1); + + return ret; } static int @@ -422,7 +451,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, } if (counter->per_pkg) - zero_per_pkg(counter); + evsel__zero_per_pkg(counter); ret = process_counter_maps(config, counter); if (ret) From e2a99c9a9aa02ddc7c08d5089ef140965879f8f4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2021 12:51:47 +0900 Subject: [PATCH 34/44] libperf: Add perf_evlist__reset_id_hash() Add the perf_evlist__reset_id_hash() function as an internal function so that it can be called by perf to reset the hash table. This is necessary for 'perf stat' to run the workload multiple times. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210225035148.778569-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 13 +++++++++---- tools/lib/perf/include/internal/evlist.h | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 17465d454a0e..a0aaf385cbb5 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -26,13 +26,10 @@ void perf_evlist__init(struct perf_evlist *evlist) { - int i; - - for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) - INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; fdarray__init(&evlist->pollfd, 64); + perf_evlist__reset_id_hash(evlist); } static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, @@ -237,6 +234,14 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist, hlist_add_head(&sid->node, &evlist->heads[hash]); } +void perf_evlist__reset_id_hash(struct perf_evlist *evlist) +{ + int i; + + for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) + INIT_HLIST_HEAD(&evlist->heads[i]); +} + void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id) diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 2d0fa02b036f..212c29063ad4 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -124,4 +124,6 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, int fd); +void perf_evlist__reset_id_hash(struct perf_evlist *evlist); + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ From 513068f2b1fe39a60d89f6f8afbdd79c2534889c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2021 12:51:48 +0900 Subject: [PATCH 35/44] perf stat: Fix use-after-free when -r option is used I got a segfault when using -r option with event groups. The option makes it run the workload multiple times and it will reuse the evlist and evsel for each run. While most of resources are allocated and freed properly, the id hash in the evlist was not and it resulted in the bug. You can see it with the address sanitizer like below: $ perf stat -r 100 -e '{cycles,instructions}' true ================================================================= ==693052==ERROR: AddressSanitizer: heap-use-after-free on address 0x6080000003d0 at pc 0x558c57732835 bp 0x7fff1526adb0 sp 0x7fff1526ada8 WRITE of size 8 at 0x6080000003d0 thread T0 #0 0x558c57732834 in hlist_add_head /home/namhyung/project/linux/tools/include/linux/list.h:644 #1 0x558c57732834 in perf_evlist__id_hash /home/namhyung/project/linux/tools/lib/perf/evlist.c:237 #2 0x558c57732834 in perf_evlist__id_add /home/namhyung/project/linux/tools/lib/perf/evlist.c:244 #3 0x558c57732834 in perf_evlist__id_add_fd /home/namhyung/project/linux/tools/lib/perf/evlist.c:285 #4 0x558c5747733e in store_evsel_ids util/evsel.c:2765 #5 0x558c5747733e in evsel__store_ids util/evsel.c:2782 #6 0x558c5730b717 in __run_perf_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:895 #7 0x558c5730b717 in run_perf_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:1014 #8 0x558c5730b717 in cmd_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:2446 #9 0x558c57427c24 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #10 0x558c572b1a48 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #11 0x558c572b1a48 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #12 0x558c572b1a48 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #13 0x7fcadb9f7d09 in __libc_start_main ../csu/libc-start.c:308 #14 0x558c572b60f9 in _start (/home/namhyung/project/linux/tools/perf/perf+0x45d0f9) Actually the nodes in the hash table are struct perf_stream_id and they were freed in the previous run. Fix it by resetting the hash. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210225035148.778569-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5121b4db66fe..882cd1f721d9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1306,6 +1306,7 @@ void evlist__close(struct evlist *evlist) perf_evsel__free_fd(&evsel->core); perf_evsel__free_id(&evsel->core); } + perf_evlist__reset_id_hash(&evlist->core); } static int evlist__create_syswide_maps(struct evlist *evlist) From bd57a9f33abc0adede5bafa06b2f1af3de03190d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 24 Feb 2021 16:14:38 +0900 Subject: [PATCH 36/44] perf daemon: Fix compile error with Asan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm seeing a build failure when build with address sanitizer. It seems we could write to the name[100] if the var is longer. $ make EXTRA_CFLAGS=-fsanitize=address ... CC builtin-daemon.o In function ‘get_session_name’, inlined from ‘session_config’ at builtin-daemon.c:164:6, inlined from ‘server_config’ at builtin-daemon.c:223:10: builtin-daemon.c:155:11: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] 155 | *session = 0; | ~~~~~~~~~^~~ builtin-daemon.c: In function ‘server_config’: builtin-daemon.c:162:7: note: at offset 100 to object ‘name’ with size 100 declared here 162 | char name[100]; | ^~~~ Fixes: c0666261ff38 ("perf daemon: Add config file support") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210224071438.686677-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 8f0ed2e59280..ace8772a4f03 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -161,7 +161,7 @@ static int session_config(struct daemon *daemon, const char *var, const char *va struct daemon_session *session; char name[100]; - if (get_session_name(var, name, sizeof(name))) + if (get_session_name(var, name, sizeof(name) - 1)) return -EINVAL; var = strchr(var, '.'); From ec4d0a7680c793ef68d47507fcec245019ee6f33 Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Fri, 19 Feb 2021 11:09:32 -0500 Subject: [PATCH 37/44] perf archive: Fix filtering of empty build-ids A non-existent build-id used to be treated as all-zero SHA-1 hash. Build-ids are now variable width. A non-existent build-id is an empty string and "perf buildid-list" pads this with spaces. This is true even when using old perf.data files recorded from older versions of perf; "perf buildid-list" never reports an all-zero hash anymore. This fixes "perf-archive" to skip missing build-ids by skipping lines that start with a padding space rather than with zeroes. Signed-off-by: Nicholas Fraser Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Huw Davies Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ulrich Czekalla Link: https://lore.kernel.org/r/442bffc7-ac5c-0975-b876-a549efce2413@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-archive.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 0cfb3e2cefef..133f0eddbcc4 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -20,9 +20,8 @@ else fi BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) -NOBUILDID=0000000000000000000000000000000000000000 -perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS +perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS if [ ! -s $BUILDIDS ] ; then echo "perf archive: no build-ids found" rm $BUILDIDS || true From a8146d66ab0184ad1728eaeb59cfdf256f4b8fbf Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 3 Mar 2021 08:01:24 -0800 Subject: [PATCH 38/44] perf test: Fix sample-parsing failure on non-x86 platforms Executing 'perf test 27' fails on s390: [root@t35lp46 perf]# ./perf test -Fv 27 27: Sample parsing --- start --- ---- end ---- Sample parsing: FAILED! [root@t35lp46 perf]# The commit fbefe9c2f87fd392 ("perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing") changes the ins_lat to a model-specific variable only for X86, but perf test still verify the variable in the generic test. Remove the ins_lat check in the generic test. The following patch will add it in the X86 specific test. Fixes: fbefe9c2f87fd392 ("perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing") Reported-by: Thomas Richter Signed-off-by: Kan Liang Tested-by: Thomas Richter Cc: Athira Jajeev Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/1614787285-104151-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 0dbe3aa99853..8fd8a4ef97da 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -129,9 +129,6 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_WEIGHT) COMP(weight); - if (type & PERF_SAMPLE_WEIGHT_STRUCT) - COMP(ins_lat); - if (type & PERF_SAMPLE_DATA_SRC) COMP(data_src); @@ -245,7 +242,6 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .cgroup = 114, .data_page_size = 115, .code_page_size = 116, - .ins_lat = 117, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, From 7d9d4c6edba93cd96899affe2fc60c3341df152c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 3 Mar 2021 08:01:25 -0800 Subject: [PATCH 39/44] perf test: Support the ins_lat check in the X86 specific test The ins_lat of PERF_SAMPLE_WEIGHT_STRUCT stands for the instruction latency, which is only available for X86. Add a X86 specific test for the ins_lat and PERF_SAMPLE_WEIGHT_STRUCT type. The test__x86_sample_parsing() uses the same way as the test__sample_parsing() to verify a sample type. Since the ins_lat and PERF_SAMPLE_WEIGHT_STRUCT are the only X86 specific sample type for now, the test__x86_sample_parsing() only verify the PERF_SAMPLE_WEIGHT_STRUCT type. Other sample types are still verified in the generic test. $ perf test 77 -v 77: x86 Sample parsing : --- start --- test child forked, pid 102370 test child finished with 0 ---- end ---- x86 Sample parsing: Ok Signed-off-by: Kan Liang Cc: Athira Jajeev Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/1614787285-104151-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 + tools/perf/arch/x86/tests/Build | 1 + tools/perf/arch/x86/tests/arch-tests.c | 4 + tools/perf/arch/x86/tests/sample-parsing.c | 121 +++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100644 tools/perf/arch/x86/tests/sample-parsing.c diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 6a54b94f1c25..0e20f3dc69f3 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -10,6 +10,7 @@ int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); +int test__x86_sample_parsing(struct test *test, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 36d4f248b51d..28d793390198 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += rdpmc.o +perf-y += sample-parsing.o perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index bc25d727b4e9..71aa67367ad6 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -30,6 +30,10 @@ struct test arch_tests[] = { .func = test__bp_modify, }, #endif + { + .desc = "x86 Sample parsing", + .func = test__x86_sample_parsing, + }, { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c new file mode 100644 index 000000000000..c92db87e4479 --- /dev/null +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +#include "event.h" +#include "evsel.h" +#include "debug.h" +#include "util/synthetic-events.h" + +#include "tests/tests.h" +#include "arch-tests.h" + +#define COMP(m) do { \ + if (s1->m != s2->m) { \ + pr_debug("Samples differ at '"#m"'\n"); \ + return false; \ + } \ +} while (0) + +static bool samples_same(const struct perf_sample *s1, + const struct perf_sample *s2, + u64 type) +{ + if (type & PERF_SAMPLE_WEIGHT_STRUCT) + COMP(ins_lat); + + return true; +} + +static int do_test(u64 sample_type) +{ + struct evsel evsel = { + .needs_swap = false, + .core = { + . attr = { + .sample_type = sample_type, + .read_format = 0, + }, + }, + }; + union perf_event *event; + struct perf_sample sample = { + .weight = 101, + .ins_lat = 102, + }; + struct perf_sample sample_out; + size_t i, sz, bufsz; + int err, ret = -1; + + sz = perf_event__sample_event_size(&sample, sample_type, 0); + bufsz = sz + 4096; /* Add a bit for overrun checking */ + event = malloc(bufsz); + if (!event) { + pr_debug("malloc failed\n"); + return -1; + } + + memset(event, 0xff, bufsz); + event->header.type = PERF_RECORD_SAMPLE; + event->header.misc = 0; + event->header.size = sz; + + err = perf_event__synthesize_sample(event, sample_type, 0, &sample); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "perf_event__synthesize_sample", sample_type, err); + goto out_free; + } + + /* The data does not contain 0xff so we use that to check the size */ + for (i = bufsz; i > 0; i--) { + if (*(i - 1 + (u8 *)event) != 0xff) + break; + } + if (i != sz) { + pr_debug("Event size mismatch: actual %zu vs expected %zu\n", + i, sz); + goto out_free; + } + + evsel.sample_size = __evsel__sample_size(sample_type); + + err = evsel__parse_sample(&evsel, event, &sample_out); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "evsel__parse_sample", sample_type, err); + goto out_free; + } + + if (!samples_same(&sample, &sample_out, sample_type)) { + pr_debug("parsing failed for sample_type %#"PRIx64"\n", + sample_type); + goto out_free; + } + + ret = 0; +out_free: + free(event); + + return ret; +} + +/** + * test__x86_sample_parsing - test X86 specific sample parsing + * + * This function implements a test that synthesizes a sample event, parses it + * and then checks that the parsed sample matches the original sample. If the + * test passes %0 is returned, otherwise %-1 is returned. + * + * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type. + * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type. + */ +int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + return do_test(PERF_SAMPLE_WEIGHT_STRUCT); +} From c1f272df510c6b1db68ca6597724d17b557d1407 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 4 Mar 2021 09:51:52 -0300 Subject: [PATCH 40/44] perf tests x86: Move insn.h include to make sure it finds stddef.h In some versions of alpine Linux the perf build is broken since commit 1d509f2a6ebca1ae ("x86/insn: Support big endian cross-compiles"): In file included from /usr/include/linux/byteorder/little_endian.h:13, from /usr/include/asm/byteorder.h:5, from arch/x86/util/../../../../arch/x86/include/asm/insn.h:10, from arch/x86/util/archinsn.c:2: /usr/include/linux/swab.h:161:8: error: unknown type name '__always_inline' static __always_inline __u16 __swab16p(const __u16 *p) So move the inclusion of arch/x86/include/asm/insn.h to later in the places where linux/stddef.h (that conditionally defines __always_inline) to workaround this problem on Alpine Linux 3.9 to 3.11, 3.12 onwards works. Cc: Josh Poimboeuf Cc: Martin Schwidefsky Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/insn-x86.c | 2 +- tools/perf/arch/x86/util/archinsn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index f782ef8c5982..4f75ae990140 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "../../../../arch/x86/include/asm/insn.h" #include #include "debug.h" #include "tests/tests.h" #include "arch-tests.h" +#include "../../../../arch/x86/include/asm/insn.h" #include "intel-pt-decoder/intel-pt-insn-decoder.h" diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c index 3e6791531ca5..34d600c51044 100644 --- a/tools/perf/arch/x86/util/archinsn.c +++ b/tools/perf/arch/x86/util/archinsn.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../../../arch/x86/include/asm/insn.h" #include "archinsn.h" #include "event.h" #include "machine.h" #include "thread.h" #include "symbol.h" +#include "../../../../arch/x86/include/asm/insn.h" void arch_fetch_insn(struct perf_sample *sample, struct thread *thread, From 6740a4e70e5d1b9d8e7fe41fd46dd5656d65dadf Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 4 Mar 2021 11:59:58 +0530 Subject: [PATCH 41/44] perf report: Fix -F for branch & mem modes perf report fails to add valid additional fields with -F when used with branch or mem modes. Fix it. Before patch: $ perf record -b $ perf report -b -F +srcline_from --stdio Error: Invalid --fields key: `srcline_from' After patch: $ perf report -b -F +srcline_from --stdio # Samples: 8K of event 'cycles' # Event count (approx.): 8784 ... Committer notes: There was an inversion: when looking at branch stack dimensions (keys) it was checking if the sort mode was 'mem', not 'branch'. Fixes: aa6b3c99236b ("perf report: Make -F more strict like -s") Reported-by: Athira Jajeev Signed-off-by: Ravi Bangoria Reviewed-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Tested-by: Athira Jajeev Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20210304062958.85465-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0d5ad42812b9..552b590485bf 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3140,7 +3140,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__MEMORY) + if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; return __sort_dimension__add_output(list, sd); @@ -3152,7 +3152,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; return __sort_dimension__add_output(list, sd); From 77d02bd00cea9f1a87afe58113fa75b983d6c23a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Mar 2021 10:02:09 -0300 Subject: [PATCH 42/44] perf map: Tighten snprintf() string precision to pass gcc check on some 32-bit arches Noticed on a debian:experimental mips and mipsel cross build build environment: perfbuilder@ec265a086e9b:~$ mips-linux-gnu-gcc --version | head -1 mips-linux-gnu-gcc (Debian 10.2.1-3) 10.2.1 20201224 perfbuilder@ec265a086e9b:~$ CC /tmp/build/perf/util/map.o util/map.c: In function 'map__new': util/map.c:109:5: error: '%s' directive output may be truncated writing between 1 and 2147483645 bytes into a region of size 4096 [-Werror=format-truncation=] 109 | "%s/platforms/%s/arch-%s/usr/lib/%s", | ^~ In file included from /usr/mips-linux-gnu/include/stdio.h:867, from util/symbol.h:11, from util/map.c:2: /usr/mips-linux-gnu/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 32 or more bytes (assuming 4294967321) into a destination of size 4096 67 | return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | __bos (__s), __fmt, __va_arg_pack ()); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Since we have the lenghts for what lands in that place, use it to give the compiler more info and make it happy. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 692e56dc832e..fbc40a2c17d4 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -77,8 +77,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) if (strstarts(filename, "/system/lib/")) { char *ndk, *app; const char *arch; - size_t ndk_length; - size_t app_length; + int ndk_length, app_length; ndk = getenv("NDK_ROOT"); app = getenv("APP_PLATFORM"); @@ -106,8 +105,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) if (new_length > PATH_MAX) return false; snprintf(newfilename, new_length, - "%s/platforms/%s/arch-%s/usr/lib/%s", - ndk, app, arch, libname); + "%.*s/platforms/%.*s/arch-%s/usr/lib/%s", + ndk_length, ndk, app_length, app, arch, libname); return true; } From 86a19008af5d88d5d523dbfe9b6ede11473e9a7f Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 2 Mar 2021 15:41:20 +0100 Subject: [PATCH 43/44] perf trace: Fix race in signal handling Since a lot of stuff happens before the SIGINT signal handler is registered (scanning /proc/*, etc.), on bigger systems, such as Cavium Sabre CN99xx, it may happen that first interrupt signal is lost and perf isn't correctly terminated. The reproduction code might look like the following: perf trace -a & PERF_PID=$! sleep 4 kill -INT $PERF_PID The issue has been found on a CN99xx machine with RHEL-8 and the patch fixes it by registering the signal handlers earlier in the init stage. Suggested-by: Jiri Olsa Signed-off-by: Michael Petlan Tested-by: Michael Petlan Cc: Jiri Olsa Link: https://lore.kernel.org/lkml/YEJnaMzH2ctp3PPx@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 85b6a46e85b6..7ec18ff57fc4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3964,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__config(evlist, &trace->opts, &callchain_param); - signal(SIGCHLD, sig_handler); - signal(SIGINT, sig_handler); - if (forks) { err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL); if (err < 0) { @@ -4827,6 +4824,8 @@ int cmd_trace(int argc, const char **argv) signal(SIGSEGV, sighandler_dump_stack); signal(SIGFPE, sighandler_dump_stack); + signal(SIGCHLD, sig_handler); + signal(SIGINT, sig_handler); trace.evlist = evlist__new(); trace.sctbl = syscalltbl__new(); From 6fc5baf5471700fd613f0b4e52ab4563f1942b78 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Sat, 6 Feb 2021 23:08:29 +0800 Subject: [PATCH 44/44] perf cs-etm: Fix bitmap for option When set option with macros ETM_OPT_CTXTID and ETM_OPT_TS, it wrongly takes these two values (14 and 28 prespectively) as bit masks, but actually both are the offset for bits. But this doesn't lead to further failure due to the AND logic operation will be always true for ETM_OPT_CTXTID / ETM_OPT_TS. This patch defines new independent macros (rather than using the "config" bits) for requesting the "contextid" and "timestamp" for cs_etm_set_option(). Signed-off-by: Suzuki Poulouse Reviewed-by: Mike Leach Cc: Al Grant Cc: Daniel Kiss Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Jonathan Corbet Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: http://lore.kernel.org/lkml/20210206150833.42120-5-leo.yan@linaro.org [ Extract the change as a separate patch for easier review ] Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index bd446aba64f7..c25c878fd06c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -156,6 +156,10 @@ static int cs_etm_set_timestamp(struct auxtrace_record *itr, return err; } +#define ETM_SET_OPT_CTXTID (1 << 0) +#define ETM_SET_OPT_TS (1 << 1) +#define ETM_SET_OPT_MASK (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS) + static int cs_etm_set_option(struct auxtrace_record *itr, struct evsel *evsel, u32 option) { @@ -169,17 +173,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_OPT_CTXTID) { + if (option & ETM_SET_OPT_CTXTID) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_OPT_TS) { + if (option & ETM_SET_OPT_TS) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) + if (option & ~(ETM_SET_OPT_MASK)) /* Nothing else is currently supported */ goto out; } @@ -406,7 +410,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_OPT_CTXTID | ETM_OPT_TS); + ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS); if (err) goto out; }