From d3fd203f36d46aa29600a72d57a1b61af80e4a25 Mon Sep 17 00:00:00 2001 From: Baisong Zhong Date: Wed, 2 Nov 2022 16:16:20 +0800 Subject: [PATCH 01/51] bpf, test_run: Fix alignment problem in bpf_prog_test_run_skb() We got a syzkaller problem because of aarch64 alignment fault if KFENCE enabled. When the size from user bpf program is an odd number, like 399, 407, etc, it will cause the struct skb_shared_info's unaligned access. As seen below: BUG: KFENCE: use-after-free read in __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 Use-after-free read at 0xffff6254fffac077 (in kfence-#213): __lse_atomic_add arch/arm64/include/asm/atomic_lse.h:26 [inline] arch_atomic_add arch/arm64/include/asm/atomic.h:28 [inline] arch_atomic_inc include/linux/atomic-arch-fallback.h:270 [inline] atomic_inc include/asm-generic/atomic-instrumented.h:241 [inline] __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 skb_clone+0xf4/0x214 net/core/skbuff.c:1481 ____bpf_clone_redirect net/core/filter.c:2433 [inline] bpf_clone_redirect+0x78/0x1c0 net/core/filter.c:2420 bpf_prog_d3839dd9068ceb51+0x80/0x330 bpf_dispatcher_nop_func include/linux/bpf.h:728 [inline] bpf_test_run+0x3c0/0x6c0 net/bpf/test_run.c:53 bpf_prog_test_run_skb+0x638/0xa7c net/bpf/test_run.c:594 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 kfence-#213: 0xffff6254fffac000-0xffff6254fffac196, size=407, cache=kmalloc-512 allocated by task 15074 on cpu 0 at 1342.585390s: kmalloc include/linux/slab.h:568 [inline] kzalloc include/linux/slab.h:675 [inline] bpf_test_init.isra.0+0xac/0x290 net/bpf/test_run.c:191 bpf_prog_test_run_skb+0x11c/0xa7c net/bpf/test_run.c:512 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 __arm64_sys_bpf+0x50/0x60 kernel/bpf/syscall.c:4381 To fix the problem, we adjust @size so that (@size + @hearoom) is a multiple of SMP_CACHE_BYTES. So we make sure the struct skb_shared_info is aligned to a cache line. Fixes: 1cf1cae963c2 ("bpf: introduce BPF_PROG_TEST_RUN command") Signed-off-by: Baisong Zhong Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20221102081620.1465154-1-zhongbaisong@huawei.com --- net/bpf/test_run.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 13d578ce2a09..fcb3e6c5e03c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -774,6 +774,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, if (user_size > size) return ERR_PTR(-EMSGSIZE); + size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); From 18acb7fac22ff7b36c7ea5a76b12996e7b7dbaba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:13 +0100 Subject: [PATCH 02/51] bpf: Revert ("Fix dispatcher patchable function entry to 5 bytes nop") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because __attribute__((patchable_function_entry)) is only available since GCC-8 this solution fails to build on the minimum required GCC version. Undo these changes so we might try again -- without cluttering up the patches with too many changes. This is an almost complete revert of: dbe69b299884 ("bpf: Fix dispatcher patchable function entry to 5 bytes nop") ceea991a019c ("bpf: Move bpf_dispatcher function out of ftrace locations") (notably the arch/x86/Kconfig hunk is kept). Reported-by: David Laight Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/439d8dc735bb4858875377df67f1b29a@AcuMS.aculab.com Link: https://lore.kernel.org/bpf/20221103120647.728830733@infradead.org --- arch/x86/net/bpf_jit_comp.c | 13 ------------- include/linux/bpf.h | 21 +-------------------- kernel/bpf/dispatcher.c | 6 ------ 3 files changed, 1 insertion(+), 39 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 00127abd89ee..99620428ad78 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -389,18 +388,6 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, return ret; } -int __init bpf_arch_init_dispatcher_early(void *ip) -{ - const u8 *nop_insn = x86_nops[5]; - - if (is_endbr(*(u32 *)ip)) - ip += ENDBR_INSN_SIZE; - - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); - return 0; -} - int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0566705c1d4e..5cd95716b441 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,7 +27,6 @@ #include #include #include -#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -971,8 +970,6 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); -int __init bpf_arch_init_dispatcher_early(void *ip); - #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -986,21 +983,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); }, \ } -#define BPF_DISPATCHER_INIT_CALL(_name) \ - static int __init _name##_init(void) \ - { \ - return bpf_arch_init_dispatcher_early(_name##_func); \ - } \ - early_initcall(_name##_init) - -#ifdef CONFIG_X86_64 -#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) -#else -#define BPF_DISPATCHER_ATTRIBUTES -#endif - #define DEFINE_BPF_DISPATCHER(name) \ - notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -1010,9 +993,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ - BPF_DISPATCHER_INIT(bpf_dispatcher_##name); \ - BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name); - + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 04f0a045dcaa..fa64b80b8bca 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,7 +4,6 @@ #include #include #include -#include /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -91,11 +90,6 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n return -ENOTSUPP; } -int __weak __init bpf_arch_init_dispatcher_early(void *ip) -{ - return -ENOTSUPP; -} - static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) { s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; From c86df29d11dfba27c0a1f5039cd6fe387fbf4239 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:14 +0100 Subject: [PATCH 03/51] bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dispatcher function is currently abusing the ftrace __fentry__ call location for its own purposes -- this obviously gives trouble when the dispatcher and ftrace are both in use. A previous solution tried using __attribute__((patchable_function_entry())) which works, except it is GCC-8+ only, breaking the build on the earlier still supported compilers. Instead use static_call() -- which has its own annotations and does not conflict with ftrace -- to rewrite the dispatch function. By using: return static_call()(ctx, insni, bpf_func) you get a perfect forwarding tail call as function body (iow a single jmp instruction). By having the default static_call() target be bpf_dispatcher_nop_func() it retains the default behaviour (an indirect call to the argument function). Only once a dispatcher program is attached is the target rewritten to directly call the JIT'ed image. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/Y1/oBlK0yFk5c/Im@hirez.programming.kicks-ass.net Link: https://lore.kernel.org/bpf/20221103120647.796772565@infradead.org --- include/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++++++- kernel/bpf/dispatcher.c | 20 +++++++------------- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5cd95716b441..74c6f449d81e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,6 +27,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -953,6 +954,10 @@ struct bpf_dispatcher { void *rw_image; u32 image_off; struct bpf_ksym ksym; +#ifdef CONFIG_HAVE_STATIC_CALL + struct static_call_key *sc_key; + void *sc_tramp; +#endif }; static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( @@ -970,6 +975,34 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); + +/* + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn + * indirection with a direct call to the bpf program. If the architecture does + * not have STATIC_CALL, avoid a double-indirection. + */ +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __BPF_DISPATCHER_SC_INIT(_name) \ + .sc_key = &STATIC_CALL_KEY(_name), \ + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), + +#define __BPF_DISPATCHER_SC(name) \ + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) + +#define __BPF_DISPATCHER_CALL(name) \ + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) + +#define __BPF_DISPATCHER_UPDATE(_d, _new) \ + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) + +#else +#define __BPF_DISPATCHER_SC_INIT(name) +#define __BPF_DISPATCHER_SC(name) +#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) +#define __BPF_DISPATCHER_UPDATE(_d, _new) +#endif + #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -981,25 +1014,29 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ + __BPF_DISPATCHER_SC_INIT(_name##_call) \ } #define DEFINE_BPF_DISPATCHER(name) \ + __BPF_DISPATCHER_SC(name); \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ { \ - return bpf_func(ctx, insnsi); \ + return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ BPF_DISPATCHER_INIT(bpf_dispatcher_##name); + #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; + #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index fa64b80b8bca..7dfb8d0d5202 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,6 +4,7 @@ #include #include #include +#include /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -104,17 +105,11 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *b static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) { - void *old, *new, *tmp; - u32 noff; - int err; + void *new, *tmp; + u32 noff = 0; - if (!prev_num_progs) { - old = NULL; - noff = 0; - } else { - old = d->image + d->image_off; + if (prev_num_progs) noff = d->image_off ^ (PAGE_SIZE / 2); - } new = d->num_progs ? d->image + noff : NULL; tmp = d->num_progs ? d->rw_image + noff : NULL; @@ -128,11 +123,10 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); - if (err || !new) - return; + __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); - d->image_off = noff; + if (new) + d->image_off = noff; } void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, From a679120edfcf3d63f066f53afd425d51b480e533 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 7 Nov 2022 10:07:11 -0700 Subject: [PATCH 04/51] bpf: Add explicit cast to 'void *' for __BPF_DISPATCHER_UPDATE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with clang: kernel/bpf/dispatcher.c:126:33: error: pointer type mismatch ('void *' and 'unsigned int (*)(const void *, const struct bpf_insn *, bpf_func_t)' (aka 'unsigned int (*)(const void *, const struct bpf_insn *, unsigned int (*)(const void *, const struct bpf_insn *))')) [-Werror,-Wpointer-type-mismatch] __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/bpf.h:1045:54: note: expanded from macro '__BPF_DISPATCHER_UPDATE' __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) ^~~~ 1 error generated. The warning is pointing out that the type of new ('void *') and &bpf_dispatcher_nop_func are not compatible, which could have side effects coming out of a conditional operator due to promotion rules. Add the explicit cast to 'void *' to make it clear that this is expected, as __BPF_DISPATCHER_UPDATE() expands to a call to __static_call_update(), which expects a 'void *' as its final argument. Fixes: c86df29d11df ("bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace)") Link: https://github.com/ClangBuiltLinux/linux/issues/1755 Reported-by: kernel test robot Reported-by: "kernelci.org bot" Signed-off-by: Nathan Chancellor Acked-by: Björn Töpel Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221107170711.42409-1-nathan@kernel.org Signed-off-by: Martin KaFai Lau --- kernel/bpf/dispatcher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 7dfb8d0d5202..c19719f48ce0 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -123,7 +123,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); + __BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func); if (new) d->image_off = noff; From eb86559a691cea5fa63e57a03ec3dc9c31e97955 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 8 Nov 2022 13:11:31 +0800 Subject: [PATCH 05/51] bpf: Fix memory leaks in __check_func_call kmemleak reports this issue: unreferenced object 0xffff88817139d000 (size 2048): comm "test_progs", pid 33246, jiffies 4307381979 (age 45851.820s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000045f075f0>] kmalloc_trace+0x27/0xa0 [<0000000098b7c90a>] __check_func_call+0x316/0x1230 [<00000000b4c3c403>] check_helper_call+0x172e/0x4700 [<00000000aa3875b7>] do_check+0x21d8/0x45e0 [<000000001147357b>] do_check_common+0x767/0xaf0 [<00000000b5a595b4>] bpf_check+0x43e3/0x5bc0 [<0000000011e391b1>] bpf_prog_load+0xf26/0x1940 [<0000000007f765c0>] __sys_bpf+0xd2c/0x3650 [<00000000839815d6>] __x64_sys_bpf+0x75/0xc0 [<00000000946ee250>] do_syscall_64+0x3b/0x90 [<0000000000506b7f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The root case here is: In function prepare_func_exit(), the callee is not released in the abnormal scenario after "state->curframe--;". To fix, move "state->curframe--;" to the very bottom of the function, right when we free callee and reset frame[] pointer to NULL, as Andrii suggested. In addition, function __check_func_call() has a similar problem. In the abnormal scenario before "state->curframe++;", the callee also should be released by free_func_state(). Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Fixes: fd978bf7fd31 ("bpf: Add reference tracking to verifier") Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1667884291-15666-1-git-send-email-wangyufen@huawei.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 225666307bba..264b3dc714cc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6745,11 +6745,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* Transfer references to the callee */ err = copy_reference_state(callee, caller); if (err) - return err; + goto err_out; err = set_callee_state_cb(env, caller, callee, *insn_idx); if (err) - return err; + goto err_out; clear_caller_saved_regs(env, caller->regs); @@ -6766,6 +6766,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn print_verifier_state(env, callee, true); } return 0; + +err_out: + free_func_state(callee); + state->frame[state->curframe + 1] = NULL; + return err; } int map_set_for_each_callback_args(struct bpf_verifier_env *env, @@ -6979,8 +6984,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return -EINVAL; } - state->curframe--; - caller = state->frame[state->curframe]; + caller = state->frame[state->curframe - 1]; if (callee->in_callback_fn) { /* enforce R0 return value range [0, 1]. */ struct tnum range = callee->callback_ret_range; @@ -7019,7 +7023,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) } /* clear everything in the callee */ free_func_state(callee); - state->frame[state->curframe + 1] = NULL; + state->frame[state->curframe--] = NULL; return 0; } From 0811664da064c6d7ca64c02f5579f758a007e52d Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 8 Nov 2022 20:19:45 +0800 Subject: [PATCH 06/51] selftests/bpf: Fix casting error when cross-compiling test_verifier for 32-bit platforms When cross-compiling test_verifier for 32-bit platforms, the casting error is shown below: test_verifier.c:1263:27: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] 1263 | info.xlated_prog_insns = (__u64)*buf; | ^ cc1: all warnings being treated as errors Fix it by adding zero-extension for it. Fixes: 933ff53191eb ("selftests/bpf: specify expected instructions in test_verifier tests") Signed-off-by: Pu Lehui Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221108121945.4104644-1-pulehui@huaweicloud.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2dbcbf363c18..b605a70d4f6b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1260,7 +1260,7 @@ static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) bzero(&info, sizeof(info)); info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)*buf; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { perror("second bpf_obj_get_info_by_fd failed"); goto out_free_buf; From 5704bc7e8991164b14efb748b5afa0715c25fac3 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 8 Nov 2022 09:58:57 +0800 Subject: [PATCH 07/51] selftests/bpf: Fix test_progs compilation failure in 32-bit arch test_progs fails to be compiled in the 32-bit arch, log is as follows: test_progs.c:1013:52: error: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t' {aka 'unsigned int'} [-Werror=format=] 1013 | sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", | ~~^ | | | long int | %d 1014 | strlen(msg->test_log.log_buf), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | size_t {aka unsigned int} Fix it. Fixes: 91b2c0afd00c ("selftests/bpf: Add parallelism to test_progs") Signed-off-by: Yang Jihong Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221108015857.132457-1-yangjihong1@huawei.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_progs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0e9a47f97890..3fef451d8831 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1010,7 +1010,7 @@ static inline const char *str_msg(const struct msg *msg, char *buf) msg->subtest_done.have_log); break; case MSG_TEST_LOG: - sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", + sprintf(buf, "MSG_TEST_LOG (cnt: %zu, last: %d)", strlen(msg->test_log.log_buf), msg->test_log.is_last); break; From f3a72878a3de720661b7ed0d6b7f7c506ddb8a52 Mon Sep 17 00:00:00 2001 From: Jaco Coetzee Date: Wed, 9 Nov 2022 15:27:57 -0500 Subject: [PATCH 08/51] nfp: change eeprom length to max length enumerators Extend the size of QSFP EEPROM for types SSF8436 and SFF8636 from 256 to 640 bytes in order to expose all the EEPROM pages by ethtool. For SFF-8636 and SFF-8436 specifications, the driver exposes 256 bytes of EEPROM data for ethtool's get_module_eeprom() callback, resulting in "netlink error: Invalid argument" when an EEPROM read with an offset larger than 256 bytes is attempted. Changing the length enumerators to the _MAX_LEN variants exposes all 640 bytes of the EEPROM allowing upper pages 1, 2 and 3 to be read. Fixes: 96d971e307cc ("ethtool: Add fallback to get_module_eeprom from netlink command") Signed-off-by: Jaco Coetzee Reviewed-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 22a5d2419084..1775997f9c69 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1477,15 +1477,15 @@ nfp_port_get_module_info(struct net_device *netdev, if (data < 0x3) { modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; } else { modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; } break; case NFP_INTERFACE_QSFP28: modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; break; default: netdev_err(netdev, "Unsupported module 0x%x detected\n", From 8678ea06852cd1f819b870c773d43df888d15d46 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 10 Nov 2022 09:56:13 +0100 Subject: [PATCH 09/51] maccess: Fix writing offset in case of fault in strncpy_from_kernel_nofault() If a page fault occurs while copying the first byte, this function resets one byte before dst. As a consequence, an address could be modified and leaded to kernel crashes if case the modified address was accessed later. Fixes: b58294ead14c ("maccess: allow architectures to provide kernel probing directly") Signed-off-by: Alban Crequy Signed-off-by: Andrii Nakryiko Tested-by: Francis Laniel Reviewed-by: Andrew Morton Cc: [5.8] Link: https://lore.kernel.org/bpf/20221110085614.111213-2-albancrequy@linux.microsoft.com --- mm/maccess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/maccess.c b/mm/maccess.c index 5f4d240f67ec..074f6b086671 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -97,7 +97,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) return src - unsafe_addr; Efault: pagefault_enable(); - dst[-1] = '\0'; + dst[0] = '\0'; return -EFAULT; } From 9cd094829dae949a755c18533479c20e74415ab2 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 10 Nov 2022 09:56:14 +0100 Subject: [PATCH 10/51] selftests: bpf: Add a test when bpf_probe_read_kernel_str() returns EFAULT This commit tests previous fix of bpf_probe_read_kernel_str(). The BPF helper bpf_probe_read_kernel_str should return -EFAULT when given a bad source pointer and the target buffer should only be modified to make the string NULL terminated. bpf_probe_read_kernel_str() was previously inserting a NULL before the beginning of the dst buffer. This test should ensure that the implementation stays correct for now on. Without the fix, this test will fail as follows: $ cd tools/testing/selftests/bpf $ make $ sudo ./test_progs --name=varlen ... test_varlen:FAIL:check got 0 != exp 66 Signed-off-by: Alban Crequy Signed-off-by: Francis Laniel Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221110085614.111213-3-albancrequy@linux.microsoft.com Changes v1 to v2: - add ack tag - fix my email - rebase on bpf tree and tag for bpf tree --- tools/testing/selftests/bpf/prog_tests/varlen.c | 7 +++++++ tools/testing/selftests/bpf/progs/test_varlen.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c index dd324b4933db..4d7056f8f177 100644 --- a/tools/testing/selftests/bpf/prog_tests/varlen.c +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -63,6 +63,13 @@ void test_varlen(void) CHECK_VAL(data->total4, size1 + size2); CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", "doesn't match!\n"); + + CHECK_VAL(bss->ret_bad_read, -EFAULT); + CHECK_VAL(data->payload_bad[0], 0x42); + CHECK_VAL(data->payload_bad[1], 0x42); + CHECK_VAL(data->payload_bad[2], 0); + CHECK_VAL(data->payload_bad[3], 0x42); + CHECK_VAL(data->payload_bad[4], 0x42); cleanup: test_varlen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 3987ff174f1f..20eb7d422c41 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -19,6 +19,7 @@ __u64 payload1_len1 = 0; __u64 payload1_len2 = 0; __u64 total1 = 0; char payload1[MAX_LEN + MAX_LEN] = {}; +__u64 ret_bad_read = 0; /* .data */ int payload2_len1 = -1; @@ -36,6 +37,8 @@ int payload4_len2 = -1; int total4= -1; char payload4[MAX_LEN + MAX_LEN] = { 1 }; +char payload_bad[5] = { 0x42, 0x42, 0x42, 0x42, 0x42 }; + SEC("raw_tp/sys_enter") int handler64_unsigned(void *regs) { @@ -61,6 +64,8 @@ int handler64_unsigned(void *regs) total1 = payload - (void *)payload1; + ret_bad_read = bpf_probe_read_kernel_str(payload_bad + 2, 1, (void *) -1); + return 0; } From 4b45cd81f737d79d0fbfc0d320a1e518e7f0bbf0 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Thu, 10 Nov 2022 07:21:28 -0500 Subject: [PATCH 11/51] bpf: Initialize same number of free nodes for each pcpu_freelist pcpu_freelist_populate() initializes nr_elems / num_possible_cpus() + 1 free nodes for some CPUs, and then possibly one CPU with fewer nodes, followed by remaining cpus with 0 nodes. For example, when nr_elems == 256 and num_possible_cpus() == 32, CPU 0~27 each gets 9 free nodes, CPU 28 gets 4 free nodes, CPU 29~31 get 0 free nodes, while in fact each CPU should get 8 nodes equally. This patch initializes nr_elems / num_possible_cpus() free nodes for each CPU firstly, then allocates the remaining free nodes by one for each CPU until no free nodes left. Fixes: e19494edab82 ("bpf: introduce percpu_freelist") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221110122128.105214-1-xukuohai@huawei.com --- kernel/bpf/percpu_freelist.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index b6e7f5c5b9ab..034cf87b54e9 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -100,22 +100,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems) { struct pcpu_freelist_head *head; - int i, cpu, pcpu_entries; + unsigned int cpu, cpu_idx, i, j, n, m; - pcpu_entries = nr_elems / num_possible_cpus() + 1; - i = 0; + n = nr_elems / num_possible_cpus(); + m = nr_elems % num_possible_cpus(); + cpu_idx = 0; for_each_possible_cpu(cpu) { -again: head = per_cpu_ptr(s->freelist, cpu); - /* No locking required as this is not visible yet. */ - pcpu_freelist_push_node(head, buf); - i++; - buf += elem_size; - if (i == nr_elems) - break; - if (i % pcpu_entries) - goto again; + j = n + (cpu_idx < m ? 1 : 0); + for (i = 0; i < j; i++) { + /* No locking required as this is not visible yet. */ + pcpu_freelist_push_node(head, buf); + buf += elem_size; + } + cpu_idx++; } } From 1f6e04a1c7b85da3b765ca9f46029e5d1826d839 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 11 Nov 2022 07:56:20 -0500 Subject: [PATCH 12/51] bpf: Fix offset calculation error in __copy_map_value and zero_map_value Function __copy_map_value and zero_map_value miscalculated copy offset, resulting in possible copy of unwanted data to user or kernel. Fix it. Fixes: cc48755808c6 ("bpf: Add zero_map_value to zero map value with special fields") Fixes: 4d7d7f69f4b1 ("bpf: Adapt copy_map_value for multiple offset case") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20221111125620.754855-1-xukuohai@huaweicloud.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 74c6f449d81e..c1bd1bd10506 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -315,7 +315,7 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b u32 next_off = map->off_arr->field_off[i]; memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } @@ -344,7 +344,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) u32 next_off = map->off_arr->field_off[i]; memset(dst + curr_off, 0, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memset(dst + curr_off, 0, map->value_size - curr_off); } From 6f928ab8ee9bfbcb0e631c47ea8a16c3d5116ff1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 9 Nov 2022 15:01:36 +0000 Subject: [PATCH 13/51] net: bgmac: Drop free_netdev() from bgmac_enet_remove() netdev is allocated in bgmac_alloc() with devm_alloc_etherdev() and will be auto released in ->remove and ->probe failure path. Using free_netdev() in bgmac_enet_remove() leads to double free. Fixes: 34a5102c3235 ("net: bgmac: allocate struct bgmac just once & don't copy it") Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20221109150136.2991171-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bgmac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 5fb3af5670ec..3038386a5afd 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1568,7 +1568,6 @@ void bgmac_enet_remove(struct bgmac *bgmac) phy_disconnect(bgmac->net_dev->phydev); netif_napi_del(&bgmac->napi); bgmac_dma_free(bgmac); - free_netdev(bgmac->net_dev); } EXPORT_SYMBOL_GPL(bgmac_enet_remove); From 98a2ac1ca8fd6eca6867726fe238d06e75eb1acd Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Nov 2022 21:28:32 +0800 Subject: [PATCH 14/51] mISDN: fix possible memory leak in mISDN_dsp_element_register() Afer commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of device is allocated dynamically, use put_device() to give up the reference, so that the name can be freed in kobject_cleanup() when the refcount is 0. The 'entry' is going to be freed in mISDN_dsp_dev_release(), so the kfree() is removed. list_del() is called in mISDN_dsp_dev_release(), so it need be initialized. Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221109132832.3270119-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/isdn/mISDN/dsp_pipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c index c3b2c99b5cd5..cfbcd9e973c2 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -77,6 +77,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) if (!entry) return -ENOMEM; + INIT_LIST_HEAD(&entry->list); entry->elem = elem; entry->dev.class = elements_class; @@ -107,7 +108,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) device_unregister(&entry->dev); return ret; err1: - kfree(entry); + put_device(&entry->dev); return ret; } EXPORT_SYMBOL(mISDN_dsp_element_register); From 8eab9be56cc6b702a445d2b6d0256aa0992316b3 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 10 Nov 2022 02:16:42 +0000 Subject: [PATCH 15/51] net: hinic: Fix error handling in hinic_module_init() A problem about hinic create debugfs failed is triggered with the following log given: [ 931.419023] debugfs: Directory 'hinic' with parent '/' already present! The reason is that hinic_module_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of hinic can never be created later. hinic_module_init() hinic_dbg_register_debugfs() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: 253ac3a97921 ("hinic: add support to query sq info") Signed-off-by: Yuan Can Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221110021642.80378-1-yuancan@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index e1f54a2f28b2..2d6906aba2a2 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1474,8 +1474,15 @@ static struct pci_driver hinic_driver = { static int __init hinic_module_init(void) { + int ret; + hinic_dbg_register_debugfs(HINIC_DRV_NAME); - return pci_register_driver(&hinic_driver); + + ret = pci_register_driver(&hinic_driver); + if (ret) + hinic_dbg_unregister_debugfs(); + + return ret; } static void __exit hinic_module_exit(void) From e2a54350dc9642e7dfc07335ca355581caa9dbfe Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 10 Nov 2022 13:49:38 +0800 Subject: [PATCH 16/51] net: phy: dp83867: Fix SGMII FIFO depth for non OF devices Current driver code will read device tree node information, and set default values if there is no info provided. This is not done in non-OF devices leading to SGMII fifo depths being set to the smallest size. This patch sets the value to the default value of the PHY as stated in the PHY datasheet. Fixes: 4dc08dcc9f6f ("net: phy: dp83867: introduce critical chip default init for non-of platform") Signed-off-by: Michael Sit Wei Hong Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221110054938.925347-1-michael.wei.hong.sit@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 417527f8bbf5..7446d5c6c714 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -682,6 +682,13 @@ static int dp83867_of_init(struct phy_device *phydev) */ dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN / 2; + /* For non-OF device, the RX and TX FIFO depths are taken from + * default value. So, we init RX & TX FIFO depths here + * so that it is configured correctly later in dp83867_config_init(); + */ + dp83867->tx_fifo_depth = DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; + dp83867->rx_fifo_depth = DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; + return 0; } #endif /* CONFIG_OF_MDIO */ From 77711683a50477de39757d67ab1a3638220d6860 Mon Sep 17 00:00:00 2001 From: Mohd Faizal Abdul Rahim Date: Thu, 10 Nov 2022 14:45:52 +0800 Subject: [PATCH 17/51] net: stmmac: ensure tx function is not running in stmmac_xdp_release() When stmmac_xdp_release() is called, there is a possibility that tx function is still running on other queues which will lead to tx queue timed out and reset adapter. This commit ensure that tx function is not running xdp before release flow continue to run. Fixes: ac746c8520d9 ("net: stmmac: enhance XDP ZC driver level switching performance") Signed-off-by: Song Yoong Siang Signed-off-by: Mohd Faizal Abdul Rahim Signed-off-by: Noor Azura Ahmad Tarmizi Link: https://lore.kernel.org/r/20221110064552.22504-1-noor.azura.ahmad.tarmizi@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8273e6a175c8..6b43da78cdf0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6548,6 +6548,9 @@ void stmmac_xdp_release(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); u32 chan; + /* Ensure tx function is not running */ + netif_tx_disable(dev); + /* Disable NAPI process */ stmmac_disable_all_queues(priv); From 0834ced65a6a1eaa10d0b319b685879a671b29aa Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Thu, 10 Nov 2022 17:03:29 +0800 Subject: [PATCH 18/51] net/tls: Fix memory leak in tls_enc_skb() and tls_sw_fallback_init() 'aead_req' and 'aead_send' is allocated but not freed in default switch case. This commit fixes the potential memory leak by freeing them under the situation. Note that the default cases here should never be reached as they'd mean we allowed offloading an unsupported algorithm. Fixes: ea7a9d88ba21 ("net/tls: Use cipher sizes structs") Signed-off-by: Yu Liao Reviewed-by: Gal Pressman Link: https://lore.kernel.org/r/20221110090329.2036382-1-liaoyu15@huawei.com Signed-off-by: Jakub Kicinski --- net/tls/tls_device_fallback.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index cdb391a8754b..7fbb1d0b69b3 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -346,7 +346,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, salt = tls_ctx->crypto_send.aes_gcm_256.salt; break; default: - return NULL; + goto free_req; } cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type]; buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE + @@ -492,7 +492,8 @@ int tls_sw_fallback_init(struct sock *sk, key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key; break; default: - return -EINVAL; + rc = -EINVAL; + goto free_aead; } cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type]; From 9cbd48d5fa14e4c65f8580de16686077f7cea02b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 10 Nov 2022 13:31:35 +0800 Subject: [PATCH 19/51] mctp i2c: don't count unused / invalid keys for flow release We're currently hitting the WARN_ON in mctp_i2c_flow_release: if (midev->release_count > midev->i2c_lock_count) { WARN_ONCE(1, "release count overflow"); This may be hit if we expire a flow before sending the first packet it contains - as we will not be pairing the increment of release_count (performed on flow release) with the i2c lock operation (only performed on actual TX). To fix this, only release a flow if we've encountered it previously (ie, dev_flow_state does not indicate NEW), as we will mark the flow as ACTIVE at the same time as accounting for the i2c lock operation. We also need to add an INVALID flow state, to indicate when we've done the release. Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Reported-by: Jian Zhang Tested-by: Jian Zhang Signed-off-by: Jeremy Kerr Link: https://lore.kernel.org/r/20221110053135.329071-1-jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-i2c.c | 47 +++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index 0762c735dd8a..1d67a3ca1fd1 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -43,6 +43,7 @@ enum { MCTP_I2C_FLOW_STATE_NEW = 0, MCTP_I2C_FLOW_STATE_ACTIVE, + MCTP_I2C_FLOW_STATE_INVALID, }; /* List of all struct mctp_i2c_client @@ -374,12 +375,18 @@ mctp_i2c_get_tx_flow_state(struct mctp_i2c_dev *midev, struct sk_buff *skb) */ if (!key->valid) { state = MCTP_I2C_TX_FLOW_INVALID; - - } else if (key->dev_flow_state == MCTP_I2C_FLOW_STATE_NEW) { - key->dev_flow_state = MCTP_I2C_FLOW_STATE_ACTIVE; - state = MCTP_I2C_TX_FLOW_NEW; } else { - state = MCTP_I2C_TX_FLOW_EXISTING; + switch (key->dev_flow_state) { + case MCTP_I2C_FLOW_STATE_NEW: + key->dev_flow_state = MCTP_I2C_FLOW_STATE_ACTIVE; + state = MCTP_I2C_TX_FLOW_NEW; + break; + case MCTP_I2C_FLOW_STATE_ACTIVE: + state = MCTP_I2C_TX_FLOW_EXISTING; + break; + default: + state = MCTP_I2C_TX_FLOW_INVALID; + } } spin_unlock_irqrestore(&key->lock, flags); @@ -617,21 +624,31 @@ static void mctp_i2c_release_flow(struct mctp_dev *mdev, { struct mctp_i2c_dev *midev = netdev_priv(mdev->dev); + bool queue_release = false; unsigned long flags; spin_lock_irqsave(&midev->lock, flags); - midev->release_count++; + /* if we have seen the flow/key previously, we need to pair the + * original lock with a release + */ + if (key->dev_flow_state == MCTP_I2C_FLOW_STATE_ACTIVE) { + midev->release_count++; + queue_release = true; + } + key->dev_flow_state = MCTP_I2C_FLOW_STATE_INVALID; spin_unlock_irqrestore(&midev->lock, flags); - /* Ensure we have a release operation queued, through the fake - * marker skb - */ - spin_lock(&midev->tx_queue.lock); - if (!midev->unlock_marker.next) - __skb_queue_tail(&midev->tx_queue, &midev->unlock_marker); - spin_unlock(&midev->tx_queue.lock); - - wake_up(&midev->tx_wq); + if (queue_release) { + /* Ensure we have a release operation queued, through the fake + * marker skb + */ + spin_lock(&midev->tx_queue.lock); + if (!midev->unlock_marker.next) + __skb_queue_tail(&midev->tx_queue, + &midev->unlock_marker); + spin_unlock(&midev->tx_queue.lock); + wake_up(&midev->tx_wq); + } } static const struct net_device_ops mctp_i2c_ops = { From 8979f428a4afc215e390006e5ea19fd4e22c7ca9 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 10 Nov 2022 18:30:37 +0800 Subject: [PATCH 20/51] net: liquidio: release resources when liquidio driver open failed When liquidio driver open failed, it doesn't release resources. Compile tested only. Fixes: 5b07aee11227 ("liquidio: MSIX support for CN23XX") Fixes: dbc97bfd3918 ("net: liquidio: Add missing null pointer checks") Signed-off-by: Zhengchao Shao Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index d312bd594935..75771825c3f9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1794,13 +1794,10 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (OCTEON_CN23XX_PF(oct)) { - if (!oct->msix_on) - if (setup_tx_poll_fn(netdev)) - return -1; - } else { - if (setup_tx_poll_fn(netdev)) - return -1; + if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + ret = setup_tx_poll_fn(netdev); + if (ret) + goto err_poll; } netif_tx_start_all_queues(netdev); @@ -1813,7 +1810,7 @@ static int liquidio_open(struct net_device *netdev) /* tell Octeon to start forwarding packets to host */ ret = send_rx_ctrl_cmd(lio, 1); if (ret) - return ret; + goto err_rx_ctrl; /* start periodical statistics fetch */ INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats); @@ -1824,6 +1821,27 @@ static int liquidio_open(struct net_device *netdev) dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); + return 0; + +err_rx_ctrl: + if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + cleanup_tx_poll_fn(netdev); +err_poll: + if (lio->ptp_clock) { + ptp_clock_unregister(lio->ptp_clock); + lio->ptp_clock = NULL; + } + + if (oct->props[lio->ifidx].napi_enabled == 1) { + list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) + napi_disable(napi); + + oct->props[lio->ifidx].napi_enabled = 0; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 0; + } + return ret; } From 2d25107e111a85c56f601a5470f1780ec054e6ac Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Thu, 10 Nov 2022 19:38:23 +0800 Subject: [PATCH 21/51] mISDN: fix misuse of put_device() in mISDN_register_device() We should not release reference by put_device() before calling device_initialize(). Fixes: e7d1d4d9ac0d ("mISDN: fix possible memory leak in mISDN_register_device()") Signed-off-by: Wang ShaoBo Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 7ea0100f218a..90ee56d07a6e 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -222,7 +222,7 @@ mISDN_register_device(struct mISDNdevice *dev, err = get_free_devid(); if (err < 0) - goto error1; + return err; dev->id = err; device_initialize(&dev->dev); From 5df1341ea822292275c56744aab9c536d75c33be Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Fri, 11 Nov 2022 09:41:30 +0800 Subject: [PATCH 22/51] net: macvlan: Use built-in RCU list checking hlist_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to hlist_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled. Execute as follow: ip link add link eth0 type macvlan mode source macaddr add The rtnl_lock is held when macvlan_hash_lookup_source() or macvlan_fill_info_macaddr() are called in the non-RCU read side section. So, pass lockdep_rtnl_is_held() to silence false lockdep warning. Fixes: 79cf79abce71 ("macvlan: add source mode") Signed-off-by: Chuang Wang Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 578897aaada0..b8cc55b2d721 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -141,7 +141,7 @@ static struct macvlan_source_entry *macvlan_hash_lookup_source( u32 idx = macvlan_eth_hash(addr); struct hlist_head *h = &vlan->port->vlan_source_hash[idx]; - hlist_for_each_entry_rcu(entry, h, hlist) { + hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (ether_addr_equal_64bits(entry->addr, addr) && entry->vlan == vlan) return entry; @@ -1647,7 +1647,7 @@ static int macvlan_fill_info_macaddr(struct sk_buff *skb, struct hlist_head *h = &vlan->port->vlan_source_hash[i]; struct macvlan_source_entry *entry; - hlist_for_each_entry_rcu(entry, h, hlist) { + hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (entry->vlan != vlan) continue; if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr)) From 8fbb53c8bfd8c56ecf1f78dc821778b58f505503 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 11 Nov 2022 09:47:34 +0800 Subject: [PATCH 23/51] net: caif: fix double disconnect client in chnl_net_open() When connecting to client timeout, disconnect client for twice in chnl_net_open(). Remove one. Compile tested only. Fixes: 2aa40aef9deb ("caif: Use link layer MTU instead of fixed MTU") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 4d63ef13a1fd..f35fc87c453a 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -310,9 +310,6 @@ static int chnl_net_open(struct net_device *dev) if (result == 0) { pr_debug("connect timeout\n"); - caif_disconnect_client(dev_net(dev), &priv->chnl); - priv->state = CAIF_DISCONNECTED; - pr_debug("state disconnected\n"); result = -ETIMEDOUT; goto error; } From 991aef4ee4f6eb999924f429b943441a32835c8f Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 11 Nov 2022 15:04:33 +0800 Subject: [PATCH 24/51] bnxt_en: Remove debugfs when pci_register_driver failed When pci_register_driver failed, we need to remove debugfs, which will caused a resource leak, fix it. Resource leak logs as follows: [ 52.184456] debugfs: Directory 'bnxt_en' with parent '/' already present! Fixes: cabfb09d87bd ("bnxt_en: add debugfs support for DIM") Signed-off-by: Gaosheng Cui Reviewed-by: Leon Romanovsky Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c78b6e9dea2c..9f8a6ce4b356 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14037,8 +14037,16 @@ static struct pci_driver bnxt_pci_driver = { static int __init bnxt_init(void) { + int err; + bnxt_debug_init(); - return pci_register_driver(&bnxt_pci_driver); + err = pci_register_driver(&bnxt_pci_driver); + if (err) { + bnxt_debug_exit(); + return err; + } + + return 0; } static void __exit bnxt_exit(void) From 298b83e180d53a310f9b47e3bf13b7b583e75e9c Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:08:27 +0800 Subject: [PATCH 25/51] octeon_ep: delete unnecessary napi rollback under set_queues_err in octep_open() octep_napi_add() and octep_napi_enable() are all after netif_set_real_num_{tx,rx}_queues() in octep_open(), so it is unnecessary napi rollback under set_queues_err. Delete them to fix it. Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 9089adcb75f9..7985a748fafe 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -527,8 +527,6 @@ static int octep_open(struct net_device *netdev) return 0; set_queues_err: - octep_napi_disable(oct); - octep_napi_delete(oct); octep_clean_irqs(oct); setup_irq_err: octep_free_oqs(oct); From 9d3ff7131877fb092185c369fbb14b57ac4e7cec Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:08:47 +0800 Subject: [PATCH 26/51] octeon_ep: ensure octep_get_link_status() successfully before octep_link_up() octep_get_link_status() can fail because send mbox message failed, then octep_get_link_status() will return ret less than 0. Excute octep_link_up() as long as ret is not equal to 0 in octep_open() now. That is not correct. The value type of link.state is enum octep_ctrl_net_state. Positive value represents up. Excute octep_link_up() when ret is bigger than 0. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 7985a748fafe..546bcebf4462 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -521,7 +521,7 @@ static int octep_open(struct net_device *netdev) octep_oq_dbell_init(oct); ret = octep_get_link_status(oct); - if (ret) + if (ret > 0) octep_link_up(netdev); return 0; From e4041be97b15302ebfffda8bbd45f3b2d096048f Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:09:23 +0800 Subject: [PATCH 27/51] octeon_ep: fix potential memory leak in octep_device_setup() When occur unsupported_dev and mbox init errors, it did not free oct->conf and iounmap() oct->mmio[i].hw_addr. That would trigger memory leak problem. Add kfree() for oct->conf and iounmap() for oct->mmio[i].hw_addr under unsupported_dev and mbox init errors to fix the problem. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 546bcebf4462..53f288c32238 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -956,7 +956,7 @@ int octep_device_setup(struct octep_device *oct) ret = octep_ctrl_mbox_init(ctrl_mbox); if (ret) { dev_err(&pdev->dev, "Failed to initialize control mbox\n"); - return -1; + goto unsupported_dev; } oct->ctrl_mbox_ifstats_offset = OCTEP_CTRL_MBOX_SZ(ctrl_mbox->h2fq.elem_sz, ctrl_mbox->h2fq.elem_cnt, @@ -966,6 +966,10 @@ int octep_device_setup(struct octep_device *oct) return 0; unsupported_dev: + for (i = 0; i < OCTEP_MMIO_REGIONS; i++) + iounmap(oct->mmio[i].hw_addr); + + kfree(oct->conf); return -1; } From 848ffce2f0c93f3481052340a919123a21f808b6 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:09:35 +0800 Subject: [PATCH 28/51] octeon_ep: ensure get mac address successfully before eth_hw_addr_set() octep_get_mac_addr() can fail because send mbox message failed. If this happens, octep_dev->mac_addr will be zero. It should not continue to initialize. Add exception handling for octep_get_mac_addr() to fix it. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 53f288c32238..b45dd7f04e21 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1072,7 +1072,11 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->max_mtu = OCTEP_MAX_MTU; netdev->mtu = OCTEP_DEFAULT_MTU; - octep_get_mac_addr(octep_dev, octep_dev->mac_addr); + err = octep_get_mac_addr(octep_dev, octep_dev->mac_addr); + if (err) { + dev_err(&pdev->dev, "Failed to get mac address\n"); + goto register_dev_err; + } eth_hw_addr_set(netdev, octep_dev->mac_addr); err = register_netdev(netdev); From f7c125bd79f50ec6094761090be81d02726ec6f4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 11 Nov 2022 09:20:44 +0000 Subject: [PATCH 29/51] net: mhi: Fix memory leak in mhi_net_dellink() MHI driver registers network device without setting the needs_free_netdev flag, and does NOT call free_netdev() when unregisters network device, which causes a memory leak. This patch calls free_netdev() to fix it since netdev_priv is used after unregister. Fixes: 13adac032982 ("net: mhi_net: Register wwan_ops for link creation") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/mhi_net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index 0b1b6f650104..0b9d37979133 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -343,6 +343,8 @@ static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev) kfree_skb(mhi_netdev->skbagg_head); + free_netdev(ndev); + dev_set_drvdata(&mhi_dev->dev, NULL); } From ed1fe1bebe18884b11e5536b5ac42e3a48960835 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 11 Nov 2022 23:10:20 +0200 Subject: [PATCH 30/51] net: dsa: make dsa_master_ioctl() see through port_hwtstamp_get() shims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are multi-generational drivers like mv88e6xxx which have code like this: int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) { if (!chip->info->ptp_support) return -EOPNOTSUPP; ... } DSA wants to deny PTP timestamping on the master if the switch supports timestamping too. However it currently relies on the presence of the port_hwtstamp_get() callback to determine PTP capability, and this clearly does not work in that case (method is present but returns -EOPNOTSUPP). We should not deny PTP on the DSA master for those switches which truly do not support hardware timestamping. Create a dsa_port_supports_hwtstamp() method which actually probes for support by calling port_hwtstamp_get() and seeing whether that returned -EOPNOTSUPP or not. Fixes: f685e609a301 ("net: dsa: Deny PTP on master if switch supports it") Link: https://patchwork.kernel.org/project/netdevbpf/patch/20221110124345.3901389-1-festevam@gmail.com/ Reported-by: Fabio Estevam Reported-by: Steffen Bätz Signed-off-by: Vladimir Oltean Tested-by: Fabio Estevam Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 1 + net/dsa/master.c | 3 +-- net/dsa/port.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 6e65c7ffd6f3..71e9707d11d4 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -210,6 +210,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, extern struct rtnl_link_ops dsa_link_ops __read_mostly; /* port.c */ +bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr); void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); diff --git a/net/dsa/master.c b/net/dsa/master.c index 40367ab41cf8..421de166515f 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -204,8 +204,7 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * switch in the tree that is PTP capable. */ list_for_each_entry(dp, &dst->ports, list) - if (dp->ds->ops->port_hwtstamp_get || - dp->ds->ops->port_hwtstamp_set) + if (dsa_port_supports_hwtstamp(dp, ifr)) return -EBUSY; break; } diff --git a/net/dsa/port.c b/net/dsa/port.c index 208168276995..750fe68d9b2a 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -110,6 +110,22 @@ static bool dsa_port_can_configure_learning(struct dsa_port *dp) return !err; } +bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr) +{ + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_hwtstamp_get || !ds->ops->port_hwtstamp_set) + return false; + + /* "See through" shim implementations of the "get" method. + * This will clobber the ifreq structure, but we will either return an + * error, or the master will overwrite it with proper values. + */ + err = ds->ops->port_hwtstamp_get(ds, dp->index, ifr); + return err != -EOPNOTSUPP; +} + int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) { struct dsa_switch *ds = dp->ds; From 30f5312d2c722107364f266cfa98ef4f0857c1fb Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 11 Nov 2022 18:03:27 +0100 Subject: [PATCH 31/51] mlxsw: Avoid warnings when not offloaded FDB entry with IPv6 is removed FDB entries that perform VXLAN encapsulation with an IPv6 underlay hold a reference on a resource - the KVDL entry where the IPv6 underlay destination IP is stored. For that, the driver maintains two hash tables: 1. Maps IPv6 to KVDL index 2. Maps {MAC, FID index} to IPv6 address When a FDB entry is removed, the second table is used to find the relevant IPv6 address and the first table is used to remove the reference count and free the index if is not used anymore. In order for a packet to be forwarded to a single remote VTEP, FDB entries need to be configured at both the bridge and VXLAN devices' FDB tables. Both entries are squashed into one {MAC, VLAN/VNI} -> IP entry in the hardware. Therefore, in case one entry is removed, the entry will be removed from the hardware and the remaining entry will be unmarked with 'offload' flag since it is not offloaded anymore. For example, the two FDB entries should be added to allow packets to be forwarded via vx10: $ bridge fdb add dev vx10 aa:bb:cc:dd:ee:ff self static dst 2001:db8:5::1 $ bridge fdb add dev vx10 aa:bb:cc:dd:ee:ff master static vlan 10 When one entry will be removed, the second one will not be offloaded anymore. When the first entry (in VXLAN FDB) will be removed / will not be offloaded anymore, the two mappings in IPv6 hash tables will be removed. In case that the second entry is removed before the first one, unexpected warnings[1][2] will be shown in user space as a result of removing the first entry. The issue is that not offloaded entry is removed, the driver tries to search the relevant entries in the hash tables, does not find them and therefore warns. Do not handle removing of not offloaded VXLAN FDB entries, as they were already removed when the offload flag was removed. [1]: WARNING: CPU: 1 PID: 239 at drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c:914 mlxsw_sp_nve_ipv6_addr_map_del+0x6b/0x80 [mlxsw_spectrum] ... Hardware name: Mellanox Technologies Ltd. Mellanox switch/Mellanox switch, BIOS 4.6.5 05/21/2015 Workqueue: mlxsw_core_ordered mlxsw_sp_switchdev_vxlan_fdb_event_work [mlxsw_spectrum] RIP: 0010:mlxsw_sp_nve_ipv6_addr_map_del+0x6b/0x80 [mlxsw_spectrum] ... Call Trace: mlxsw_sp_port_fdb_tunnel_uc_op+0x6cf/0x7b0 [mlxsw_spectrum] mlxsw_sp_switchdev_vxlan_fdb_event_work+0x17c/0x420 [mlxsw_spectrum] ? finish_task_switch.isra.0+0x8c/0x290 process_one_work+0x1cd/0x390 worker_thread+0x48/0x3c0 ? process_one_work+0x390/0x390 kthread+0xe0/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 [2]: WARNING: CPU: 0 PID: 239 at drivers/net/ethernet/mellanox/mlxsw/spectrum.c:3035 mlxsw_sp_ipv6_addr_put+0x142/0x220 [mlxsw_spectrum] ... Hardware name: Mellanox Technologies Ltd. Mellanox switch/Mellanox switch, BIOS 4.6.5 05/21/2015 Workqueue: mlxsw_core_ordered mlxsw_sp_switchdev_vxlan_fdb_event_work [mlxsw_spectrum] RIP: 0010:mlxsw_sp_ipv6_addr_put+0x142/0x220 [mlxsw_spectrum] ... Call Trace: ? mlxsw_sp_port_fdb_tun_uc_op6_sfd_write+0x5c1/0x610 [mlxsw_spectrum] mlxsw_sp_port_fdb_tunnel_uc_op+0x6ec/0x7b0 [mlxsw_spectrum] mlxsw_sp_switchdev_vxlan_fdb_event_work+0x17c/0x420 [mlxsw_spectrum] ? finish_task_switch.isra.0+0x8c/0x290 process_one_work+0x1cd/0x390 worker_thread+0x48/0x3c0 ? process_one_work+0x390/0x390 kthread+0xe0/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 0860c7641634 ("mlxsw: spectrum_nve: Keep track of IPv6 addresses used by FDB entries") Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/c186de8cbd28e3eb661e06f31f7f2f2dff30020f.1668184350.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 4efccd942fb8..1290b2d3eae6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -3470,6 +3470,8 @@ mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp, u16 vid; vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + if (!vxlan_fdb_info->offloaded) + return; bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); if (!bridge_device) From 280c0f7cd0aa4d190619b18243110e052a90775c Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sun, 13 Nov 2022 09:29:29 +0000 Subject: [PATCH 32/51] net: ionic: Fix error handling in ionic_init_module() A problem about ionic create debugfs failed is triggered with the following log given: [ 415.799514] debugfs: Directory 'ionic' with parent '/' already present! The reason is that ionic_init_module() returns ionic_bus_register_driver() directly without checking its return value, if ionic_bus_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ionic can never be created later. ionic_init_module() ionic_debugfs_create() # create debugfs directory ionic_bus_register_driver() pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when ionic_bus_register_driver() returns error. Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") Signed-off-by: Yuan Can Acked-by: Shannon Nelson Link: https://lore.kernel.org/r/20221113092929.19161-1-yuancan@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 56f93b030551..5456c2b15d9b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -687,8 +687,14 @@ int ionic_port_reset(struct ionic *ionic) static int __init ionic_init_module(void) { + int ret; + ionic_debugfs_create(); - return ionic_bus_register_driver(); + ret = ionic_bus_register_driver(); + if (ret) + ionic_debugfs_destroy(); + + return ret; } static void __exit ionic_cleanup_module(void) From 5121197ecc5db58c07da95eb1ff82b98b121a221 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 13 Nov 2022 16:51:19 -0800 Subject: [PATCH 33/51] kcm: close race conditions on sk_receive_queue sk->sk_receive_queue is protected by skb queue lock, but for KCM sockets its RX path takes mux->rx_lock to protect more than just skb queue. However, kcm_recvmsg() still only grabs the skb queue lock, so race conditions still exist. We can teach kcm_recvmsg() to grab mux->rx_lock too but this would introduce a potential performance regression as struct kcm_mux can be shared by multiple KCM sockets. So we have to enforce skb queue lock in requeue_rx_msgs() and handle skb peek case carefully in kcm_wait_data(). Fortunately, skb_recv_datagram() already handles it nicely and is widely used by other sockets, we can just switch to skb_recv_datagram() after getting rid of the unnecessary sock lock in kcm_recvmsg() and kcm_splice_read(). Side note: SOCK_DONE is not used by KCM sockets, so it is safe to get rid of this check too. I ran the original syzbot reproducer for 30 min without seeing any issue. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+278279efdd2730dd14bf@syzkaller.appspotmail.com Reported-by: shaozhengchao Cc: Paolo Abeni Cc: Tom Herbert Signed-off-by: Cong Wang Link: https://lore.kernel.org/r/20221114005119.597905-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni --- net/kcm/kcmsock.c | 58 +++++------------------------------------------ 1 file changed, 6 insertions(+), 52 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a5004228111d..890a2423f559 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -222,7 +222,7 @@ static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) struct sk_buff *skb; struct kcm_sock *kcm; - while ((skb = __skb_dequeue(head))) { + while ((skb = skb_dequeue(head))) { /* Reset destructor to avoid calling kcm_rcv_ready */ skb->destructor = sock_rfree; skb_orphan(skb); @@ -1085,53 +1085,17 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) return err; } -static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, - long timeo, int *err) -{ - struct sk_buff *skb; - - while (!(skb = skb_peek(&sk->sk_receive_queue))) { - if (sk->sk_err) { - *err = sock_error(sk); - return NULL; - } - - if (sock_flag(sk, SOCK_DONE)) - return NULL; - - if ((flags & MSG_DONTWAIT) || !timeo) { - *err = -EAGAIN; - return NULL; - } - - sk_wait_data(sk, &timeo, NULL); - - /* Handle signals */ - if (signal_pending(current)) { - *err = sock_intr_errno(timeo); - return NULL; - } - } - - return skb; -} - static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); int err = 0; - long timeo; struct strp_msg *stm; int copied = 0; struct sk_buff *skb; - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - lock_sock(sk); - - skb = kcm_wait_data(sk, flags, timeo, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -1162,14 +1126,11 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Finished with message */ msg->msg_flags |= MSG_EOR; KCM_STATS_INCR(kcm->stats.rx_msgs); - skb_unlink(skb, &sk->sk_receive_queue); - kfree_skb(skb); } } out: - release_sock(sk); - + skb_free_datagram(sk, skb); return copied ? : err; } @@ -1179,7 +1140,6 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, { struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); - long timeo; struct strp_msg *stm; int err = 0; ssize_t copied; @@ -1187,11 +1147,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Only support splice for SOCKSEQPACKET */ - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - lock_sock(sk); - - skb = kcm_wait_data(sk, flags, timeo, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto err_out; @@ -1219,13 +1175,11 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, * finish reading the message. */ - release_sock(sk); - + skb_free_datagram(sk, skb); return copied; err_out: - release_sock(sk); - + skb_free_datagram(sk, skb); return err; } From d349e9be5a2c2d7588a2c4e4bfa0bb3dc1226769 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 02:56:59 +0000 Subject: [PATCH 34/51] net: ena: Fix error handling in ena_init() The ena_init() won't destroy workqueue created by create_singlethread_workqueue() when pci_register_driver() failed. Call destroy_workqueue() when pci_register_driver() failed to prevent the resource leak. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Yuan Can Acked-by: Shay Agroskin Link: https://lore.kernel.org/r/20221114025659.124726-1-yuancan@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index d350eeec8bad..5a454b58498f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -4543,13 +4543,19 @@ static struct pci_driver ena_pci_driver = { static int __init ena_init(void) { + int ret; + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); if (!ena_wq) { pr_err("Failed to create workqueue\n"); return -ENOMEM; } - return pci_register_driver(&ena_pci_driver); + ret = pci_register_driver(&ena_pci_driver); + if (ret) + destroy_workqueue(ena_wq); + + return ret; } static void __exit ena_cleanup(void) From 18c532e44939caa17f1fa380f7ac50dbc0718dbb Mon Sep 17 00:00:00 2001 From: Aminuddin Jamaluddin Date: Mon, 14 Nov 2022 14:53:02 +0800 Subject: [PATCH 35/51] net: phy: marvell: add sleep time after enabling the loopback bit Sleep time is added to ensure the phy to be ready after loopback bit was set. This to prevent the phy loopback test from failing. Fixes: 020a45aff119 ("net: phy: marvell: add Marvell specific PHY loopback") Cc: # 5.15.x Signed-off-by: Muhammad Husaini Zulkifli Signed-off-by: Aminuddin Jamaluddin Link: https://lore.kernel.org/r/20221114065302.10625-1-aminuddin.jamaluddin@intel.com Signed-off-by: Paolo Abeni --- drivers/net/phy/marvell.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2810f4f9da0c..0d706ee266af 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -2015,14 +2015,16 @@ static int m88e1510_loopback(struct phy_device *phydev, bool enable) if (err < 0) return err; - /* FIXME: Based on trial and error test, it seem 1G need to have - * delay between soft reset and loopback enablement. - */ - if (phydev->speed == SPEED_1000) - msleep(1000); + err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, + BMCR_LOOPBACK); - return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, - BMCR_LOOPBACK); + if (!err) { + /* It takes some time for PHY device to switch + * into/out-of loopback mode. + */ + msleep(1000); + } + return err; } else { err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0); if (err < 0) From a56cad694767ebdb7d80f27ffc239db46fff64de Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 14 Nov 2022 16:20:46 +0800 Subject: [PATCH 36/51] net: hns3: fix incorrect hw rss hash type of rx packet Currently, the HNS3 driver reports the rss hash type of each packet based on the rss hash tuples set. It always reports PKT_HASH_TYPE_L4, without checking the type of current packet. It's incorrect. Fixes it by reporting it base on the packet type. Fixes: 796640778c26 ("net: hns3: support RXD advanced layout") Fixes: 232fc64b6e62 ("net: hns3: Add HW RSS hash information to RX skb") Fixes: ea4858670717 ("net: hns3: handle the BD info on the last BD of the packet") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 - .../hns3/hns3_common/hclge_comm_rss.c | 20 --- .../hns3/hns3_common/hclge_comm_rss.h | 2 - .../net/ethernet/hisilicon/hns3/hns3_enet.c | 163 ++++++++++-------- .../net/ethernet/hisilicon/hns3/hns3_enet.h | 1 + .../hisilicon/hns3/hns3pf/hclge_main.c | 1 - 6 files changed, 94 insertions(+), 94 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 0179fc288f5f..17137de9338c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -819,7 +819,6 @@ struct hnae3_knic_private_info { const struct hnae3_dcb_ops *dcb_ops; u16 int_rl_setting; - enum pkt_hash_types rss_type; void __iomem *io_base; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c index e23729ac3bb8..ae2736549526 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c @@ -191,23 +191,6 @@ u32 hclge_comm_get_rss_key_size(struct hnae3_handle *handle) return HCLGE_COMM_RSS_KEY_SIZE; } -void hclge_comm_get_rss_type(struct hnae3_handle *nic, - struct hclge_comm_rss_tuple_cfg *rss_tuple_sets) -{ - if (rss_tuple_sets->ipv4_tcp_en || - rss_tuple_sets->ipv4_udp_en || - rss_tuple_sets->ipv4_sctp_en || - rss_tuple_sets->ipv6_tcp_en || - rss_tuple_sets->ipv6_udp_en || - rss_tuple_sets->ipv6_sctp_en) - nic->kinfo.rss_type = PKT_HASH_TYPE_L4; - else if (rss_tuple_sets->ipv4_fragment_en || - rss_tuple_sets->ipv6_fragment_en) - nic->kinfo.rss_type = PKT_HASH_TYPE_L3; - else - nic->kinfo.rss_type = PKT_HASH_TYPE_NONE; -} - int hclge_comm_parse_rss_hfunc(struct hclge_comm_rss_cfg *rss_cfg, const u8 hfunc, u8 *hash_algo) { @@ -344,9 +327,6 @@ int hclge_comm_set_rss_input_tuple(struct hnae3_handle *nic, req->ipv6_sctp_en = rss_cfg->rss_tuple_sets.ipv6_sctp_en; req->ipv6_fragment_en = rss_cfg->rss_tuple_sets.ipv6_fragment_en; - if (is_pf) - hclge_comm_get_rss_type(nic, &rss_cfg->rss_tuple_sets); - ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) dev_err(&hw->cmq.csq.pdev->dev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h index 946d166a452d..92af3d2980d3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h @@ -95,8 +95,6 @@ struct hclge_comm_rss_tc_mode_cmd { }; u32 hclge_comm_get_rss_key_size(struct hnae3_handle *handle); -void hclge_comm_get_rss_type(struct hnae3_handle *nic, - struct hclge_comm_rss_tuple_cfg *rss_tuple_sets); void hclge_comm_rss_indir_init_cfg(struct hnae3_ae_dev *ae_dev, struct hclge_comm_rss_cfg *rss_cfg); int hclge_comm_get_rss_tuple(struct hclge_comm_rss_cfg *rss_cfg, int flow_type, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 4cb2421e71a7..7fc83409f257 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -105,26 +105,28 @@ static const struct pci_device_id hns3_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, hns3_pci_tbl); -#define HNS3_RX_PTYPE_ENTRY(ptype, l, s, t) \ +#define HNS3_RX_PTYPE_ENTRY(ptype, l, s, t, h) \ { ptype, \ l, \ CHECKSUM_##s, \ HNS3_L3_TYPE_##t, \ - 1 } + 1, \ + h} #define HNS3_RX_PTYPE_UNUSED_ENTRY(ptype) \ - { ptype, 0, CHECKSUM_NONE, HNS3_L3_TYPE_PARSE_FAIL, 0 } + { ptype, 0, CHECKSUM_NONE, HNS3_L3_TYPE_PARSE_FAIL, 0, \ + PKT_HASH_TYPE_NONE } static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { HNS3_RX_PTYPE_UNUSED_ENTRY(0), - HNS3_RX_PTYPE_ENTRY(1, 0, COMPLETE, ARP), - HNS3_RX_PTYPE_ENTRY(2, 0, COMPLETE, RARP), - HNS3_RX_PTYPE_ENTRY(3, 0, COMPLETE, LLDP), - HNS3_RX_PTYPE_ENTRY(4, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(5, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(6, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(7, 0, COMPLETE, CNM), - HNS3_RX_PTYPE_ENTRY(8, 0, NONE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(1, 0, COMPLETE, ARP, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(2, 0, COMPLETE, RARP, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(3, 0, COMPLETE, LLDP, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(4, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(5, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(6, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(7, 0, COMPLETE, CNM, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(8, 0, NONE, PARSE_FAIL, PKT_HASH_TYPE_NONE), HNS3_RX_PTYPE_UNUSED_ENTRY(9), HNS3_RX_PTYPE_UNUSED_ENTRY(10), HNS3_RX_PTYPE_UNUSED_ENTRY(11), @@ -132,36 +134,36 @@ static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { HNS3_RX_PTYPE_UNUSED_ENTRY(13), HNS3_RX_PTYPE_UNUSED_ENTRY(14), HNS3_RX_PTYPE_UNUSED_ENTRY(15), - HNS3_RX_PTYPE_ENTRY(16, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(17, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(18, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(19, 0, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(20, 0, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(21, 0, NONE, IPV4), - HNS3_RX_PTYPE_ENTRY(22, 0, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(23, 0, NONE, IPV4), - HNS3_RX_PTYPE_ENTRY(24, 0, NONE, IPV4), - HNS3_RX_PTYPE_ENTRY(25, 0, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(16, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(17, 0, COMPLETE, IPV4, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(18, 0, COMPLETE, IPV4, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(19, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(20, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(21, 0, NONE, IPV4, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(22, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(23, 0, NONE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(24, 0, NONE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(25, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), HNS3_RX_PTYPE_UNUSED_ENTRY(26), HNS3_RX_PTYPE_UNUSED_ENTRY(27), HNS3_RX_PTYPE_UNUSED_ENTRY(28), - HNS3_RX_PTYPE_ENTRY(29, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(30, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(31, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(32, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(33, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(34, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(35, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(36, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(37, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(29, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(30, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(31, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(32, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(33, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(34, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(35, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(36, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(37, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(38), - HNS3_RX_PTYPE_ENTRY(39, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(40, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(41, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(42, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(43, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(44, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(45, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(39, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(40, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(41, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(42, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(43, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(44, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(45, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(46), HNS3_RX_PTYPE_UNUSED_ENTRY(47), HNS3_RX_PTYPE_UNUSED_ENTRY(48), @@ -227,35 +229,35 @@ static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { HNS3_RX_PTYPE_UNUSED_ENTRY(108), HNS3_RX_PTYPE_UNUSED_ENTRY(109), HNS3_RX_PTYPE_UNUSED_ENTRY(110), - HNS3_RX_PTYPE_ENTRY(111, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(112, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(113, 0, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(114, 0, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(115, 0, NONE, IPV6), - HNS3_RX_PTYPE_ENTRY(116, 0, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(117, 0, NONE, IPV6), - HNS3_RX_PTYPE_ENTRY(118, 0, NONE, IPV6), - HNS3_RX_PTYPE_ENTRY(119, 0, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(111, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(112, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(113, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(114, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(115, 0, NONE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(116, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(117, 0, NONE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(118, 0, NONE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(119, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), HNS3_RX_PTYPE_UNUSED_ENTRY(120), HNS3_RX_PTYPE_UNUSED_ENTRY(121), HNS3_RX_PTYPE_UNUSED_ENTRY(122), - HNS3_RX_PTYPE_ENTRY(123, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(124, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(125, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(126, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(127, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(128, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(129, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(130, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(131, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(123, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(124, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(125, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(126, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(127, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(128, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(129, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(130, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(131, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(132), - HNS3_RX_PTYPE_ENTRY(133, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(134, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(135, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(136, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(137, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(138, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(139, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(133, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(134, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(135, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(136, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(137, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(138, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(139, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(140), HNS3_RX_PTYPE_UNUSED_ENTRY(141), HNS3_RX_PTYPE_UNUSED_ENTRY(142), @@ -4171,15 +4173,35 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, } static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring, - struct sk_buff *skb, u32 rss_hash) + struct sk_buff *skb, u32 rss_hash, + u32 l234info, u32 ol_info) { - struct hnae3_handle *handle = ring->tqp->handle; - enum pkt_hash_types rss_type; + enum pkt_hash_types rss_type = PKT_HASH_TYPE_NONE; + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); - if (rss_hash) - rss_type = handle->kinfo.rss_type; - else - rss_type = PKT_HASH_TYPE_NONE; + if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) { + u32 ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, + HNS3_RXD_PTYPE_S); + + rss_type = hns3_rx_ptype_tbl[ptype].hash_type; + } else { + int l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, + HNS3_RXD_L3ID_S); + int l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M, + HNS3_RXD_L4ID_S); + + if (l3_type == HNS3_L3_TYPE_IPV4 || + l3_type == HNS3_L3_TYPE_IPV6) { + if (l4_type == HNS3_L4_TYPE_UDP || + l4_type == HNS3_L4_TYPE_TCP || + l4_type == HNS3_L4_TYPE_SCTP) + rss_type = PKT_HASH_TYPE_L4; + else if (l4_type == HNS3_L4_TYPE_IGMP || + l4_type == HNS3_L4_TYPE_ICMP) + rss_type = PKT_HASH_TYPE_L3; + } + } skb_set_hash(skb, rss_hash, rss_type); } @@ -4282,7 +4304,8 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb) ring->tqp_vector->rx_group.total_bytes += len; - hns3_set_rx_skb_rss_type(ring, skb, le32_to_cpu(desc->rx.rss_hash)); + hns3_set_rx_skb_rss_type(ring, skb, le32_to_cpu(desc->rx.rss_hash), + l234info, ol_info); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 133a054af6b7..294a14b4fdef 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -404,6 +404,7 @@ struct hns3_rx_ptype { u32 ip_summed : 2; u32 l3_type : 4; u32 valid : 1; + u32 hash_type: 3; }; struct ring_stats { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 987271da6e9b..e34ac9c5900e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4859,7 +4859,6 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, return ret; } - hclge_comm_get_rss_type(&vport->nic, &hdev->rss_cfg.rss_tuple_sets); return 0; } From 29df7c695ed67a8fa32bb7805bad8fe2a76c1f88 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 14 Nov 2022 16:20:47 +0800 Subject: [PATCH 37/51] net: hns3: fix return value check bug of rx copybreak The refactoring of rx copybreak modifies the original return logic, which will make this feature unavailable. So this patch fixes the return logic of rx copybreak. Fixes: e74a726da2c4 ("net: hns3: refactor hns3_nic_reuse_page()") Fixes: 99f6b5fb5f63 ("net: hns3: use bounce buffer when rx page can not be reused") Signed-off-by: Jie Wang Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 7fc83409f257..028577943ec5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3778,8 +3778,8 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, desc_cb->reuse_flag = 1; } else if (frag_size <= ring->rx_copybreak) { ret = hns3_handle_rx_copybreak(skb, i, ring, pull_len, desc_cb); - if (ret) - goto out; + if (!ret) + return; } out: From 510d7b6ae842e59ee00d57e5f07ac15131b6d899 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Mon, 14 Nov 2022 16:20:48 +0800 Subject: [PATCH 38/51] net: hns3: fix setting incorrect phy link ksettings for firmware in resetting process Currently, if driver is in phy-imp(phy controlled by imp firmware) mode, as driver did not update phy link ksettings after initialization process or not update advertising when getting phy link ksettings from firmware, it may set incorrect phy link ksettings for firmware in resetting process. So fix it. Fixes: f5f2b3e4dcc0 ("net: hns3: add support for imp-controlled PHYs") Fixes: c5ef83cbb1e9 ("net: hns3: fix for phy_addr error in hclge_mac_mdio_config") Fixes: 2312e050f42b ("net: hns3: Fix for deadlock problem occurring when unregistering ae_algo") Signed-off-by: Guangbin Huang Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e34ac9c5900e..4e54f91f7a6c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3443,6 +3443,7 @@ static int hclge_update_tp_port_info(struct hclge_dev *hdev) hdev->hw.mac.autoneg = cmd.base.autoneg; hdev->hw.mac.speed = cmd.base.speed; hdev->hw.mac.duplex = cmd.base.duplex; + linkmode_copy(hdev->hw.mac.advertising, cmd.link_modes.advertising); return 0; } @@ -11586,9 +11587,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_msi_irq_uninit; - if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER && - !hnae3_dev_phy_imp_supported(hdev)) { - ret = hclge_mac_mdio_config(hdev); + if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + if (hnae3_dev_phy_imp_supported(hdev)) + ret = hclge_update_tp_port_info(hdev); + else + ret = hclge_mac_mdio_config(hdev); + if (ret) goto err_msi_irq_uninit; } From 9d45921ee4cb364910097e7d1b7558559c2f9fd2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Nov 2022 10:45:09 +0200 Subject: [PATCH 39/51] bridge: switchdev: Fix memory leaks when changing VLAN protocol The bridge driver can offload VLANs to the underlying hardware either via switchdev or the 8021q driver. When the former is used, the VLAN is marked in the bridge driver with the 'BR_VLFLAG_ADDED_BY_SWITCHDEV' private flag. To avoid the memory leaks mentioned in the cited commit, the bridge driver will try to delete a VLAN via the 8021q driver if the VLAN is not marked with the previously mentioned flag. When the VLAN protocol of the bridge changes, switchdev drivers are notified via the 'SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL' attribute, but the 8021q driver is also called to add the existing VLANs with the new protocol and delete them with the old protocol. In case the VLANs were offloaded via switchdev, the above behavior is both redundant and buggy. Redundant because the VLANs are already programmed in hardware and drivers that support VLAN protocol change (currently only mlx5) change the protocol upon the switchdev attribute notification. Buggy because the 8021q driver is called despite these VLANs being marked with 'BR_VLFLAG_ADDED_BY_SWITCHDEV'. This leads to memory leaks [1] when the VLANs are deleted. Fix by not calling the 8021q driver for VLANs that were already programmed via switchdev. [1] unreferenced object 0xffff8881f6771200 (size 256): comm "ip", pid 446855, jiffies 4298238841 (age 55.240s) hex dump (first 32 bytes): 00 00 7f 0e 83 88 ff ff 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000012819ac>] vlan_vid_add+0x437/0x750 [<00000000f2281fad>] __br_vlan_set_proto+0x289/0x920 [<000000000632b56f>] br_changelink+0x3d6/0x13f0 [<0000000089d25f04>] __rtnl_newlink+0x8ae/0x14c0 [<00000000f6276baf>] rtnl_newlink+0x5f/0x90 [<00000000746dc902>] rtnetlink_rcv_msg+0x336/0xa00 [<000000001c2241c0>] netlink_rcv_skb+0x11d/0x340 [<0000000010588814>] netlink_unicast+0x438/0x710 [<00000000e1a4cd5c>] netlink_sendmsg+0x788/0xc40 [<00000000e8992d4e>] sock_sendmsg+0xb0/0xe0 [<00000000621b8f91>] ____sys_sendmsg+0x4ff/0x6d0 [<000000000ea26996>] ___sys_sendmsg+0x12e/0x1b0 [<00000000684f7e25>] __sys_sendmsg+0xab/0x130 [<000000004538b104>] do_syscall_64+0x3d/0x90 [<0000000091ed9678>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 279737939a81 ("net: bridge: Fix VLANs memory leak") Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20221114084509.860831-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- net/bridge/br_vlan.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 6e53dc991409..9ffd40b8270c 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -959,6 +959,8 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; err = vlan_vid_add(p->dev, proto, vlan->vid); if (err) goto err_filt; @@ -973,8 +975,11 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, /* Delete VLANs for the old proto from the device filter. */ list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); - list_for_each_entry(vlan, &vg->vlan_list, vlist) + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, oldproto, vlan->vid); + } } return 0; @@ -983,13 +988,19 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, attr.u.vlan_protocol = ntohs(oldproto); switchdev_port_attr_set(br->dev, &attr, NULL); - list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) + list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, proto, vlan->vid); + } list_for_each_entry_continue_reverse(p, &br->port_list, list) { vg = nbp_vlan_group(p); - list_for_each_entry(vlan, &vg->vlan_list, vlist) + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, proto, vlan->vid); + } } return err; From c9b895c6878bdb6789dc1d7af60fd10f4a9f1937 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 14 Nov 2022 17:55:49 +0800 Subject: [PATCH 40/51] net: ag71xx: call phylink_disconnect_phy if ag71xx_hw_enable() fail in ag71xx_open() If ag71xx_hw_enable() fails, call phylink_disconnect_phy() to clean up. And if phylink_of_phy_connect() fails, nothing needs to be done. Compile tested only. Fixes: 892e09153fa3 ("net: ag71xx: port to phylink") Signed-off-by: Liu Jian Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20221114095549.40342-1-liujian56@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/ag71xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index cc932b3cf873..4a1efe9b37d0 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1427,7 +1427,7 @@ static int ag71xx_open(struct net_device *ndev) if (ret) { netif_err(ag, link, ndev, "phylink_of_phy_connect filed with err: %i\n", ret); - goto err; + return ret; } max_frame_len = ag71xx_max_frame_len(ndev->mtu); @@ -1448,6 +1448,7 @@ static int ag71xx_open(struct net_device *ndev) err: ag71xx_rings_cleanup(ag); + phylink_disconnect_phy(ag->phylink); return ret; } From 2929cceb2fcf0ded7182562e4888afafece82cce Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 14 Nov 2022 11:05:19 +0000 Subject: [PATCH 41/51] net/x25: Fix skb leak in x25_lapb_receive_frame() x25_lapb_receive_frame() using skb_copy() to get a private copy of skb, the new skb should be freed in the undersized/fragmented skb error handling path. Otherwise there is a memory leak. Fixes: cb101ed2c3c7 ("x25: Handle undersized/fragmented skbs") Signed-off-by: Wei Yongjun Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20221114110519.514538-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski --- net/x25/x25_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 5259ef8f5242..748d8630ab58 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -117,7 +117,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, if (!pskb_may_pull(skb, 1)) { x25_neigh_put(nb); - return 0; + goto drop; } switch (skb->data[0]) { From 4e0c19fcb8b5323716140fa82b79aa9f60e60407 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 14 Nov 2022 16:35:51 +0200 Subject: [PATCH 42/51] net: dsa: don't leak tagger-owned storage on switch driver unbind In the initial commit dc452a471dba ("net: dsa: introduce tagger-owned storage for private and shared data"), we had a call to tag_ops->disconnect(dst) issued from dsa_tree_free(), which is called at tree teardown time. There were problems with connecting to a switch tree as a whole, so this got reworked to connecting to individual switches within the tree. In this process, tag_ops->disconnect(ds) was made to be called only from switch.c (cross-chip notifiers emitted as a result of dynamic tag proto changes), but the normal driver teardown code path wasn't replaced with anything. Solve this problem by adding a function that does the opposite of dsa_switch_setup_tag_protocol(), which is called from the equivalent spot in dsa_switch_teardown(). The positioning here also ensures that we won't have any use-after-free in tagging protocol (*rcv) ops, since the teardown sequence is as follows: dsa_tree_teardown -> dsa_tree_teardown_master -> dsa_master_teardown -> unsets master->dsa_ptr, making no further packets match the ETH_P_XDSA packet type handler -> dsa_tree_teardown_ports -> dsa_port_teardown -> dsa_slave_destroy -> unregisters DSA net devices, there is even a synchronize_net() in unregister_netdevice_many() -> dsa_tree_teardown_switches -> dsa_switch_teardown -> dsa_switch_teardown_tag_protocol -> finally frees the tagger-owned storage Fixes: 7f2973149c22 ("net: dsa: make tagging protocols connect to individual switches from a tree") Signed-off-by: Vladimir Oltean Reviewed-by: Saeed Mahameed Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221114143551.1906361-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index e504a18fc125..5417f7b1187c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -864,6 +864,14 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) return err; } +static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds) +{ + const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; + + if (tag_ops->disconnect) + tag_ops->disconnect(ds); +} + static int dsa_switch_setup(struct dsa_switch *ds) { struct dsa_devlink_priv *dl_priv; @@ -953,6 +961,8 @@ static void dsa_switch_teardown(struct dsa_switch *ds) ds->slave_mii_bus = NULL; } + dsa_switch_teardown_tag_protocol(ds); + if (ds->ops->teardown) ds->ops->teardown(ds); From ba86af3733aece88dbcee0dfebf7e2dcfefb2be4 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 14 Nov 2022 21:38:52 +0800 Subject: [PATCH 43/51] net: lan966x: Fix potential null-ptr-deref in lan966x_stats_init() lan966x_stats_init() calls create_singlethread_workqueue() and not checked the ret value, which may return NULL. And a null-ptr-deref may happen: lan966x_stats_init() create_singlethread_workqueue() # failed, lan966x->stats_queue is NULL queue_delayed_work() queue_delayed_work_on() __queue_delayed_work() # warning here, but continue __queue_work() # access wq->flags, null-ptr-deref Check the ret value and return -ENOMEM if it is NULL. Fixes: 12c2d0a5b8e2 ("net: lan966x: add ethtool configuration and statistics") Signed-off-by: Shang XiaoJing Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index fea42542be28..06811c60d598 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -716,6 +716,9 @@ int lan966x_stats_init(struct lan966x *lan966x) snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(lan966x->dev)); lan966x->stats_queue = create_singlethread_workqueue(queue_name); + if (!lan966x->stats_queue) + return -ENOMEM; + INIT_DELAYED_WORK(&lan966x->stats_work, lan966x_check_stats_work); queue_delayed_work(lan966x->stats_queue, &lan966x->stats_work, LAN966X_STATS_CHECK_DELAY); From 639f5d006e36bb303f525d9479448c412b720c39 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 14 Nov 2022 21:38:53 +0800 Subject: [PATCH 44/51] net: microchip: sparx5: Fix potential null-ptr-deref in sparx_stats_init() and sparx5_start() sparx_stats_init() calls create_singlethread_workqueue() and not checked the ret value, which may return NULL. And a null-ptr-deref may happen: sparx_stats_init() create_singlethread_workqueue() # failed, sparx5->stats_queue is NULL queue_delayed_work() queue_delayed_work_on() __queue_delayed_work() # warning here, but continue __queue_work() # access wq->flags, null-ptr-deref Check the ret value and return -ENOMEM if it is NULL. So as sparx5_start(). Fixes: af4b11022e2d ("net: sparx5: add ethtool configuration and statistics support") Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c | 3 +++ drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 6b0febcb7fa9..01f3a3a41cdb 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1253,6 +1253,9 @@ int sparx_stats_init(struct sparx5 *sparx5) snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(sparx5->dev)); sparx5->stats_queue = create_singlethread_workqueue(queue_name); + if (!sparx5->stats_queue) + return -ENOMEM; + INIT_DELAYED_WORK(&sparx5->stats_work, sparx5_check_stats_work); queue_delayed_work(sparx5->stats_queue, &sparx5->stats_work, SPX5_STATS_CHECK_DELAY); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 62a325e96345..eeac04b84638 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -659,6 +659,9 @@ static int sparx5_start(struct sparx5 *sparx5) snprintf(queue_name, sizeof(queue_name), "%s-mact", dev_name(sparx5->dev)); sparx5->mact_queue = create_singlethread_workqueue(queue_name); + if (!sparx5->mact_queue) + return -ENOMEM; + INIT_DELAYED_WORK(&sparx5->mact_work, sparx5_mact_pull_work); queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work, SPX5_MACT_PULL_DELAY); From f524b7289bbb0c8ffaa2ba3c34c146e43da54fb2 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 14:22:25 +0000 Subject: [PATCH 45/51] net: thunderbolt: Fix error handling in tbnet_init() A problem about insmod thunderbolt-net failed is triggered with following log given while lsmod does not show thunderbolt_net: insmod: ERROR: could not insert module thunderbolt-net.ko: File exists The reason is that tbnet_init() returns tb_register_service_driver() directly without checking its return value, if tb_register_service_driver() failed, it returns without removing property directory, resulting the property directory can never be created later. tbnet_init() tb_register_property_dir() # register property directory tb_register_service_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without remove property directory Fix by remove property directory when tb_register_service_driver() returns error. Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Signed-off-by: Yuan Can Acked-by: Mika Westerberg Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 83fcaeb2ac5e..a52ee2bf5575 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -1391,12 +1391,21 @@ static int __init tbnet_init(void) tb_property_add_immediate(tbnet_dir, "prtcstns", flags); ret = tb_register_property_dir("network", tbnet_dir); - if (ret) { - tb_property_free_dir(tbnet_dir); - return ret; - } + if (ret) + goto err_free_dir; - return tb_register_service_driver(&tbnet_driver); + ret = tb_register_service_driver(&tbnet_driver); + if (ret) + goto err_unregister; + + return 0; + +err_unregister: + tb_unregister_property_dir("network", tbnet_dir); +err_free_dir: + tb_property_free_dir(tbnet_dir); + + return ret; } module_init(tbnet_init); From b68777d54fac21fc833ec26ea1a2a84f975ab035 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 14 Nov 2022 20:16:19 +0100 Subject: [PATCH 46/51] l2tp: Serialize access to sk_user_data with sk_callback_lock sk->sk_user_data has multiple users, which are not compatible with each other. Writers must synchronize by grabbing the sk->sk_callback_lock. l2tp currently fails to grab the lock when modifying the underlying tunnel socket fields. Fix it by adding appropriate locking. We err on the side of safety and grab the sk_callback_lock also inside the sk_destruct callback overridden by l2tp, even though there should be no refs allowing access to the sock at the time when sk_destruct gets called. v4: - serialize write to sk_user_data in l2tp sk_destruct v3: - switch from sock lock to sk_callback_lock - document write-protection for sk_user_data v2: - update Fixes to point to origin of the bug - use real names in Reported/Tested-by tags Cc: Tom Parkin Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reported-by: Haowei Yan Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- net/l2tp/l2tp_core.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 5db02546941c..e0517ecc6531 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -323,7 +323,7 @@ struct sk_filter; * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals - * @sk_user_data: RPC layer private data + * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7499c51b1850..754fdda8a5f5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1150,8 +1150,10 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ + write_lock_bh(&sk->sk_callback_lock); sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); /* Call the original destructor */ if (sk->sk_destruct) @@ -1469,16 +1471,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock = sockfd_lookup(tunnel->fd, &ret); if (!sock) goto err; - - ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); - if (ret < 0) - goto err_sock; } + sk = sock->sk; + write_lock(&sk->sk_callback_lock); + + ret = l2tp_validate_socket(sk, net, tunnel->encap); + if (ret < 0) + goto err_sock; + tunnel->l2tp_net = net; pn = l2tp_pernet(net); - sk = sock->sk; sock_hold(sk); tunnel->sock = sk; @@ -1504,7 +1508,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, setup_udp_tunnel_sock(net, sock, &udp_cfg); } else { - sk->sk_user_data = tunnel; + rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1518,6 +1522,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); + write_unlock(&sk->sk_callback_lock); return 0; err_sock: @@ -1525,6 +1530,8 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock_release(sock); else sockfd_put(sock); + + write_unlock(&sk->sk_callback_lock); err: return ret; } From aeac4ec8f46d610a10adbaeff5e2edf6a88ffc62 Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Mon, 14 Nov 2022 22:56:16 +0000 Subject: [PATCH 47/51] tcp: configurable source port perturb table size On embedded systems with little memory and no relevant security concerns, it is beneficial to reduce the size of the table. Reducing the size from 2^16 to 2^8 saves 255 KiB of kernel RAM. Makes the table size configurable as an expert option. The size was previously increased from 2^8 to 2^16 in commit 4c2c8f03a5ab ("tcp: increase source port perturb table to 2^16"). Signed-off-by: Gleb Mazovetskiy Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 ++++++++++ net/ipv4/inet_hashtables.c | 10 +++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e983bb0c5012..2dfb12230f08 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -402,6 +402,16 @@ config INET_IPCOMP If unsure, say Y. +config INET_TABLE_PERTURB_ORDER + int "INET: Source port perturbation table size (as power of 2)" if EXPERT + default 16 + help + Source port perturbation table size (as power of 2) for + RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm. + + The default is almost always what you want. + Only change this if you know what you are doing. + config INET_XFRM_TUNNEL tristate select INET_TUNNEL diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d3dc28156622..033bf3c2538f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -906,13 +906,13 @@ EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. + * * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though - * attacks were since demonstrated, thus we use 65536 instead to really - * give more isolation and privacy, at the expense of 256kB of kernel - * memory. + * attacks were since demonstrated, thus we use 65536 by default instead + * to really give more isolation and privacy, at the expense of 256kB + * of kernel memory. */ -#define INET_TABLE_PERTURB_SHIFT 16 -#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER) static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, From 064bc7312bd09a48798418663090be0c776183db Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 15 Nov 2022 17:30:25 +0800 Subject: [PATCH 48/51] netdevsim: Fix memory leak of nsim_dev->fa_cookie kmemleak reports this issue: unreferenced object 0xffff8881bac872d0 (size 8): comm "sh", pid 58603, jiffies 4481524462 (age 68.065s) hex dump (first 8 bytes): 04 00 00 00 de ad be ef ........ backtrace: [<00000000c80b8577>] __kmalloc+0x49/0x150 [<000000005292b8c6>] nsim_dev_trap_fa_cookie_write+0xc1/0x210 [netdevsim] [<0000000093d78e77>] full_proxy_write+0xf3/0x180 [<000000005a662c16>] vfs_write+0x1c5/0xaf0 [<000000007aabf84a>] ksys_write+0xed/0x1c0 [<000000005f1d2e47>] do_syscall_64+0x3b/0x90 [<000000006001c6ec>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The issue occurs in the following scenarios: nsim_dev_trap_fa_cookie_write() kmalloc() fa_cookie nsim_dev->fa_cookie = fa_cookie .. nsim_drv_remove() The fa_cookie allocked in nsim_dev_trap_fa_cookie_write() is not freed. To fix, add kfree(nsim_dev->fa_cookie) to nsim_drv_remove(). Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata") Signed-off-by: Wang Yufen Cc: Jiri Pirko Link: https://lore.kernel.org/r/1668504625-14698-1-git-send-email-wangyufen@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index a7880c7ce94c..68e56e451b2b 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1683,6 +1683,7 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) ARRAY_SIZE(nsim_devlink_params)); devl_resources_unregister(devlink); kfree(nsim_dev->vfconfigs); + kfree(nsim_dev->fa_cookie); devl_unlock(devlink); devlink_free(devlink); dev_set_drvdata(&nsim_bus_dev->dev, NULL); From e103ba33998d0f25653cc8ebe745b68d1ee10cda Mon Sep 17 00:00:00 2001 From: Enrico Sau Date: Tue, 15 Nov 2022 11:58:59 +0100 Subject: [PATCH 49/51] net: usb: qmi_wwan: add Telit 0x103a composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit LE910C4-WWX composition: 0x103a: rmnet Signed-off-by: Enrico Sau Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20221115105859.14324-1-enrico.sau@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 26c34a7c21bd..afd6faa4c2ec 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1357,6 +1357,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ From 809ff97a677ff85dfb7ce2b63be261d1633dcf29 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Tue, 15 Nov 2022 13:44:34 +0200 Subject: [PATCH 50/51] net: usb: smsc95xx: fix external PHY reset An external PHY needs settling time after power up or reset. In the bind() function an mdio bus is registered. If at this point the external PHY is still initialising, no valid PHY ID will be read and on phy_find_first() the bind() function will fail. If an external PHY is present, wait the maximum time specified in 802.3 45.2.7.1.1. Fixes: 05b35e7eb9a1 ("smsc95xx: add phylib support") Signed-off-by: Alexandru Tachici Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221115114434.9991-2-alexandru.tachici@analog.com Signed-off-by: Paolo Abeni --- drivers/net/usb/smsc95xx.c | 46 ++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index bfb58c91db04..32d2c60d334d 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -66,6 +66,7 @@ struct smsc95xx_priv { spinlock_t mac_cr_lock; u8 features; u8 suspend_flags; + bool is_internal_phy; struct irq_chip irqchip; struct irq_domain *irqdomain; struct fwnode_handle *irqfwnode; @@ -252,6 +253,43 @@ static void smsc95xx_mdio_write(struct usbnet *dev, int phy_id, int idx, mutex_unlock(&dev->phy_mutex); } +static int smsc95xx_mdiobus_reset(struct mii_bus *bus) +{ + struct smsc95xx_priv *pdata; + struct usbnet *dev; + u32 val; + int ret; + + dev = bus->priv; + pdata = dev->driver_priv; + + if (pdata->is_internal_phy) + return 0; + + mutex_lock(&dev->phy_mutex); + + ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + if (ret < 0) + goto reset_out; + + val |= PM_CTL_PHY_RST_; + + ret = smsc95xx_write_reg(dev, PM_CTRL, val); + if (ret < 0) + goto reset_out; + + /* Driver has no knowledge at this point about the external PHY. + * The 802.3 specifies that the reset process shall + * be completed within 0.5 s. + */ + fsleep(500000); + +reset_out: + mutex_unlock(&dev->phy_mutex); + + return 0; +} + static int smsc95xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx) { struct usbnet *dev = bus->priv; @@ -1052,7 +1090,6 @@ static void smsc95xx_handle_link_change(struct net_device *net) static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) { struct smsc95xx_priv *pdata; - bool is_internal_phy; char usb_path[64]; int ret, phy_irq; u32 val; @@ -1133,13 +1170,14 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) if (ret < 0) goto free_mdio; - is_internal_phy = !(val & HW_CFG_PSEL_); - if (is_internal_phy) + pdata->is_internal_phy = !(val & HW_CFG_PSEL_); + if (pdata->is_internal_phy) pdata->mdiobus->phy_mask = ~(1u << SMSC95XX_INTERNAL_PHY_ID); pdata->mdiobus->priv = dev; pdata->mdiobus->read = smsc95xx_mdiobus_read; pdata->mdiobus->write = smsc95xx_mdiobus_write; + pdata->mdiobus->reset = smsc95xx_mdiobus_reset; pdata->mdiobus->name = "smsc95xx-mdiobus"; pdata->mdiobus->parent = &dev->udev->dev; @@ -1160,7 +1198,7 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) } pdata->phydev->irq = phy_irq; - pdata->phydev->is_internal = is_internal_phy; + pdata->phydev->is_internal = pdata->is_internal_phy; /* detect device revision as different features may be available */ ret = smsc95xx_read_reg(dev, ID_REV, &val); From 58e0be1ef6118c5352b56a4d06e974c5599993a5 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 15 Nov 2022 22:24:00 +0800 Subject: [PATCH 51/51] net: use struct_group to copy ip/ipv6 header addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel test robot reported warnings when build bonding module with make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/net/bonding/: from ../drivers/net/bonding/bond_main.c:35: In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v4addrs’ at ../include/net/ip.h:566:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3984:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v6addrs’ at ../include/net/ipv6.h:900:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3994:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is because we try to copy the whole ip/ip6 address to the flow_key, while we only point the to ip/ip6 saddr. Note that since these are UAPI headers, __struct_group() is used to avoid the compiler warnings. Reported-by: kernel test robot Fixes: c3f8324188fa ("net: Add full IPv6 addresses to flow_keys") Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20221115142400.1204786-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/ip.h | 2 +- include/net/ipv6.h | 2 +- include/uapi/linux/ip.h | 6 ++++-- include/uapi/linux/ipv6.h | 6 ++++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 038097c2a152..144bdfbb25af 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -563,7 +563,7 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != offsetof(typeof(flow->addrs), v4addrs.src) + sizeof(flow->addrs.v4addrs.src)); - memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); + memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 37943ba3a73c..d383c895592a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -897,7 +897,7 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != offsetof(typeof(flow->addrs), v6addrs.src) + sizeof(flow->addrs.v6addrs.src)); - memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs)); + memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 961ec16a26b8..874a92349bf5 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -100,8 +100,10 @@ struct iphdr { __u8 ttl; __u8 protocol; __sum16 check; - __be32 saddr; - __be32 daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + __be32 saddr; + __be32 daddr; + ); /*The options start here. */ }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 03cdbe798fe3..81f4243bebb1 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -130,8 +130,10 @@ struct ipv6hdr { __u8 nexthdr; __u8 hop_limit; - struct in6_addr saddr; - struct in6_addr daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + struct in6_addr saddr; + struct in6_addr daddr; + ); };