From c6d277064b1da7f9015b575a562734de87a7e463 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Sep 2023 11:36:55 -0700 Subject: [PATCH 1/6] tcp: Factorise sk_family-independent comparison in inet_bind2_bucket_match(_addr_any). This is a prep patch to make the following patches cleaner that touch inet_bind2_bucket_match() and inet_bind2_bucket_match_addr_any(). Both functions have duplicated comparison for netns, port, and l3mdev. Let's factorise them. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 7876b7d703cb..5c54f2804174 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -815,41 +815,39 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk) { + if (!net_eq(ib2_net(tb), net) || tb->port != port || + tb->l3mdev != l3mdev) + return false; + #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family != tb->family) return false; if (sk->sk_family == AF_INET6) - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && - ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); - else + return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); #endif - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr; + return tb->rcv_saddr == sk->sk_rcv_saddr; } bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk) { + if (!net_eq(ib2_net(tb), net) || tb->port != port || + tb->l3mdev != l3mdev) + return false; + #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family != tb->family) { if (sk->sk_family == AF_INET) - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && - ipv6_addr_any(&tb->v6_rcv_saddr); + return ipv6_addr_any(&tb->v6_rcv_saddr); return false; } if (sk->sk_family == AF_INET6) - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && - ipv6_addr_any(&tb->v6_rcv_saddr); - else + return ipv6_addr_any(&tb->v6_rcv_saddr); #endif - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && tb->rcv_saddr == 0; + return tb->rcv_saddr == 0; } /* The socket's bhash2 hashbucket spinlock must be held when this is called */ From aa99e5f87bd54db55dd37cb130bd5eb55933027f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Sep 2023 11:36:56 -0700 Subject: [PATCH 2/6] tcp: Fix bind() regression for v4-mapped-v6 wildcard address. Andrei Vagin reported bind() regression with strace logs. If we bind() a TCPv6 socket to ::FFFF:0.0.0.0 and then bind() a TCPv4 socket to 127.0.0.1, the 2nd bind() should fail but now succeeds. from socket import * s1 = socket(AF_INET6, SOCK_STREAM) s1.bind(('::ffff:0.0.0.0', 0)) s2 = socket(AF_INET, SOCK_STREAM) s2.bind(('127.0.0.1', s1.getsockname()[1])) During the 2nd bind(), if tb->family is AF_INET6 and sk->sk_family is AF_INET in inet_bind2_bucket_match_addr_any(), we still need to check if tb has the v4-mapped-v6 wildcard address. The example above does not work after commit 5456262d2baa ("net: Fix incorrect address comparison when searching for a bind2 bucket"), but the blamed change is not the commit. Before the commit, the leading zeros of ::FFFF:0.0.0.0 were treated as 0.0.0.0, and the sequence above worked by chance. Technically, this case has been broken since bhash2 was introduced. Note that if we bind() two sockets to 127.0.0.1 and then ::FFFF:0.0.0.0, the 2nd bind() fails properly because we fall back to using bhash to detect conflicts for the v4-mapped-v6 address. Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Reported-by: Andrei Vagin Closes: https://lore.kernel.org/netdev/ZPuYBOFC8zsK6r9T@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipv6.h | 5 +++++ net/ipv4/inet_hashtables.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index fe274c122a56..c6932d1a3fa8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -784,6 +784,11 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline bool ipv6_addr_v4mapped_any(const struct in6_addr *a) +{ + return ipv6_addr_v4mapped(a) && ipv4_is_zeronet(a->s6_addr32[3]); +} + static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a) { return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 5c54f2804174..a58b04052ca6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -839,7 +839,8 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family != tb->family) { if (sk->sk_family == AF_INET) - return ipv6_addr_any(&tb->v6_rcv_saddr); + return ipv6_addr_any(&tb->v6_rcv_saddr) || + ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr); return false; } From c48ef9c4aed3632566b57ba66cec6ec78624d4cb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Sep 2023 11:36:57 -0700 Subject: [PATCH 3/6] tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address. Since bhash2 was introduced, the example below does not work as expected. These two bind() should conflict, but the 2nd bind() now succeeds. from socket import * s1 = socket(AF_INET6, SOCK_STREAM) s1.bind(('::ffff:127.0.0.1', 0)) s2 = socket(AF_INET, SOCK_STREAM) s2.bind(('127.0.0.1', s1.getsockname()[1])) During the 2nd bind() in inet_csk_get_port(), inet_bind2_bucket_find() fails to find the 1st socket's tb2, so inet_bind2_bucket_create() allocates a new tb2 for the 2nd socket. Then, we call inet_csk_bind_conflict() that checks conflicts in the new tb2 by inet_bhash2_conflict(). However, the new tb2 does not include the 1st socket, thus the bind() finally succeeds. In this case, inet_bind2_bucket_match() must check if AF_INET6 tb2 has the conflicting v4-mapped-v6 address so that inet_bind2_bucket_find() returns the 1st socket's tb2. Note that if we bind two sockets to 127.0.0.1 and then ::FFFF:127.0.0.1, the 2nd bind() fails properly for the same reason mentinoed in the previous commit. Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a58b04052ca6..c32f5e28758b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -820,8 +820,13 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, return false; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb->family) + if (sk->sk_family != tb->family) { + if (sk->sk_family == AF_INET) + return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) && + tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; + return false; + } if (sk->sk_family == AF_INET6) return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); From 0071d15517b4a3d265abc00395beb1138e7236c7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Sep 2023 11:36:58 -0700 Subject: [PATCH 4/6] selftest: tcp: Fix address length in bind_wildcard.c. The selftest passes the IPv6 address length for an IPv4 address. We should pass the correct length. Note inet_bind_sk() does not check if the size is larger than sizeof(struct sockaddr_in), so there is no real bug in this selftest. Fixes: 13715acf8ab5 ("selftest: Add test for bind() conflicts.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- tools/testing/selftests/net/bind_wildcard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index 58edfc15d28b..e7ebe72e879d 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -100,7 +100,7 @@ void bind_sockets(struct __test_metadata *_metadata, TEST_F(bind_wildcard, v4_v6) { bind_sockets(_metadata, self, - (struct sockaddr *)&self->addr4, sizeof(self->addr6), + (struct sockaddr *)&self->addr4, sizeof(self->addr4), (struct sockaddr *)&self->addr6, sizeof(self->addr6)); } From 2895d879dd41a588d80acde1aa832deb38d67823 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Sep 2023 11:36:59 -0700 Subject: [PATCH 5/6] selftest: tcp: Move expected_errno into each test case in bind_wildcard.c. This is a preparation patch for the following patch. Let's define expected_errno in each test case so that we can add other test cases easily. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- tools/testing/selftests/net/bind_wildcard.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index e7ebe72e879d..81f694536099 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -10,37 +10,41 @@ FIXTURE(bind_wildcard) { struct sockaddr_in addr4; struct sockaddr_in6 addr6; - int expected_errno; }; FIXTURE_VARIANT(bind_wildcard) { const __u32 addr4_const; const struct in6_addr *addr6_const; + int expected_errno; }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any) { .addr4_const = INADDR_ANY, .addr6_const = &in6addr_any, + .expected_errno = EADDRINUSE, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) { .addr4_const = INADDR_ANY, .addr6_const = &in6addr_loopback, + .expected_errno = 0, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) { .addr4_const = INADDR_LOOPBACK, .addr6_const = &in6addr_any, + .expected_errno = EADDRINUSE, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) { .addr4_const = INADDR_LOOPBACK, .addr6_const = &in6addr_loopback, + .expected_errno = 0, }; FIXTURE_SETUP(bind_wildcard) @@ -52,11 +56,6 @@ FIXTURE_SETUP(bind_wildcard) self->addr6.sin6_family = AF_INET6; self->addr6.sin6_port = htons(0); self->addr6.sin6_addr = *variant->addr6_const; - - if (variant->addr6_const == &in6addr_any) - self->expected_errno = EADDRINUSE; - else - self->expected_errno = 0; } FIXTURE_TEARDOWN(bind_wildcard) @@ -65,6 +64,7 @@ FIXTURE_TEARDOWN(bind_wildcard) void bind_sockets(struct __test_metadata *_metadata, FIXTURE_DATA(bind_wildcard) *self, + int expected_errno, struct sockaddr *addr1, socklen_t addrlen1, struct sockaddr *addr2, socklen_t addrlen2) { @@ -86,9 +86,9 @@ void bind_sockets(struct __test_metadata *_metadata, ASSERT_GT(fd[1], 0); ret = bind(fd[1], addr2, addrlen2); - if (self->expected_errno) { + if (expected_errno) { ASSERT_EQ(ret, -1); - ASSERT_EQ(errno, self->expected_errno); + ASSERT_EQ(errno, expected_errno); } else { ASSERT_EQ(ret, 0); } @@ -99,14 +99,14 @@ void bind_sockets(struct __test_metadata *_metadata, TEST_F(bind_wildcard, v4_v6) { - bind_sockets(_metadata, self, + bind_sockets(_metadata, self, variant->expected_errno, (struct sockaddr *)&self->addr4, sizeof(self->addr4), (struct sockaddr *)&self->addr6, sizeof(self->addr6)); } TEST_F(bind_wildcard, v6_v4) { - bind_sockets(_metadata, self, + bind_sockets(_metadata, self, variant->expected_errno, (struct sockaddr *)&self->addr6, sizeof(self->addr6), (struct sockaddr *)&self->addr4, sizeof(self->addr4)); } From 8637d8e8b653f4c8b6fd277b434b118f844d1d77 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Sep 2023 11:37:00 -0700 Subject: [PATCH 6/6] selftest: tcp: Add v4-mapped-v6 cases in bind_wildcard.c. We add these 8 test cases in bind_wildcard.c to check bind() conflicts. 1st bind() 2nd bind() --------- --------- 0.0.0.0 ::FFFF:0.0.0.0 ::FFFF:0.0.0.0 0.0.0.0 0.0.0.0 ::FFFF:127.0.0.1 ::FFFF:127.0.0.1 0.0.0.0 127.0.0.1 ::FFFF:0.0.0.0 ::FFFF:0.0.0.0 127.0.0.1 127.0.0.1 ::FFFF:127.0.0.1 ::FFFF:127.0.0.1 127.0.0.1 All test passed without bhash2 and with bhash2 and this series. Before bhash2: $ uname -r 6.0.0-rc1-00393-g0bf73255d3a3 $ ./bind_wildcard ... # PASSED: 16 / 16 tests passed. Just after bhash2: $ uname -r 6.0.0-rc1-00394-g28044fc1d495 $ ./bind_wildcard ... ok 15 bind_wildcard.v4_local_v6_v4mapped_local.v4_v6 not ok 16 bind_wildcard.v4_local_v6_v4mapped_local.v6_v4 # FAILED: 15 / 16 tests passed. On net.git: $ ./bind_wildcard ... not ok 14 bind_wildcard.v4_local_v6_v4mapped_any.v6_v4 not ok 16 bind_wildcard.v4_local_v6_v4mapped_local.v6_v4 # FAILED: 13 / 16 tests passed. With this series: $ ./bind_wildcard ... # PASSED: 16 / 16 tests passed. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- tools/testing/selftests/net/bind_wildcard.c | 46 +++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index 81f694536099..a2662348cdb1 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -6,6 +6,24 @@ #include "../kselftest_harness.h" +struct in6_addr in6addr_v4mapped_any = { + .s6_addr = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 255, 255, + 0, 0, 0, 0 + } +}; + +struct in6_addr in6addr_v4mapped_loopback = { + .s6_addr = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 255, 255, + 127, 0, 0, 1 + } +}; + FIXTURE(bind_wildcard) { struct sockaddr_in addr4; @@ -33,6 +51,20 @@ FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) .expected_errno = 0, }; +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_v4mapped_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_v4mapped_loopback, + .expected_errno = EADDRINUSE, +}; + FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) { .addr4_const = INADDR_LOOPBACK, @@ -47,6 +79,20 @@ FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) .expected_errno = 0, }; +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_v4mapped_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_v4mapped_loopback, + .expected_errno = EADDRINUSE, +}; + FIXTURE_SETUP(bind_wildcard) { self->addr4.sin_family = AF_INET;