mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-11-01 17:08:10 +00:00
aacd9289af
We must try harder to get unique (addr, port) pairs when
doing port autoselection for sockets with SO_REUSEADDR
option set.
We achieve this by adding a relaxation parameter to
inet_csk_bind_conflict. When 'relax' parameter is off
we return a conflict whenever the current searched
pair (addr, port) is not unique.
This tries to address the problems reported in patch:
8d238b25b1
Revert "tcp: bind() fix when many ports are bound"
Tests where ran for creating and binding(0) many sockets
on 100 IPs. The results are, on average:
* 60000 sockets, 600 ports / IP:
* 0.210 s, 620 (IP, port) duplicates without patch
* 0.219 s, no duplicates with patch
* 100000 sockets, 1000 ports / IP:
* 0.371 s, 1720 duplicates without patch
* 0.373 s, no duplicates with patch
* 200000 sockets, 2000 ports / IP:
* 0.766 s, 6900 duplicates without patch
* 0.768 s, no duplicates with patch
* 500000 sockets, 5000 ports / IP:
* 2.227 s, 41500 duplicates without patch
* 2.284 s, no duplicates with patch
Signed-off-by: Alex Copot <alex.mihai.c@gmail.com>
Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
255 lines
6.8 KiB
C
255 lines
6.8 KiB
C
/*
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
* operating system. INET is implemented using the BSD Socket
|
|
* interface as the means of communication with the user level.
|
|
*
|
|
* Support for INET6 connection oriented protocols.
|
|
*
|
|
* Authors: See the TCPv6 sources
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or(at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/in6.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/jhash.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <net/addrconf.h>
|
|
#include <net/inet_connection_sock.h>
|
|
#include <net/inet_ecn.h>
|
|
#include <net/inet_hashtables.h>
|
|
#include <net/ip6_route.h>
|
|
#include <net/sock.h>
|
|
#include <net/inet6_connection_sock.h>
|
|
|
|
int inet6_csk_bind_conflict(const struct sock *sk,
|
|
const struct inet_bind_bucket *tb, bool relax)
|
|
{
|
|
const struct sock *sk2;
|
|
const struct hlist_node *node;
|
|
|
|
/* We must walk the whole port owner list in this case. -DaveM */
|
|
/*
|
|
* See comment in inet_csk_bind_conflict about sock lookup
|
|
* vs net namespaces issues.
|
|
*/
|
|
sk_for_each_bound(sk2, node, &tb->owners) {
|
|
if (sk != sk2 &&
|
|
(!sk->sk_bound_dev_if ||
|
|
!sk2->sk_bound_dev_if ||
|
|
sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
|
|
(!sk->sk_reuse || !sk2->sk_reuse ||
|
|
sk2->sk_state == TCP_LISTEN) &&
|
|
ipv6_rcv_saddr_equal(sk, sk2))
|
|
break;
|
|
}
|
|
|
|
return node != NULL;
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
|
|
|
|
struct dst_entry *inet6_csk_route_req(struct sock *sk,
|
|
const struct request_sock *req)
|
|
{
|
|
struct inet6_request_sock *treq = inet6_rsk(req);
|
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
|
struct in6_addr *final_p, final;
|
|
struct dst_entry *dst;
|
|
struct flowi6 fl6;
|
|
|
|
memset(&fl6, 0, sizeof(fl6));
|
|
fl6.flowi6_proto = IPPROTO_TCP;
|
|
fl6.daddr = treq->rmt_addr;
|
|
final_p = fl6_update_dst(&fl6, np->opt, &final);
|
|
fl6.saddr = treq->loc_addr;
|
|
fl6.flowi6_oif = sk->sk_bound_dev_if;
|
|
fl6.flowi6_mark = sk->sk_mark;
|
|
fl6.fl6_dport = inet_rsk(req)->rmt_port;
|
|
fl6.fl6_sport = inet_rsk(req)->loc_port;
|
|
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
|
|
|
|
dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
|
|
if (IS_ERR(dst))
|
|
return NULL;
|
|
|
|
return dst;
|
|
}
|
|
|
|
/*
|
|
* request_sock (formerly open request) hash tables.
|
|
*/
|
|
static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
|
|
const u32 rnd, const u32 synq_hsize)
|
|
{
|
|
u32 c;
|
|
|
|
c = jhash_3words((__force u32)raddr->s6_addr32[0],
|
|
(__force u32)raddr->s6_addr32[1],
|
|
(__force u32)raddr->s6_addr32[2],
|
|
rnd);
|
|
|
|
c = jhash_2words((__force u32)raddr->s6_addr32[3],
|
|
(__force u32)rport,
|
|
c);
|
|
|
|
return c & (synq_hsize - 1);
|
|
}
|
|
|
|
struct request_sock *inet6_csk_search_req(const struct sock *sk,
|
|
struct request_sock ***prevp,
|
|
const __be16 rport,
|
|
const struct in6_addr *raddr,
|
|
const struct in6_addr *laddr,
|
|
const int iif)
|
|
{
|
|
const struct inet_connection_sock *icsk = inet_csk(sk);
|
|
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
|
|
struct request_sock *req, **prev;
|
|
|
|
for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
|
|
lopt->hash_rnd,
|
|
lopt->nr_table_entries)];
|
|
(req = *prev) != NULL;
|
|
prev = &req->dl_next) {
|
|
const struct inet6_request_sock *treq = inet6_rsk(req);
|
|
|
|
if (inet_rsk(req)->rmt_port == rport &&
|
|
req->rsk_ops->family == AF_INET6 &&
|
|
ipv6_addr_equal(&treq->rmt_addr, raddr) &&
|
|
ipv6_addr_equal(&treq->loc_addr, laddr) &&
|
|
(!treq->iif || treq->iif == iif)) {
|
|
WARN_ON(req->sk != NULL);
|
|
*prevp = prev;
|
|
return req;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(inet6_csk_search_req);
|
|
|
|
void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
|
|
struct request_sock *req,
|
|
const unsigned long timeout)
|
|
{
|
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
|
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
|
|
const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
|
|
inet_rsk(req)->rmt_port,
|
|
lopt->hash_rnd, lopt->nr_table_entries);
|
|
|
|
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
|
|
inet_csk_reqsk_queue_added(sk, timeout);
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
|
|
|
|
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
|
|
{
|
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
|
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
|
|
|
|
sin6->sin6_family = AF_INET6;
|
|
sin6->sin6_addr = np->daddr;
|
|
sin6->sin6_port = inet_sk(sk)->inet_dport;
|
|
/* We do not store received flowlabel for TCP */
|
|
sin6->sin6_flowinfo = 0;
|
|
sin6->sin6_scope_id = 0;
|
|
if (sk->sk_bound_dev_if &&
|
|
ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
|
|
sin6->sin6_scope_id = sk->sk_bound_dev_if;
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
|
|
|
|
static inline
|
|
void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
|
|
struct in6_addr *daddr, struct in6_addr *saddr)
|
|
{
|
|
__ip6_dst_store(sk, dst, daddr, saddr);
|
|
|
|
#ifdef CONFIG_XFRM
|
|
{
|
|
struct rt6_info *rt = (struct rt6_info *)dst;
|
|
rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static inline
|
|
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
|
|
{
|
|
struct dst_entry *dst;
|
|
|
|
dst = __sk_dst_check(sk, cookie);
|
|
|
|
#ifdef CONFIG_XFRM
|
|
if (dst) {
|
|
struct rt6_info *rt = (struct rt6_info *)dst;
|
|
if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
|
|
__sk_dst_reset(sk);
|
|
dst = NULL;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return dst;
|
|
}
|
|
|
|
int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
|
|
{
|
|
struct sock *sk = skb->sk;
|
|
struct inet_sock *inet = inet_sk(sk);
|
|
struct ipv6_pinfo *np = inet6_sk(sk);
|
|
struct flowi6 fl6;
|
|
struct dst_entry *dst;
|
|
struct in6_addr *final_p, final;
|
|
int res;
|
|
|
|
memset(&fl6, 0, sizeof(fl6));
|
|
fl6.flowi6_proto = sk->sk_protocol;
|
|
fl6.daddr = np->daddr;
|
|
fl6.saddr = np->saddr;
|
|
fl6.flowlabel = np->flow_label;
|
|
IP6_ECN_flow_xmit(sk, fl6.flowlabel);
|
|
fl6.flowi6_oif = sk->sk_bound_dev_if;
|
|
fl6.flowi6_mark = sk->sk_mark;
|
|
fl6.fl6_sport = inet->inet_sport;
|
|
fl6.fl6_dport = inet->inet_dport;
|
|
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
|
|
|
|
final_p = fl6_update_dst(&fl6, np->opt, &final);
|
|
|
|
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
|
|
|
|
if (dst == NULL) {
|
|
dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
|
|
|
|
if (IS_ERR(dst)) {
|
|
sk->sk_err_soft = -PTR_ERR(dst);
|
|
sk->sk_route_caps = 0;
|
|
kfree_skb(skb);
|
|
return PTR_ERR(dst);
|
|
}
|
|
|
|
__inet6_csk_dst_store(sk, dst, NULL, NULL);
|
|
}
|
|
|
|
rcu_read_lock();
|
|
skb_dst_set_noref(skb, dst);
|
|
|
|
/* Restore final destination back after routing done */
|
|
fl6.daddr = np->daddr;
|
|
|
|
res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
|
|
rcu_read_unlock();
|
|
return res;
|
|
}
|
|
EXPORT_SYMBOL_GPL(inet6_csk_xmit);
|