linux-stable/include/linux/udp.h
Willem de Bruijn bec1f6f697 udp: generate gso with UDP_SEGMENT
Support generic segmentation offload for udp datagrams. Callers can
concatenate and send at once the payload of multiple datagrams with
the same destination.

To set segment size, the caller sets socket option UDP_SEGMENT to the
length of each discrete payload. This value must be smaller than or
equal to the relevant MTU.

A follow-up patch adds cmsg UDP_SEGMENT to specify segment size on a
per send call basis.

Total byte length may then exceed MTU. If not an exact multiple of
segment size, the last segment will be shorter.

The implementation adds a gso_size field to the udp socket, ip(v6)
cmsg cookie and inet_cork structure to be able to set the value at
setsockopt or cmsg time and to work with both lockless and corked
paths.

Initial benchmark numbers show UDP GSO about as expensive as TCP GSO.

    tcp tso
     3197 MB/s 54232 msg/s 54232 calls/s
         6,457,754,262      cycles

    tcp gso
     1765 MB/s 29939 msg/s 29939 calls/s
        11,203,021,806      cycles

    tcp without tso/gso *
      739 MB/s 12548 msg/s 12548 calls/s
        11,205,483,630      cycles

    udp
      876 MB/s 14873 msg/s 624666 calls/s
        11,205,777,429      cycles

    udp gso
     2139 MB/s 36282 msg/s 36282 calls/s
        11,204,374,561      cycles

   [*] after reverting commit 0a6b2a1dc2
       ("tcp: switch to GSO being always on")

Measured total system cycles ('-a') for one core while pinning both
the network receive path and benchmark process to that core:

  perf stat -a -C 12 -e cycles \
    ./udpgso_bench_tx -C 12 -4 -D "$DST" -l 4

Note the reduction in calls/s with GSO. Bytes per syscall
increases from 1470 to 61818.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-04-26 15:08:04 -04:00

126 lines
3.7 KiB
C

/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Definitions for the UDP protocol.
*
* Version: @(#)udp.h 1.0.2 04/28/93
*
* Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _LINUX_UDP_H
#define _LINUX_UDP_H
#include <net/inet_sock.h>
#include <linux/skbuff.h>
#include <net/netns/hash.h>
#include <uapi/linux/udp.h>
/*
 * Return skb's transport header viewed as a UDP header.
 * Only a pointer cast is done here; nothing about the packet is checked.
 */
static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
{
	struct udphdr *uh;

	uh = (struct udphdr *)skb_transport_header(skb);
	return uh;
}
/*
 * Return skb's inner transport header viewed as a UDP header.
 * Only a pointer cast is done here; nothing about the packet is checked.
 */
static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
{
	struct udphdr *inner_uh;

	inner_uh = (struct udphdr *)skb_inner_transport_header(skb);
	return inner_uh;
}
/*
 * Evaluates to 128 when CONFIG_BASE_SMALL is non-zero and to 256 otherwise.
 * NOTE(review): going by the name, this is the lower bound on the UDP hash
 * table size -- confirm against the code that sizes the table.
 */
#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256)
/*
 * Add the namespace's hash mix to num, then keep only the bits selected
 * by mask.
 */
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
{
	u32 mixed = num + net_hash_mix(net);

	return mixed & mask;
}
/*
 * UDP socket state layered on top of struct inet_sock.
 *
 * udp_sk() casts a struct sock pointer straight to struct udp_sock, so the
 * embedded inet_sock must remain the first member.
 */
struct udp_sock {
/* inet_sock has to be the first member */
struct inet_sock inet;
/* Shorthand names for fields held in the embedded sock_common. */
#define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0]
#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1]
#define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node
int pending; /* Any pending frames? */
unsigned int corkflag; /* Cork is required */
__u8 encap_type; /* Is this an Encapsulation socket? */
unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
*/
__u16 len; /* total length of pending frames */
__u16 gso_size; /* GSO segment size, set through the UDP_SEGMENT socket option */
/*
* Fields specific to UDP-Lite.
*/
__u16 pcslen;
__u16 pcrlen;
/* indicator bits used by pcflag: */
#define UDPLITE_BIT 0x1 /* set by udplite proto init function */
#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */
#define UDPLITE_RECV_CC 0x4 /* set via udplite setsockopt */
__u8 pcflag; /* marks socket as UDP-Lite if > 0 */
__u8 unused[3];
/*
* For encapsulation sockets.
*/
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
void (*encap_destroy)(struct sock *sk);
/* GRO functions for UDP socket */
struct sk_buff ** (*gro_receive)(struct sock *sk,
struct sk_buff **head,
struct sk_buff *skb);
int (*gro_complete)(struct sock *sk,
struct sk_buff *skb,
int nhoff);
/* udp_recvmsg tries to use this before splicing sk_receive_queue */
struct sk_buff_head reader_queue ____cacheline_aligned_in_smp;
/* This field is dirtied by udp_recvmsg() */
int forward_deficit;
};
/*
 * Evaluates to 64.
 * NOTE(review): presumably the cap on the number of segments in one UDP GSO
 * send -- confirm against the users of this constant.
 * NOTE(review): the UL suffix is on the shift count, not on the value being
 * shifted, so the expression has type int rather than unsigned long.
 */
#define UDP_MAX_SEGMENTS (1 << 6UL)
/*
 * View a struct sock pointer as the struct udp_sock that contains it.
 * This is a plain cast (it also drops the const qualifier); no check is
 * made that sk really belongs to a UDP socket.
 */
static inline struct udp_sock *udp_sk(const struct sock *sk)
{
	struct udp_sock *up = (struct udp_sock *)sk;

	return up;
}
static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
udp_sk(sk)->no_check6_tx = val;
}
static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
{
udp_sk(sk)->no_check6_rx = val;
}
static inline bool udp_get_no_check6_tx(struct sock *sk)
{
return udp_sk(sk)->no_check6_tx;
}
static inline bool udp_get_no_check6_rx(struct sock *sk)
{
return udp_sk(sk)->no_check6_rx;
}
/*
 * Walk an hlist whose entries are linked through
 * __sk_common.skc_portaddr_node, using __sk as the loop cursor.
 */
#define udp_portaddr_for_each_entry(__sk, list) \
hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
/*
 * Same walk, built on hlist_for_each_entry_rcu().
 * NOTE(review): presumably meant for callers holding RCU read-side
 * protection -- confirm against the callers.
 */
#define udp_portaddr_for_each_entry_rcu(__sk, list) \
hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
/*
 * True when the socket's sk_protocol equals IPPROTO_UDPLITE.
 *
 * The argument is parenthesized so that any pointer expression, such as
 * "base + i" or "&socks[i]", binds correctly before the -> operator.
 */
#define IS_UDPLITE(__sk) ((__sk)->sk_protocol == IPPROTO_UDPLITE)
#endif /* _LINUX_UDP_H */